train_fpred_patch.py

import cPickle as pickle
import string
import sys
import time
from itertools import izip
import lasagne as nn
import numpy as np
nn.random.set_rng(np.random.RandomState(317070))
import theano
from datetime import datetime, timedelta
import utils
import logger
import theano.tensor as T
import buffering
from configuration import config, set_configuration
import pathfinder

theano.config.warn_float64 = 'raise'

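# read the configuration name from the command line, activate that config and create a unique experiment id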
if len(sys.argv) < 2:
    sys.exit("Usage: train.py <configuration_name>")

config_name = sys.argv[1]
set_configuration('configs_fpred_patch', config_name)
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

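# build the network defined by the configuration and print a per-layer parameter/shape summary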
print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print ' number of parameters: %d' % num_params
print string.ljust(' layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(num_param.__str__(), 10)
    print ' %s %s %s' % (name, num_param, layer.output_shape)

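# training and validation objectives, learning rate schedule and parameter updates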
train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)

learning_rate_schedule = config().learning_rate_schedule
learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
updates = config().build_updates(train_loss, model, learning_rate)

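# shared variables hold one data chunk on the GPU; training batches are sliced out of them by batch index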
x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
y_shared = nn.utils.shared_empty(dim=len(model.l_target.shape))

idx = T.lscalar('idx')
givens_train = {}
givens_train[model.l_in.input_var] = x_shared[idx * config().batch_size:(idx + 1) * config().batch_size]
givens_train[model.l_target.input_var] = y_shared[idx * config().batch_size:(idx + 1) * config().batch_size]

givens_valid = {}
givens_valid[model.l_in.input_var] = x_shared
givens_valid[model.l_target.input_var] = y_shared

# theano functions
iter_train = theano.function([idx], train_loss, givens=givens_train, updates=updates)
iter_validate = theano.function([], valid_loss, givens=givens_valid)

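# optionally resume training from a previously saved checkpoint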
if config().restart_from_save:
    print 'Load model parameters for resuming'
    resume_metadata = utils.load_pkl(config().restart_from_save)
    nn.layers.set_all_param_values(model.l_out, resume_metadata['param_values'])
    start_chunk_idx = resume_metadata['chunks_since_start'] + 1
    chunk_idxs = range(start_chunk_idx, config().max_nchunks)

    lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
    print ' setting learning rate to %.7f' % lr
    learning_rate.set_value(lr)
    losses_eval_train = resume_metadata['losses_eval_train']
    losses_eval_valid = resume_metadata['losses_eval_valid']
else:
    chunk_idxs = range(config().max_nchunks)
    losses_eval_train = []
    losses_eval_valid = []
    start_chunk_idx = 0

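# data iterators are defined by the active configuration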
train_data_iterator = config().train_data_iterator
valid_data_iterator = config().valid_data_iterator

print
print 'Data'
print 'n train: %d' % train_data_iterator.nsamples
print 'n validation: %d' % valid_data_iterator.nsamples
print 'n chunks per epoch', config().nchunks_per_epoch

print
print 'Train model'
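# training loop bookkeeping: timers and running loss accumulators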
chunk_idx = 0
start_time = time.time()
prev_time = start_time
tmp_losses_train = []
losses_train_print = []

# iterate over training chunks; the generator is wrapped in buffering.buffered_gen_threaded()
# so the next chunk is prepared in a background thread
for chunk_idx, (x_chunk_train, y_chunk_train, id_train) in izip(chunk_idxs, buffering.buffered_gen_threaded(
        train_data_iterator.generate())):
    if chunk_idx in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[chunk_idx])
        print ' setting learning rate to %.7f' % lr
        print
        learning_rate.set_value(lr)

    # load chunk to GPU
    x_shared.set_value(x_chunk_train)
    y_shared.set_value(y_chunk_train)

    # make nbatches_chunk iterations
    for b in xrange(config().nbatches_chunk):
        loss = iter_train(b)
        # print loss
        tmp_losses_train.append(loss)
        losses_train_print.append(loss)

    if (chunk_idx + 1) % 10 == 0:
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks), np.mean(losses_train_print)
        losses_train_print = []

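    # periodically evaluate on the validation set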
    if ((chunk_idx + 1) % config().validate_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        # calculate mean train loss since the last validation phase
        mean_train_loss = np.mean(tmp_losses_train)
        print 'Mean train loss: %7f' % mean_train_loss
        losses_eval_train.append(mean_train_loss)
        tmp_losses_train = []

        # load validation data to GPU
        tmp_losses_valid = []
        for i, (x_chunk_valid, y_chunk_valid, ids_batch) in enumerate(
                buffering.buffered_gen_threaded(valid_data_iterator.generate(),
                                                buffer_size=2)):
            x_shared.set_value(x_chunk_valid)
            y_shared.set_value(y_chunk_valid)
            l_valid = iter_validate()
            print i, l_valid
            tmp_losses_valid.append(l_valid)

        # calculate validation loss across validation set
        valid_loss = np.mean(tmp_losses_valid)
        print 'Validation loss: ', valid_loss
        losses_eval_valid.append(valid_loss)

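        # report elapsed time and estimate time remaining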
        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

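    # periodically save metadata and model parameters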
    if ((chunk_idx + 1) % config().save_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        print 'Saving metadata, parameters'

        # open in binary mode: pickle.HIGHEST_PROTOCOL is a binary protocol
        with open(metadata_path, 'wb') as f:
            pickle.dump({
                'configuration_file': config_name,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'chunks_since_start': chunk_idx,
                'losses_eval_train': losses_eval_train,
                'losses_eval_valid': losses_eval_valid,
                'param_values': nn.layers.get_all_param_values(model.l_out)
            }, f, pickle.HIGHEST_PROTOCOL)
        print ' saved to %s' % metadata_path
        print