train_luna_prop_patch.py

import cPickle as pickle
import string
import sys
import time
from itertools import izip
import lasagne as nn
import numpy as np
import theano
from datetime import datetime, timedelta
from collections import defaultdict
import utils
import logger
import theano.tensor as T
import buffering
from configuration import config, set_configuration
import pathfinder

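# fail fast on precision bugs: Theano raises an error whenever a float64
# variable is created, so accidental upcasts from float32 are caught immediately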
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 2:
    sys.exit("Usage: train.py <configuration_name>")

config_name = sys.argv[1]
set_configuration('configs_luna_props_patch', config_name)
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

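# build the network and print a per-layer summary (class name, parameter count, output shape)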
print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print ' number of parameters: %d' % num_params
print string.ljust(' layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(str(num_param), 10)
    print ' %s %s %s %s' % (name, num_param, layer.output_shape, layer.name)

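# two views of the same objective: deterministic=True is the conventional Lasagne
# switch that disables stochastic layers (e.g. dropout) for evaluation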
train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)

learning_rate_schedule = config().learning_rate_schedule
learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
updates = config().build_updates(train_loss, model, learning_rate)

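# a whole data chunk lives in GPU shared variables; the compiled training function
# slices minibatch idx out of them, avoiding a host-to-device copy per batch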
x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
y_shared = nn.utils.shared_empty(dim=len(model.l_target.shape))
if config().need_enable:
    z_shared = nn.utils.shared_empty(dim=len(model.l_enable_target.shape))

idx = T.lscalar('idx')
givens_train = {}
givens_train[model.l_in.input_var] = x_shared[idx * config().batch_size:(idx + 1) * config().batch_size]
givens_train[model.l_target.input_var] = y_shared[idx * config().batch_size:(idx + 1) * config().batch_size]
if config().need_enable:
    givens_train[model.l_enable_target.input_var] = z_shared[idx * config().batch_size:(idx + 1) * config().batch_size]

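# validation evaluates the whole chunk in one call, so the shared variables are bound unsliced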
givens_valid = {}
givens_valid[model.l_in.input_var] = x_shared
givens_valid[model.l_target.input_var] = y_shared
# at this moment the validation objective does not use the enable target,
# but its input variable still has to be bound
if config().need_enable:
    givens_valid[model.l_enable_target.input_var] = z_shared

# first, make an ordered list of objective functions
train_objectives = [config().d_objectives[obj_name] for obj_name in config().order_objectives]
test_objectives = [config().d_objectives_deterministic[obj_name] for obj_name in config().order_objectives]
# theano functions
print givens_train
iter_train = theano.function([idx], train_objectives, givens=givens_train, updates=updates)

print 'test_objectives'
print config().d_objectives_deterministic
print 'givens_valid'
print givens_valid
iter_validate = theano.function([], test_objectives, givens=givens_valid)

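# resume support: reload parameters and loss history from a checkpoint pickle
# and fast-forward the learning rate schedule to the current chunk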
if config().restart_from_save:
    print 'Load model parameters for resuming'
    resume_metadata = utils.load_pkl(config().restart_from_save)
    nn.layers.set_all_param_values(model.l_out, resume_metadata['param_values'])
    start_chunk_idx = resume_metadata['chunks_since_start'] + 1
    chunk_idxs = range(start_chunk_idx, config().max_nchunks)

    lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
    print ' setting learning rate to %.7f' % lr
    learning_rate.set_value(lr)
    losses_eval_train = resume_metadata['losses_eval_train']
    losses_eval_valid = resume_metadata['losses_eval_valid']
else:
    chunk_idxs = range(config().max_nchunks)
    losses_eval_train = defaultdict(list)
    losses_eval_valid = defaultdict(list)
    start_chunk_idx = 0

train_data_iterator = config().train_data_iterator
valid_data_iterator = config().valid_data_iterator

print
print 'Data'
print 'n train: %d' % train_data_iterator.nsamples
print 'n validation: %d' % valid_data_iterator.nsamples
print 'n chunks per epoch', config().nchunks_per_epoch

print
print 'Train model'
chunk_idx = 0
start_time = time.time()
prev_time = start_time

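# two accumulators: tmp_losses_train collects per-objective losses until the next
# validation pass; losses_train_print collects them until the next console report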
tmp_losses_train = defaultdict(list)
losses_train_print = defaultdict(list)

# buffering.buffered_gen_threaded() prefetches chunks on a background thread,
# so data loading overlaps with GPU compute
for chunk_idx, (x_chunk_train, y_chunk_train, z_chunk_train, id_train) in izip(chunk_idxs, buffering.buffered_gen_threaded(
        train_data_iterator.generate())):
    if chunk_idx in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[chunk_idx])
        print ' setting learning rate to %.7f' % lr
        print
        learning_rate.set_value(lr)

    # load chunk to GPU
    x_shared.set_value(x_chunk_train)
    y_shared.set_value(y_chunk_train)
    if config().need_enable:
        z_shared.set_value(z_chunk_train)

    # make nbatches_chunk iterations
    for b in xrange(config().nbatches_chunk):
        losses = iter_train(b)
        # accumulate the per-objective losses of this batch
        for obj_idx, obj_name in enumerate(config().order_objectives):
            tmp_losses_train[obj_name].append(losses[obj_idx])
            losses_train_print[obj_name].append(losses[obj_idx])

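    # every 10 chunks, report the mean training loss of each objective since the last report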
    if (chunk_idx + 1) % 10 == 0:
        means = []
        for obj_idx, obj_name in enumerate(config().order_objectives):
            mean = np.mean(losses_train_print[obj_name])
            means.append(mean)
            print obj_name, mean
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks), sum(means)

        losses_train_print = defaultdict(list)

    if ((chunk_idx + 1) % config().validate_every) == 0:
        # calculate mean train loss since the last validation phase
        means = []
        print 'Mean train losses:'
        for obj_idx, obj_name in enumerate(config().order_objectives):
            train_mean = np.mean(tmp_losses_train[obj_name])
            losses_eval_train[obj_name].append(train_mean)
            means.append(train_mean)
            print obj_name, train_mean
        tmp_losses_train = defaultdict(list)
        print 'Sum of train losses:', sum(means)
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks), sum(means)

        # load validation data to GPU chunk by chunk and evaluate
        tmp_losses_valid = defaultdict(list)
        for i, (x_chunk_valid, y_chunk_valid, z_chunk_valid, ids_batch) in enumerate(
                buffering.buffered_gen_threaded(valid_data_iterator.generate(),
                                                buffer_size=2)):
            x_shared.set_value(x_chunk_valid)
            y_shared.set_value(y_chunk_valid)
            if config().need_enable:
                z_shared.set_value(z_chunk_valid)
            losses_valid = iter_validate()
            print i, losses_valid[0], np.sum(losses_valid)
            for obj_idx, obj_name in enumerate(config().order_objectives):
                # skip objectives that are disabled for this chunk (z carries
                # per-objective enable flags; only the first sample is checked)
                if z_chunk_valid[0, obj_idx] > 0.5:
                    tmp_losses_valid[obj_name].append(losses_valid[obj_idx])

        # calculate mean validation loss across the validation set
        means = []
        for obj_idx, obj_name in enumerate(config().order_objectives):
            valid_mean = np.mean(tmp_losses_valid[obj_name])
            losses_eval_valid[obj_name].append(valid_mean)
            means.append(valid_mean)
            print obj_name, valid_mean
        print 'Sum of mean losses:', sum(means)

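        # timing: elapsed time and a rough ETA, extrapolated from the
        # chunks processed since the last (re)start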
        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx - 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print " %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print " estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

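    # periodic checkpoint: persist parameters and loss history so training can
    # be resumed via config().restart_from_save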
    if ((chunk_idx + 1) % config().save_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        print 'Saving metadata, parameters'

        # open in binary mode: HIGHEST_PROTOCOL is a binary pickle format
        with open(metadata_path, 'wb') as f:
            pickle.dump({
                'configuration_file': config_name,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'chunks_since_start': chunk_idx,
                'losses_eval_train': losses_eval_train,
                'losses_eval_valid': losses_eval_valid,
                'param_values': nn.layers.get_all_param_values(model.l_out)
            }, f, pickle.HIGHEST_PROTOCOL)
        print ' saved to %s' % metadata_path
        print