# train_luna_prop_patch.py
import cPickle as pickle
import string
import sys
import time
from itertools import izip
import lasagne as nn
import numpy as np
import theano
from datetime import datetime, timedelta
from collections import defaultdict
import utils
import logger
import theano.tensor as T
import buffering
from configuration import config, set_configuration
import pathfinder

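# Fail fast on accidental float64: with warn_float64 set to 'raise', Theano
# raises an error whenever a float64 tensor is created, which keeps the whole
# graph in float32 (the dtype used for the shared variables below).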
theano.config.warn_float64 = 'raise'

if len(sys.argv) < 2:
    sys.exit("Usage: train.py <configuration_name>")

config_name = sys.argv[1]
set_configuration('configs_luna_props_patch', config_name)
expid = utils.generate_expid(config_name)
print
print "Experiment ID: %s" % expid
print

# metadata
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
metadata_path = metadata_dir + '/%s.pkl' % expid

# logs
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
sys.stdout = logger.Logger(logs_dir + '/%s.log' % expid)
sys.stderr = sys.stdout

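# Build the Lasagne model described by the selected config and print a
# per-layer summary: layer class, parameter count, output shape and name.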
print 'Build model'
model = config().build_model()
all_layers = nn.layers.get_all_layers(model.l_out)
all_params = nn.layers.get_all_params(model.l_out)
num_params = nn.layers.count_params(model.l_out)
print '  number of parameters: %d' % num_params
print string.ljust('  layer output shapes:', 36),
print string.ljust('#params:', 10),
print 'output shape:'
for layer in all_layers:
    name = string.ljust(layer.__class__.__name__, 32)
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
    num_param = string.ljust(str(num_param), 10)
    print '    %s %s %s %s' % (name, num_param, layer.output_shape, layer.name)

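# Objectives: the losses themselves are defined by the config's
# build_objective; deterministic=True is the evaluation version of the graph
# (e.g. with stochastic layers such as dropout disabled in Lasagne).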
train_loss = config().build_objective(model, deterministic=False)
valid_loss = config().build_objective(model, deterministic=True)

learning_rate_schedule = config().learning_rate_schedule
learning_rate = theano.shared(np.float32(learning_rate_schedule[0]))
updates = config().build_updates(train_loss, model, learning_rate)

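# Shared variables that hold one full data chunk on the compute device:
# x = network inputs, y = targets and, when the config enables it,
# z = a per-objective "enable" mask (used in the validation loop below to
# decide which objective losses to accumulate).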
x_shared = nn.utils.shared_empty(dim=len(model.l_in.shape))
y_shared = nn.utils.shared_empty(dim=len(model.l_target.shape))
if config().need_enable:
    z_shared = nn.utils.shared_empty(dim=len(model.l_enable_target.shape))

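# Mini-batching is done by indexing into the chunk that already lives in the
# shared variables: iter_train(idx) slices batch `idx` out of the chunk via
# the `givens` substitutions, so no host-to-device copy happens per batch.
# The validation function consumes the whole chunk at once (no slicing).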
idx = T.lscalar('idx')
givens_train = {}
givens_train[model.l_in.input_var] = x_shared[idx * config().batch_size:(idx + 1) * config().batch_size]
givens_train[model.l_target.input_var] = y_shared[idx * config().batch_size:(idx + 1) * config().batch_size]
if config().need_enable:
    givens_train[model.l_enable_target.input_var] = z_shared[idx * config().batch_size:(idx + 1) * config().batch_size]

givens_valid = {}
givens_valid[model.l_in.input_var] = x_shared
givens_valid[model.l_target.input_var] = y_shared
# at this moment we do not use the enable target
if config().need_enable:
    givens_valid[model.l_enable_target.input_var] = z_shared

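# The compiled functions return one loss per objective, in the fixed order
# given by config().order_objectives, so results can be matched back to
# objective names by position.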
# first make ordered list of objective functions
train_objectives = [config().d_objectives[obj_name] for obj_name in config().order_objectives]
test_objectives = [config().d_objectives_deterministic[obj_name] for obj_name in config().order_objectives]
# theano functions
print givens_train
iter_train = theano.function([idx], train_objectives, givens=givens_train, updates=updates)

print 'test_objectives'
print config().d_objectives_deterministic
print 'givens_valid'
print givens_valid
iter_validate = theano.function([], test_objectives, givens=givens_valid)

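# Optionally resume from a previously saved metadata pickle: restore the
# parameters, continue the chunk counter and learning-rate schedule, and
# carry over the recorded train/validation losses.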
if config().restart_from_save:
    print 'Load model parameters for resuming'
    resume_metadata = utils.load_pkl(config().restart_from_save)
    nn.layers.set_all_param_values(model.l_out, resume_metadata['param_values'])
    start_chunk_idx = resume_metadata['chunks_since_start'] + 1
    chunk_idxs = range(start_chunk_idx, config().max_nchunks)

    lr = np.float32(utils.current_learning_rate(learning_rate_schedule, start_chunk_idx))
    print '  setting learning rate to %.7f' % lr
    learning_rate.set_value(lr)
    losses_eval_train = resume_metadata['losses_eval_train']
    losses_eval_valid = resume_metadata['losses_eval_valid']
else:
    chunk_idxs = range(config().max_nchunks)
    losses_eval_train = defaultdict(list)
    losses_eval_valid = defaultdict(list)
    start_chunk_idx = 0

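# Config-provided iterators; each generate() call yields chunks of
# (x_chunk, y_chunk, z_chunk, sample ids) as consumed in the loops below.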
train_data_iterator = config().train_data_iterator
valid_data_iterator = config().valid_data_iterator

print
print 'Data'
print 'n train: %d' % train_data_iterator.nsamples
print 'n validation: %d' % valid_data_iterator.nsamples
print 'n chunks per epoch', config().nchunks_per_epoch

print
print 'Train model'
chunk_idx = 0
start_time = time.time()
prev_time = start_time

tmp_losses_train = defaultdict(list)
losses_train_print = defaultdict(list)

# use buffering.buffered_gen_threaded()
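# Main training loop. buffered_gen_threaded prefetches the next chunk in a
# background thread while the GPU works on the current one. Per chunk:
#   1. update the learning rate if the schedule has an entry for this chunk,
#   2. copy the chunk into the shared variables,
#   3. run nbatches_chunk gradient updates (one per mini-batch in the chunk).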
for chunk_idx, (x_chunk_train, y_chunk_train, z_chunk_train, id_train) in izip(chunk_idxs, buffering.buffered_gen_threaded(
        train_data_iterator.generate())):
    if chunk_idx in learning_rate_schedule:
        lr = np.float32(learning_rate_schedule[chunk_idx])
        print '  setting learning rate to %.7f' % lr
        print
        learning_rate.set_value(lr)

    # load chunk to GPU
    x_shared.set_value(x_chunk_train)
    y_shared.set_value(y_chunk_train)
    if config().need_enable:
        z_shared.set_value(z_chunk_train)

    # make nbatches_chunk iterations
    for b in xrange(config().nbatches_chunk):
        losses = iter_train(b)
        # record per-objective losses for the running means below
        for obj_idx, obj_name in enumerate(config().order_objectives):
            tmp_losses_train[obj_name].append(losses[obj_idx])
            losses_train_print[obj_name].append(losses[obj_idx])

    if (chunk_idx + 1) % 10 == 0:
        means = []
        for obj_idx, obj_name in enumerate(config().order_objectives):
            mean = np.mean(losses_train_print[obj_name])
            means.append(mean)
            print obj_name, mean
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks), sum(means)

        losses_train_print = defaultdict(list)

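    # Periodic evaluation: every validate_every chunks, report the mean train
    # loss per objective since the previous evaluation, then sweep the whole
    # validation set. A validation chunk only contributes to an objective's
    # mean if the chunk's enable mask marks that objective as active (z > 0.5).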
    if ((chunk_idx + 1) % config().validate_every) == 0:
        # calculate mean train loss since the last validation phase
        means = []
        print 'Mean train losses:'
        for obj_idx, obj_name in enumerate(config().order_objectives):
            train_mean = np.mean(tmp_losses_train[obj_name])
            losses_eval_train[obj_name] = train_mean
            means.append(train_mean)
            print obj_name, train_mean
        tmp_losses_train = defaultdict(list)
        print 'Sum of train losses:', sum(means)
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks), sum(means)

        # load validation data to GPU
        tmp_losses_valid = defaultdict(list)
        for i, (x_chunk_valid, y_chunk_valid, z_chunk_valid, ids_batch) in enumerate(
                buffering.buffered_gen_threaded(valid_data_iterator.generate(),
                                                buffer_size=2)):
            x_shared.set_value(x_chunk_valid)
            y_shared.set_value(y_chunk_valid)
            if config().need_enable:
                z_shared.set_value(z_chunk_valid)
            losses_valid = iter_validate()
            print i, losses_valid[0], np.sum(losses_valid)
            for obj_idx, obj_name in enumerate(config().order_objectives):
                if z_chunk_valid[0, obj_idx] > 0.5:
                    tmp_losses_valid[obj_name].append(losses_valid[obj_idx])

        # calculate validation loss across validation set
        means = []
        for obj_idx, obj_name in enumerate(config().order_objectives):
            valid_mean = np.mean(tmp_losses_valid[obj_name])
            losses_eval_valid[obj_name] = valid_mean
            means.append(valid_mean)
            print obj_name, valid_mean
        print 'Sum of mean losses:', sum(means)

        now = time.time()
        time_since_start = now - start_time
        time_since_prev = now - prev_time
        prev_time = now
        est_time_left = time_since_start * (config().max_nchunks - chunk_idx + 1.) / (chunk_idx + 1. - start_chunk_idx)
        eta = datetime.now() + timedelta(seconds=est_time_left)
        eta_str = eta.strftime("%c")
        print "  %s since start (%.2f s)" % (utils.hms(time_since_start), time_since_prev)
        print "  estimated %s to go (ETA: %s)" % (utils.hms(est_time_left), eta_str)
        print

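    # Checkpointing: every save_every chunks, dump the config name, git
    # revision, experiment id, chunk counter, recorded losses and all
    # parameter values to the metadata pickle, which is the format
    # restart_from_save expects above.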
    if ((chunk_idx + 1) % config().save_every) == 0:
        print
        print 'Chunk %d/%d' % (chunk_idx + 1, config().max_nchunks)
        print 'Saving metadata, parameters'

        with open(metadata_path, 'wb') as f:
            pickle.dump({
                'configuration_file': config_name,
                'git_revision_hash': utils.get_git_revision_hash(),
                'experiment_id': expid,
                'chunks_since_start': chunk_idx,
                'losses_eval_train': losses_eval_train,
                'losses_eval_valid': losses_eval_valid,
                'param_values': nn.layers.get_all_param_values(model.l_out)
            }, f, pickle.HIGHEST_PROTOCOL)
            print '  saved to %s' % metadata_path
            print
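
# Invocation mirrors the usage check at the top of the script:
#   python train_luna_prop_patch.py <configuration_name>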