# test_class_dsb.py -- evaluate a trained DSB classification model on a chosen data split.
1
import string
2
import sys
3
import lasagne as nn
4
import numpy as np
5
import theano
6
import utils
7
import logger
8
import buffering
9
from configuration import config, set_configuration
10
import pathfinder
11
import utils_lung
12
import os
13
import evaluate_submission
14
15
theano.config.warn_float64 = 'raise'
16
17
if len(sys.argv) < 2:
18
    sys.exit("Usage: test_class_dsb.py <configuration_name> <valid|test>")
19
20
config_name = sys.argv[1]
21
set_configuration('configs_class_dsb', config_name)
22
23
set = sys.argv[2] if len(sys.argv) == 3 else 'test'
24
25
# metadata
26
metadata_dir = utils.get_dir_path('models', pathfinder.METADATA_PATH)
27
metadata_path = utils.find_model_metadata(metadata_dir, config_name)
28
29
metadata = utils.load_pkl(metadata_path)
30
expid = metadata['experiment_id']
31
32
# logs
33
logs_dir = utils.get_dir_path('logs', pathfinder.METADATA_PATH)
34
sys.stdout = logger.Logger(logs_dir + '/%s-%s.log' % (expid, set))
35
sys.stderr = sys.stdout
36
37
# predictions path
38
predictions_dir = utils.get_dir_path('model-predictions', pathfinder.METADATA_PATH)
39
output_pkl_file = predictions_dir + '/%s-%s.pkl' % (expid, set)
40
41
submissions_dir = utils.get_dir_path('submissions', pathfinder.METADATA_PATH)
42
output_csv_file = submissions_dir + '/%s-%s.csv' % (expid, set)
43
44
# if os.path.isfile(output_pkl_file):
45
#     pid2prediction = utils.load_pkl(output_pkl_file)
46
#     utils_lung.write_submission(pid2prediction, output_csv_file)
47
#     print 'saved csv'
48
#     print output_csv_file
49
#     sys.exit(0)
50
51
print 'Build model'
52
model = config().build_model()
53
all_layers = nn.layers.get_all_layers(model.l_out)
54
all_params = nn.layers.get_all_params(model.l_out)
55
num_params = nn.layers.count_params(model.l_out)
56
print '  number of parameters: %d' % num_params
57
print string.ljust('  layer output shapes:', 36),
58
print string.ljust('#params:', 10),
59
print 'output shape:'
60
for layer in all_layers:
61
    name = string.ljust(layer.__class__.__name__, 32)
62
    num_param = sum([np.prod(p.get_value().shape) for p in layer.get_params()])
63
    num_param = string.ljust(num_param.__str__(), 10)
64
    print '    %s %s %s' % (name, num_param, layer.output_shape)
65
66
nn.layers.set_all_param_values(model.l_out, metadata['param_values'])
67
68
# theano functions
69
iter_test = theano.function([model.l_in.input_var], nn.layers.get_output(model.l_out, deterministic=True))
70
71
if set == 'test':
72
    pid2label = utils_lung.read_test_labels(pathfinder.TEST_LABELS_PATH)
73
    data_iterator = config().test_data_iterator
74
75
    print
76
    print 'Data'
77
    print 'n test: %d' % data_iterator.nsamples
78
79
    pid2prediction = {}
80
    for i, (x_test, _, id_test) in enumerate(buffering.buffered_gen_threaded(
81
            data_iterator.generate())):
82
        predictions = iter_test(x_test)
83
        pid = id_test[0]
84
        print predictions
85
        pid2prediction[pid] = predictions[1] if predictions.shape[-1] == 2 else predictions[0]
86
        print i, pid, predictions#, pid2label[pid]
87
88
    utils.save_pkl(pid2prediction, output_pkl_file)
89
    print 'Saved validation predictions into pkl', os.path.basename(output_pkl_file)
90
91
    test_loss = utils_lung.evaluate_log_loss(pid2prediction, pid2label)
92
    print 'Test loss', test_loss
93
94
    utils_lung.write_submission(pid2prediction, output_csv_file)
95
    print 'Saved predictions into csv'
96
    loss = evaluate_submission.leaderboard_performance(output_csv_file)
97
    print loss
98
99
elif set == 'valid':
100
    data_iterator = config().valid_data_iterator
101
102
    print
103
    print 'Data'
104
    print 'n valid: %d' % data_iterator.nsamples
105
106
    pid2prediction, pid2label = {}, {}
107
    for i, (x_test, y_test, id_test) in enumerate(buffering.buffered_gen_threaded(
108
            data_iterator.generate())):
109
        predictions = iter_test(x_test)
110
        pid = id_test[0]
111
        pid2prediction[pid] = predictions[0, 1] if predictions.shape[-1] == 2 else predictions[0]
112
        pid2label[pid] = y_test[0]
113
        print i, pid, predictions, pid2label[pid]
114
115
    utils.save_pkl(pid2prediction, output_pkl_file)
116
    print 'Saved validation predictions into pkl', os.path.basename(output_pkl_file)
117
    valid_loss = utils_lung.evaluate_log_loss(pid2prediction, pid2label)
118
    print 'Validation loss', valid_loss
119
120
elif set == 'stage2':
121
    data_iterator = config().test_data_iterator
122
123
    print
124
    print 'Data'
125
    print 'n test: %d' % data_iterator.nsamples
126
127
    pid2prediction = {}
128
    for i, (x_test, _, id_test) in enumerate(buffering.buffered_gen_threaded(
129
            data_iterator.generate())):
130
        predictions = iter_test(x_test)
131
        pid = id_test[0]
132
        print predictions
133
        pid2prediction[pid] = predictions[1] if predictions.shape[-1] == 2 else predictions[0]
134
        print i, pid, predictions
135
136
    utils.save_pkl(pid2prediction, output_pkl_file)
137
    print 'Saved validation predictions into pkl', os.path.basename(output_pkl_file)
138
139
    utils_lung.write_submission(pid2prediction, output_csv_file)
140
    print 'Saved predictions into csv'
141
142
elif set == 'tta_test':
143
    pid2label = utils_lung.read_test_labels(pathfinder.TEST_LABELS_PATH)
144
    data_iterator = config().tta_test_data_iterator
145
    tta_bs = config().tta_batch_size
146
147
    print
148
    print 'Data'
149
    print 'n valid: %d' % data_iterator.nsamples
150
151
152
    pid2prediction = {}
153
    for i, (x_test, _, id_test) in enumerate(buffering.buffered_gen_threaded(
154
            data_iterator.generate())):
155
        preds = []
156
        for bidx, pos in enumerate(range(0,x_test.shape[0],tta_bs)):
157
            predictions = iter_test(x_test[pos:pos+tta_bs])
158
            predictions = predictions[:, 1] if predictions.shape[-1] == 2 else predictions
159
            preds.append(predictions)
160
        
161
        preds = np.concatenate(preds)
162
        pred = np.average(preds)
163
        pid = id_test
164
165
        pid2prediction[pid] = pred
166
        print i, pid, pred, pid2label[pid]
167
168
169
170
    output_pkl_file = predictions_dir + '/%s-%s-%s.pkl' % (expid, set, str(data_iterator.tta))
171
    output_csv_file = submissions_dir + '/%s-%s-%s.csv' % (expid, set, str(data_iterator.tta))
172
173
    utils.save_pkl(pid2prediction, output_pkl_file)
174
    print 'Saved predictions into pkl', os.path.basename(output_pkl_file)
175
176
    test_loss = utils_lung.evaluate_log_loss(pid2prediction, pid2label)
177
    print 'Test loss', test_loss
178
179
    utils_lung.write_submission(pid2prediction, output_csv_file)
180
    print 'Saved predictions into csv'
181
    loss = evaluate_submission.leaderboard_performance(output_csv_file)
182
    print loss
183
184
185
186
elif set == 'tta_valid':
187
    pid2label = utils_lung.read_labels(pathfinder.LABELS_PATH)
188
    data_iterator = config().tta_valid_data_iterator
189
    tta_bs = config().tta_batch_size
190
191
    print
192
    print 'Data'
193
    print 'n valid: %d' % data_iterator.nsamples
194
195
196
    pid2prediction = {}
197
    for i, (x_valid, _, pid) in enumerate(buffering.buffered_gen_threaded(
198
            data_iterator.generate())):
199
        preds = []
200
        print x_valid.shape[0]
201
        for bidx, pos in enumerate(range(0,x_valid.shape[0],tta_bs)):
202
            predictions = iter_test(x_valid[pos:pos+tta_bs])
203
            predictions = predictions[:, 1] if predictions.shape[-1] == 2 else predictions
204
            preds.append(predictions)
205
        
206
        preds = np.concatenate(preds)
207
        pred = np.average(preds)
208
209
        pid2prediction[pid] = pred
210
        print i, pid, pred, pid2label[pid]
211
212
213
214
    output_pkl_file = predictions_dir + '/%s-%s-%s.pkl' % (expid, set, str(data_iterator.tta))
215
    output_csv_file = submissions_dir + '/%s-%s-%s.csv' % (expid, set, str(data_iterator.tta))
216
217
    utils.save_pkl(pid2prediction, output_pkl_file)
218
    print 'Saved predictions into pkl', os.path.basename(output_pkl_file)
219
220
    test_loss = utils_lung.evaluate_log_loss(pid2prediction, pid2label)
221
    print 'Test loss', test_loss
222
223
    utils_lung.write_submission(pid2prediction, output_csv_file)
224
    print 'Saved predictions into csv'
225
    loss = evaluate_submission.leaderboard_performance(output_csv_file)
226
    print loss
227
228
229
else:
230
    raise ValueError('wrong set argument')