Diff of /simdeep/deepmodel_base.py [000000] .. [53737a]

Switch to unified view

a b/simdeep/deepmodel_base.py
1
import numpy as np

import random

# Seed Python's RNG at import time so module-level randomness is repeatable.
random.seed(2020)

try:
    from tensorflow.compat.v1 import set_random_seed
except Exception:
    # The TensorFlow seeding helper could not be imported; keep a None
    # placeholder so later code can test for its availability.
    set_random_seed = None

np.random.seed(2020)

# Only seed TensorFlow when the helper was actually imported: calling the
# None placeholder would raise TypeError and make the module unimportable.
if set_random_seed is not None:
    set_random_seed(2020)
13
14
from simdeep.config import SEED
15
import os
16
17
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
18
19
import warnings
20
21
try:
22
    with warnings.catch_warnings():
23
        warnings.simplefilter("ignore")
24
        import tensorflow as tf
25
        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
26
except Exception:
27
    pass
28
29
with warnings.catch_warnings():
30
    warnings.simplefilter("ignore")
31
    from keras.layers import Dense
32
    from keras.layers import Dropout
33
    from keras.layers import Input
34
35
    from keras.models import Sequential
36
    from keras.models import load_model
37
    from keras.models import Model
38
39
    from keras import regularizers
40
41
from simdeep.extract_data import LoadData
42
43
from time import time
44
45
from simdeep.config import EPOCHS
46
from simdeep.config import LEVEL_DIMS_IN
47
from simdeep.config import LEVEL_DIMS_OUT
48
from simdeep.config import NEW_DIM
49
from simdeep.config import LOSS
50
from simdeep.config import OPTIMIZER
51
from simdeep.config import ACT_REG
52
from simdeep.config import W_REG
53
from simdeep.config import DROPOUT
54
from simdeep.config import ACTIVATION
55
from simdeep.config import PATH_TO_SAVE_MODEL
56
from simdeep.config import DATA_SPLIT
57
58
from os.path import isfile
59
60
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
61
62
63
def main():
    """
    Demo entry point: build a DeepBase model with seed 2, load the training
    data, train the autoencoders and encode the 'METH' training matrix.
    The encoded matrix is computed but not returned.
    """
    model = DeepBase(seed=2)
    model.load_training_dataset()
    model.construct_autoencoders()

    meth_matrix = model.matrix_train_array['METH']
    model.encoder_predict('METH', meth_matrix)
69
70
71
class DeepBase(object):
72
    """ """
73
    def __init__(self,
74
                 dataset=None,
75
                 verbose=True,
76
                 epochs=EPOCHS,
77
                 level_dims_in=LEVEL_DIMS_IN,
78
                 level_dims_out=LEVEL_DIMS_OUT,
79
                 new_dim=NEW_DIM,
80
                 loss=LOSS,
81
                 optimizer=OPTIMIZER,
82
                 act_reg=ACT_REG,
83
                 w_reg=W_REG,
84
                 dropout=DROPOUT,
85
                 data_split=DATA_SPLIT,
86
                 activation=ACTIVATION,
87
                 seed=SEED,
88
                 alternative_embedding=None,
89
                 kwargs_alternative_embedding={},
90
                 path_to_save_model=PATH_TO_SAVE_MODEL):
91
        """
92
        ### DEFAULT PARAMETER ###:
93
            dataset=None      ExtractData instance (load the dataset),
94
            level_dims = [500]
95
            new_dim = 100
96
            dropout = 0.5
97
            act_reg = 0.0001
98
            w_reg = 0.001
99
            data_split = 0.2
100
            activation = 'tanh'
101
            epochs = 10
102
            loss = 'binary_crossentropy'
103
            optimizer = 'sgd'
104
            path_model where to save/load the models
105
        """
106
        if dataset is None:
107
            dataset = LoadData()
108
109
        self.session = None
110
        self.dataset = dataset
111
        self.verbose = verbose
112
113
        self.matrix_train_array = {}
114
115
        self.epochs = epochs
116
        self.level_dims_in = level_dims_in
117
        self.level_dims_out = level_dims_out
118
        self.new_dim = new_dim
119
        self.loss = loss
120
        self.optimizer = optimizer
121
        self.dropout = dropout
122
        self.path_to_save_model = path_to_save_model
123
        self.activation = activation
124
        self.data_split = data_split
125
        self.seed = seed
126
        self.alternative_embedding = alternative_embedding
127
128
        if self.seed:
129
            np.random.seed(self.seed)
130
131
            if set_random_seed is not None:
132
                set_random_seed(self.seed)
133
134
        self.W_l1_constant = w_reg
135
        self.A_l2_constant = act_reg
136
137
        self.alternative_embedding_array = {}
138
        self.kwargs_alternative_embedding = kwargs_alternative_embedding
139
        self.encoder_array = {}
140
        self.model_array = {}
141
142
        self.is_model_loaded = False
143
144
    def construct_autoencoders(self):
145
        """
146
        main class to create the autoencoder
147
        """
148
        self.create_autoencoders()
149
        self.compile_models()
150
        self.fit_autoencoders()
151
152
    def construct_supervized_network(self, objective):
153
        """
154
        main class to create the autoencoder
155
        """
156
        self.create_autoencoders(objective)
157
        self.compile_models()
158
        self.fit_autoencoders(objective)
159
160
    def load_training_dataset(self):
161
        """
162
        load training dataset and surival
163
        """
164
        self.dataset.load_array()
165
        self.dataset.load_survival()
166
        self.dataset.load_meta_data()
167
        self.dataset.subset_training_sets()
168
169
        self.dataset.create_a_cv_split()
170
        self.dataset.normalize_training_array()
171
172
        self.matrix_train_array = self.dataset.matrix_train_array
173
174
        for key in self.matrix_train_array:
175
            self.matrix_train_array[key] = self.matrix_train_array[key].astype('float32')
176
177
    def load_test_dataset(self):
178
        """
179
        load test dataset and test surival
180
        """
181
        self.dataset.load_matrix_test()
182
        self.dataset.load_survival_test()
183
184
    def create_autoencoders(self, matrix_out=None):
185
        """ """
186
        for key in self.matrix_train_array:
187
            self._create_autoencoder(self.matrix_train_array[key], key, matrix_out)
188
189
    def fit_alternative_embedding(self):
190
        """ """
191
        embedding_class = self.alternative_embedding
192
193
        for key in self.matrix_train_array:
194
            if self.verbose:
195
                print("Fitting alternative embedding for key: {0}, class: {1}".format(
196
                    key, embedding_class))
197
198
            self.alternative_embedding_array[key] = embedding_class(
199
                **self.kwargs_alternative_embedding)
200
201
            self.alternative_embedding_array[key].fit(
202
                self.matrix_train_array[key])
203
204
    def _create_autoencoder(self, matrix_train, key, matrix_out=None):
        """
        Build the (uncompiled) Sequential model for one training matrix and
        store it in self.model_array[key].

        Layer order: one dense layer per entry of level_dims_in, the
        'new_dim' layer, one dense layer per entry of level_dims_out, then
        'final_layer'. Each layer except the final one is followed by a
        Dropout layer when self.dropout is truthy.

        :matrix_train: training matrix; only its shape is read here
        :key: name under which the model is stored
        :matrix_out: optional target matrix; when given, the final layer
                     has matrix_out.shape[1] units instead of
                     matrix_train.shape[1]
        """
        if self.verbose:
            print('creating autoencoder...')
        start = time()

        X_shape = matrix_train.shape

        def add_block(network, dim, name):
            # One dense layer, followed by dropout when enabled.
            network = self._add_dense_layer(network, X_shape, dim, name=name)

            if self.dropout:
                network.add(Dropout(self.dropout))

            return network

        model = Sequential()
        # Counter shared by the "in" and "out" hidden layers so that their
        # names stay unique across both groups.
        hidden_count = 0

        for dim in self.level_dims_in:
            hidden_count += 1
            model = add_block(
                model, dim, 'hidden_layer_nb_{0}'.format(hidden_count))

        model = add_block(model, self.new_dim, 'new_dim')

        for dim in self.level_dims_out:
            hidden_count += 1
            model = add_block(
                model, dim, 'hidden_layer_nb_{0}'.format(hidden_count))

        if matrix_out is None:
            output_dim = X_shape[1]
        else:
            output_dim = matrix_out.shape[1]

        # The final layer gets no dropout.
        model = self._add_dense_layer(
            model, X_shape, output_dim, name='final_layer')

        self.model_array[key] = model

        if self.verbose:
            print('model for {1} created in {0}s !'.format(time() - start, key))
266
267
    def _add_dense_layer(self, model, shape, dim, name=None):
        """
        Append one regularized Dense layer to model and return the model.

        :model: model to extend (modified in place)
        :shape: shape of the input matrix; shape[1] is used as input_dim
                when the model has no layer yet
        :dim: number of units of the new layer
        :name: optional layer name
        """
        # input_dim is only given for the very first layer of the model.
        input_dim = None if model.layers else shape[1]

        layer = Dense(
            dim,
            activity_regularizer=regularizers.l2(self.A_l2_constant),
            kernel_regularizer=regularizers.l1(self.W_l1_constant),
            name=name,
            activation=self.activation,
            input_dim=input_dim,
        )
        model.add(layer)

        return model
284
285
    def compile_models(self):
286
        """
287
        define the optimizer and the loss function
288
        compile the model and ready to fit the data!
289
        """
290
        for key in self.model_array:
291
            model = self.model_array[key]
292
            if self.verbose:
293
                print('compiling deep model...')
294
295
            model.compile(optimizer=self.optimizer, loss=self.loss)
296
297
            if self.verbose:
298
                print('compilation done for key {0}!'.format(key))
299
300
    def fit_autoencoders(self, objective=None):
301
        """
302
        fit the autoencoder using the training matrix
303
        """
304
        for key in self.model_array:
305
            model = self.model_array[key]
306
            matrix_train = self.matrix_train_array[key]
307
308
            if objective is None:
309
                matrix_out = matrix_train
310
            else:
311
                matrix_out = objective
312
313
            if not self.verbose:
314
                verbose = None
315
            else:
316
                verbose = 2
317
318
            model.fit(x=matrix_train,
319
                      y=matrix_out,
320
                      verbose=verbose,
321
                      epochs=self.epochs,
322
                      validation_split=self.data_split,
323
                      # shuffle=True
324
            )
325
326
            if self.verbose:
327
                print('fitting done for model {0}!'.format(key))
328
329
        self._define_encoders()
330
331
    def embedding_predict(self, key, matrix):
332
        """
333
        Predict the output value using the matrix as input and
334
        the fitted embedding model from self.alternative_embedding_array
335
        """
336
        return self.alternative_embedding_array[key].transform(matrix)
337
338
    def encoder_predict(self, key, matrix):
339
        """
340
        Predict the output value using the matrix as input for the encoder from key
341
        """
342
        return self.encoder_array[key].predict(x=matrix)
343
344
    def encoder_input_shape(self, key):
345
        """
346
        Predict the output value using the matrix as input for the encoder from key
347
        """
348
        return self.encoder_array[key].input_shape
349
350
351
    def _define_encoders(self):
        """
        Build one encoder per model by chaining the model's layers from the
        first one up to and including the layer named 'new_dim', and store
        the compiled encoders in self.encoder_array.
        """
        for key, model in self.model_array.items():
            n_features = self.matrix_train_array[key].shape[1]
            inp = Input(shape=(n_features,))

            layers = model.layers
            first_layer, next_layers = layers[0], layers[1:]

            latent = first_layer(inp)

            # Keep stacking layers until 'new_dim' has been applied; nothing
            # more is needed when the first layer already is 'new_dim'.
            if first_layer.name != 'new_dim':
                for layer in next_layers:
                    latent = layer(latent)

                    if layer.name == 'new_dim':
                        break

            encoder = Model(inp, latent)
            encoder.compile(optimizer=self.optimizer, loss=self.loss)

            self.encoder_array[key] = encoder
375
376
    def save_encoders(self, fname='encoder.h5'):
377
        """
378
        Save a keras model in the self.path_to_save_model directory
379
        :fname: str    the name of the file to save the model
380
        """
381
        for key in self.encoder_array:
382
            encoder = self.encoder_array[key]
383
            encoder.save('{0}/{1}_{2}'.format(self.path_to_save_model, key, fname))
384
385
            if self.verbose:
386
                print('model saved for key:{0}!'.format(key))
387
388
    def load_encoders(self, fname='encoder.h5'):
389
        """
390
        Load a keras model from the self.path_to_save_model directory
391
        :fname: str    the name of the file to load
392
        """
393
        for key in self.matrix_train_array:
394
            file_path = '{0}/{1}_{2}'.format(self.path_to_save_model, key, fname)
395
            try:
396
                assert(isfile(file_path))
397
            except AssertionError:
398
                if self.verbose:
399
                    print('try loading autoencoder for {0} but file not found'.format(file_path))
400
                    print('no encoder loaded')
401
                    self.encoder_array = {}
402
                    return
403
404
            t = time()
405
            encoder = load_model(file_path)
406
407
            if self.verbose:
408
                print('model {1} loaded in {0} s!'.format(time() - t, key))
409
410
            self.encoder_array[key] = encoder
411
            self.is_model_loaded = True
412
413
414
if __name__ == "__main__":
    # Run the demo pipeline only when this file is executed as a script.
    main()