Diff of /src/LiviaNet/LiviaNet.py [000000] .. [e9ece0]

""" 
Copyright (c) 2016, Jose Dolz. All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice,
       this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright notice,
       this list of conditions and the following disclaimer in the documentation
       and/or other materials provided with the distribution.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
    OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
    HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
    WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
    OTHER DEALINGS IN THE SOFTWARE.

NOTES: There are still some functionalities to be implemented:

    - Add pooling layer in 3D
    - Add more activation functions
    - Add more optimizers (e.g., Adam)

Jose Dolz. Dec, 2016.
email: jose.dolz.upv@gmail.com
LIVIA Department, ETS, Montreal.
"""

import numpy as np

import theano
import theano.tensor as T
from theano.tensor.nnet import conv
import random
from math import floor
from math import ceil

from Modules.General.Utils import computeReceptiveField
from Modules.General.Utils import extendLearningRateToParams
from Modules.General.Utils import extractCenterFeatMaps
from Modules.General.Utils import getCentralVoxels
from Modules.General.Utils import getWeightsSet

import LiviaNet3DConvLayer
import LiviaSoftmax
import pdb

#####################################################
# ------------------------------------------------- #
##  ##  ##  ##  ##   LIVIANET 3D   ##  ##  ##  ##  ##
# ------------------------------------------------- #
#####################################################


class LiviaNet3D(object):
    def __init__(self):

        # --- containers for Theano compiled functions ----
        self.networkModel_Train = ""
        self.networkModel_Test = ""

        # --- shared variables will be stored in the following variables ----
        self.trainingData_x = ""
        self.testingData_x = ""
        self.trainingData_y = ""

        self.lastLayer = ""
        self.networkLayers = []
        self.intermediate_ConnectedLayers = []

        self.networkName = ""
        self.folderName = ""
        self.cnnLayers = []
        self.n_classes = -1

        self.sampleSize_Train = []
        self.sampleSize_Test = []
        self.kernel_Shapes = []

        self.pooling_scales = []
        self.dropout_Rates = []
        self.activationType = -1
        self.weight_Initialization = -1
        self.batch_Size = -1
        self.receptiveField = 0

        self.initialLearningRate = ""
        self.learning_rate = theano.shared(np.cast["float32"](0.01))

        # Symbolic variables
        self.inputNetwork_Train = None
        self.inputNetwork_Test = None

        self.L1_reg_C = 0
        self.L2_reg_C = 0
        self.costFunction = 0

        # Params for optimizers
        self.initialMomentum = ""
        self.momentum = theano.shared(np.cast["float32"](0.))
        self.momentumNormalized = 0
        self.momentumType = 0
        self.vel_Momentum = []
        self.rho_RMSProp = 0
        self.epsilon_RMSProp = 0
        self.params_RmsProp = []
        self.numberOfEpochsTrained = 0
        self.applyBatchNorm = ""
        self.numberEpochToApplyBatchNorm = 0
        self.softmax_Temp = 1.0

        self.centralVoxelsTrain = ""
        self.centralVoxelsTest = ""

    # -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

    """ ####### Function to generate the network architecture  ######### """
    def generateNetworkLayers(self,
                            cnnLayers,
                            kernel_Shapes,
                            maxPooling_Layer,
                            sampleShape_Train,
                            sampleShape_Test,
                            inputSample_Train,
                            inputSample_Test,
                            layersToConnect):

        rng = np.random.RandomState(24575)

        # Define inputs for the first layer (these are re-used by the following layers)
        inputSampleToNextLayer_Train = inputSample_Train
        inputSampleToNextLayer_Test = inputSample_Test
        inputSampleToNextLayerShape_Train = sampleShape_Train
        inputSampleToNextLayerShape_Test = sampleShape_Test

        # Count the convolutional layers (3D kernel shapes); the remaining layers,
        # specified with a single kernel dimension, are fully connected.
        numLayers = len(kernel_Shapes)
        numberCNNLayers = 1
        for l_i in range(1, len(kernel_Shapes)):
            if len(kernel_Shapes[l_i]) == 3:
                numberCNNLayers = l_i + 1

        numberFCLayers = numLayers - numberCNNLayers
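
        # Illustrative example (values are not from any original config): with
        # kernel_Shapes = [[3,3,3], [3,3,3], [3,3,3], [1], [1]] the three 3D
        # shapes give numberCNNLayers = 3, and hence numberFCLayers = 2.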

        ######### -------------- Generate the convolutional layers --------------   #########
        # Some sanity checks
        if self.weight_Initialization_CNN == 2:
            if len(self.weightsTrainedIdx) != numberCNNLayers:
                print(" ... WARNING!!!! The number of indices specified for pre-trained layers does not match the number of conv layers in the created architecture...")
            weightsNames = getWeightsSet(self.weightsFolderName, self.weightsTrainedIdx)

        for l_i in xrange(0, numberCNNLayers) :

            # Get the properties of this layer.
            # The second element is the number of feature maps of the previous layer.
            currentLayerKernelShape = [cnnLayers[l_i], inputSampleToNextLayerShape_Train[1]] + kernel_Shapes[l_i]

            # If weights are initialized from a pre-trained network, they are
            # loaded at this stage; otherwise the list stays empty and the layer
            # initializes its own weights.
            weights = []
            if self.weight_Initialization_CNN == 2:
                weights = np.load(weightsNames[l_i])

            maxPoolingParameters = []
            dropoutRate = 0.0
            myLiviaNet3DConvLayer = LiviaNet3DConvLayer.LiviaNet3DConvLayer(rng,
                                                                            l_i,
                                                                            inputSampleToNextLayer_Train,
                                                                            inputSampleToNextLayer_Test,
                                                                            inputSampleToNextLayerShape_Train,
                                                                            inputSampleToNextLayerShape_Test,
                                                                            currentLayerKernelShape,
                                                                            self.applyBatchNorm,
                                                                            self.numberEpochToApplyBatchNorm,
                                                                            maxPoolingParameters,
                                                                            self.weight_Initialization_CNN,
                                                                            weights,
                                                                            self.activationType,
                                                                            dropoutRate
                                                                            )

            self.networkLayers.append(myLiviaNet3DConvLayer)

            # Kept only for printing the shapes below
            inputSampleToNextLayer_Train_Old = inputSampleToNextLayerShape_Train
            inputSampleToNextLayer_Test_Old  = inputSampleToNextLayerShape_Test

            # Update inputs for the next layer
            inputSampleToNextLayer_Train = myLiviaNet3DConvLayer.outputTrain
            inputSampleToNextLayer_Test  = myLiviaNet3DConvLayer.outputTest

            inputSampleToNextLayerShape_Train = myLiviaNet3DConvLayer.outputShapeTrain
            inputSampleToNextLayerShape_Test  = myLiviaNet3DConvLayer.outputShapeTest

            print(" ----- (Training) Input shape: {}  ---> Output shape: {}  ||  kernel shape {}".format(inputSampleToNextLayer_Train_Old, inputSampleToNextLayerShape_Train, currentLayerKernelShape))
            print(" ----- (Testing) Input shape: {}   ---> Output shape: {}".format(inputSampleToNextLayer_Test_Old, inputSampleToNextLayerShape_Test))

        ######### -------------- Create the intermediate (i.e. multi-scale) connections from conv layers to FCN ----------------- ##################
        featMapsInFullyCN = inputSampleToNextLayerShape_Train[1]

        [featMapsInFullyCN,
        inputToFullyCN_Train,
        inputToFullyCN_Test] = self.connectIntermediateLayers(layersToConnect,
                                                              inputSampleToNextLayer_Train,
                                                              inputSampleToNextLayer_Test,
                                                              featMapsInFullyCN)

        ######### --------------  Generate the Fully Connected Layers  ----------------- ##################

        # Define inputs
        inputFullyCNShape_Train = [self.batch_Size, featMapsInFullyCN] + inputSampleToNextLayerShape_Train[2:5]
        inputFullyCNShape_Test = [self.batch_Size, featMapsInFullyCN] + inputSampleToNextLayerShape_Test[2:5]

        # Kamnitsas applied padding and mirroring to the images when kernels in FC layers were larger than 1x1x1.
        # In this work we employ kernels of size 1x1x1, so there is no need to apply padding or mirroring.
        # TODO. Check

        print(" --- Starting to create the fully connected layers....")
        for l_i in xrange(numberCNNLayers, numLayers) :
            numberOfKernels = cnnLayers[l_i]
            # FC layers are implemented as convolutions; the single kernel dimension
            # given in the configuration is replicated along the three axes.
            kernel_shape = [kernel_Shapes[l_i][0]] * 3

            currentLayerKernelShape = [numberOfKernels, inputFullyCNShape_Train[1]] + kernel_shape

            # If weights were to be initialized from a pre-trained network, they
            # would be loaded at this stage; otherwise the list stays empty.
            weights = []
            maxPoolingParameters = []
            dropoutRate = self.dropout_Rates[l_i - numberCNNLayers]

            myLiviaNet3DFullyConnectedLayer = LiviaNet3DConvLayer.LiviaNet3DConvLayer(rng,
                                                                            l_i,
                                                                            inputToFullyCN_Train,
                                                                            inputToFullyCN_Test,
                                                                            inputFullyCNShape_Train,
                                                                            inputFullyCNShape_Test,
                                                                            currentLayerKernelShape,
                                                                            self.applyBatchNorm,
                                                                            self.numberEpochToApplyBatchNorm,
                                                                            maxPoolingParameters,
                                                                            self.weight_Initialization_FCN,
                                                                            weights,
                                                                            self.activationType,
                                                                            dropoutRate
                                                                            )

            self.networkLayers.append(myLiviaNet3DFullyConnectedLayer)

            # Kept only for printing the shapes below
            inputFullyCNShape_Train_Old = inputFullyCNShape_Train
            inputFullyCNShape_Test_Old  = inputFullyCNShape_Test

            # Update inputs for the next layer
            inputToFullyCN_Train = myLiviaNet3DFullyConnectedLayer.outputTrain
            inputToFullyCN_Test = myLiviaNet3DFullyConnectedLayer.outputTest

            inputFullyCNShape_Train = myLiviaNet3DFullyConnectedLayer.outputShapeTrain
            inputFullyCNShape_Test = myLiviaNet3DFullyConnectedLayer.outputShapeTest

            print(" ----- (Training) Input shape: {}  ---> Output shape: {}  ||  kernel shape {}".format(inputFullyCNShape_Train_Old, inputFullyCNShape_Train, currentLayerKernelShape))
            print(" ----- (Testing) Input shape: {}   ---> Output shape: {}".format(inputFullyCNShape_Test_Old, inputFullyCNShape_Test))

        ######### -------------- Classification layer  ----------------- ##################

        # Define the kernel shape for the classification layer
        featMaps_LastLayer = self.cnnLayers[-1]
        filterShape_ClassificationLayer = [self.n_classes, featMaps_LastLayer, 1, 1, 1]

        # Define inputs and shapes for the classification layer
        inputImageClassificationLayer_Train = inputToFullyCN_Train
        inputImageClassificationLayer_Test = inputToFullyCN_Test

        inputImageClassificationLayerShape_Train = inputFullyCNShape_Train
        inputImageClassificationLayerShape_Test = inputFullyCNShape_Test

        print(" ----- (Classification layer) kernel shape {}".format(filterShape_ClassificationLayer))
        classification_layer_Index = l_i

        weights = []
        maxPoolingParameters = []
        dropoutRate = self.dropout_Rates[-1]
        softmaxTemperature = self.softmax_Temp

        myLiviaNet_ClassificationLayer = LiviaSoftmax.LiviaSoftmax(rng,
                                                                   classification_layer_Index,
                                                                   inputImageClassificationLayer_Train,
                                                                   inputImageClassificationLayer_Test,
                                                                   inputImageClassificationLayerShape_Train,
                                                                   inputImageClassificationLayerShape_Test,
                                                                   filterShape_ClassificationLayer,
                                                                   self.applyBatchNorm,
                                                                   self.numberEpochToApplyBatchNorm,
                                                                   maxPoolingParameters,
                                                                   self.weight_Initialization_FCN,
                                                                   weights,
                                                                   0, # activation type fixed to 0 here (instead of self.activationType)
                                                                   dropoutRate,
                                                                   softmaxTemperature
                                                                   )

        self.networkLayers.append(myLiviaNet_ClassificationLayer)
        self.lastLayer = myLiviaNet_ClassificationLayer

        print(" ----- (Training) Input shape: {}  ---> Output shape: {}  ||  kernel shape {}".format(inputImageClassificationLayerShape_Train, myLiviaNet_ClassificationLayer.outputShapeTrain, filterShape_ClassificationLayer))
        print(" ----- (Testing) Input shape:  {}  ---> Output shape: {}".format(inputImageClassificationLayerShape_Test, myLiviaNet_ClassificationLayer.outputShapeTest))

# -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

    def updateLayersMatricesBatchNorm(self):
        for l_i in xrange(0, len(self.networkLayers) ) :
            self.networkLayers[l_i].updateLayerMatricesBatchNorm()
# -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

    """ Connects intermediate conv layers to the input of the first fully connected layer.
        This is done to gather multi-scale features. """
    def connectIntermediateLayers(self,
                                  layersToConnect,
                                  inputSampleInFullyCN_Train,
                                  inputSampleInFullyCN_Test,
                                  featMapsInFullyCN):

        centralVoxelsTrain = self.centralVoxelsTrain
        centralVoxelsTest = self.centralVoxelsTest

        for l_i in layersToConnect :
            currentLayer = self.networkLayers[l_i]
            output_train = currentLayer.outputTrain
            output_trainShape = currentLayer.outputShapeTrain
            output_test = currentLayer.outputTest
            output_testShape = currentLayer.outputShapeTest

            # Extract the central part of the feature maps at intermediate levels,
            # so that they match the spatial shape expected at the input of the
            # first fully connected layer.
            featMapsCenter_Train = extractCenterFeatMaps(output_train, output_trainShape, centralVoxelsTrain)
            featMapsCenter_Test  = extractCenterFeatMaps(output_test, output_testShape, centralVoxelsTest)

            featMapsInFullyCN = featMapsInFullyCN + currentLayer._numberOfFeatureMaps
            inputSampleInFullyCN_Train = T.concatenate([inputSampleInFullyCN_Train, featMapsCenter_Train], axis=1)
            inputSampleInFullyCN_Test = T.concatenate([inputSampleInFullyCN_Test, featMapsCenter_Test], axis=1)

        return [featMapsInFullyCN, inputSampleInFullyCN_Train, inputSampleInFullyCN_Test]
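
    # Example of the shapes involved (illustrative values only): if the input to
    # the first FC layer is (batchSize, 50, 9, 9, 9) and a connected intermediate
    # layer outputs (batchSize, 30, 17, 17, 17), its central (9, 9, 9) block is
    # extracted and concatenated along axis=1, yielding (batchSize, 80, 9, 9, 9).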


    #############   Functions for OPTIMIZERS #################

    def getUpdatesOfTrainableParameters(self, cost, paramsTraining, numberParamsPerLayer) :
        # Optimizers
        def SGD():
            print (" --- Optimizer: Stochastic gradient descent (SGD)")
            updates = self.updateParams_SGD(cost, paramsTraining, numberParamsPerLayer)
            return updates
        def RMSProp():
            print (" --- Optimizer: RMS Prop")
            updates = self.updateParams_RMSProp(cost, paramsTraining, numberParamsPerLayer)
            return updates

        # TODO. Include more optimizers here (a sketch of Adam is given below,
        # after updateParams_RMSProp).
        optionsOptimizer = {0 : SGD,
                            1 : RMSProp}

        updates = optionsOptimizer[self.optimizerType]()

        return updates

    """ # Optimizers:
    # More optimizers in: https://github.com/Lasagne/Lasagne/blob/master/lasagne/updates.py """
    # ========= Update the trainable parameters using Stochastic Gradient Descent ===============
    def updateParams_SGD(self, cost, paramsTraining, numberParamsPerLayer) :
        # Create a list of gradients for all model parameters
        grads = T.grad(cost, paramsTraining)

        # Get learning rates for each param
        #learning_rates = extendLearningRateToParams(numberParamsPerLayer,self.learning_rate)

        self.vel_Momentum = []
        updates = []

        constantForCurrentGradientUpdate = 1.0 - self.momentum*self.momentumNormalized

        #for param, grad, lrate  in zip(paramsTraining, grads, learning_rates) :
        for param, grad  in zip(paramsTraining, grads) :
            v = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            self.vel_Momentum.append(v)

            stepToGradientDirection = constantForCurrentGradientUpdate*self.learning_rate*grad
            newVel = self.momentum * v - stepToGradientDirection

            if self.momentumType == 0 :
                # Classic momentum: move by the updated velocity
                updateToParam = newVel
            else :
                # Nesterov-style momentum: look ahead along the velocity
                updateToParam = self.momentum*newVel - stepToGradientDirection

            updates.append((v, newVel))
            updates.append((param, param + updateToParam))

        return updates
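
    # For reference, the update applied above is, per parameter w with gradient g:
    #     v_new = m * v - (1 - m_norm * m) * lr * g
    #     w_new = w + v_new                                     (momentumType == 0, classic)
    #     w_new = w + m * v_new - (1 - m_norm * m) * lr * g     (otherwise, Nesterov-style)
    # where m is the momentum, m_norm the momentumNormalized flag, and lr the
    # shared learning rate.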

    # ========= Update the trainable parameters using RMSProp ===============
    def updateParams_RMSProp(self, cost, paramsTraining, numberParamsPerLayer) :
        # Original code: https://gist.github.com/Newmu/acb738767acb4788bac3
        # The paper uses epsilon=1e-4.
        # Kamnitsas reported NaN values in the cost function when employing that
        # value; it worked fine with epsilon=1e-6.

        grads = T.grad(cost, paramsTraining)

        # Get learning rates for each param
        #learning_rates = extendLearningRateToParams(numberParamsPerLayer,self.learning_rate)

        self.params_RmsProp = []
        self.vel_Momentum = []
        updates = []

        constantForCurrentGradientUpdate = 1.0 - self.momentum*self.momentumNormalized

        # Using a theano constant to prevent upcasting of float32
        one = T.constant(1)

        for param, grad in zip(paramsTraining, grads):
            # Running average of the squared gradient
            accu = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            self.params_RmsProp.append(accu)

            v = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            self.vel_Momentum.append(v)

            accu_new = self.rho_RMSProp * accu + (one - self.rho_RMSProp) * T.sqr(grad)

            numGradStep = self.learning_rate * grad
            denGradStep = T.sqrt(accu_new + self.epsilon_RMSProp)

            stepToGradientDirection = constantForCurrentGradientUpdate*(numGradStep / denGradStep)

            newVel = self.momentum * v - stepToGradientDirection

            if self.momentumType == 0 :
                updateToParam = newVel
            else :
                updateToParam = self.momentum*newVel - stepToGradientDirection

            updates.append((accu, accu_new))
            updates.append((v, newVel))
            updates.append((param, param + updateToParam))

        return updates
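
    # ========= Update the trainable parameters using Adam (sketch) ===============
    # The header notes ask for more optimizers (e.g. Adam). The method below is a
    # minimal sketch in the style of the updaters above, following the standard
    # Adam update rule (Kingma & Ba, 2014). It is NOT wired into optionsOptimizer,
    # and the default hyper-parameters are assumptions (the values suggested in
    # the paper), not part of the original code.
    def updateParams_Adam(self, cost, paramsTraining, numberParamsPerLayer, beta1=0.9, beta2=0.999, epsilon=1e-8) :
        grads = T.grad(cost, paramsTraining)
        updates = []

        # Using a theano constant to prevent upcasting of float32
        one = T.constant(1)
        # Shared step counter t, kept as float32
        t_prev = theano.shared(np.cast["float32"](0.))
        t = t_prev + 1
        # Bias-corrected step size
        a_t = self.learning_rate * T.sqrt(one - beta2 ** t) / (one - beta1 ** t)

        for param, grad in zip(paramsTraining, grads):
            # First (m) and second (v) moment estimates, initialized to zero
            m_prev = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)
            v_prev = theano.shared(param.get_value()*0., broadcastable=param.broadcastable)

            m_t = beta1 * m_prev + (one - beta1) * grad
            v_t = beta2 * v_prev + (one - beta2) * T.sqr(grad)
            step = a_t * m_t / (T.sqrt(v_t) + epsilon)

            updates.append((m_prev, m_t))
            updates.append((v_prev, v_t))
            updates.append((param, param - step))

        updates.append((t_prev, t))
        return updates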

# -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

    """ ------ Get trainable parameters --------- """
    def getTrainable_Params(self):
        trainable_Params = []
        numberTrain_ParamsLayer = []
        for l_i in xrange(0, len(self.networkLayers) ) :
            trainable_Params = trainable_Params + self.networkLayers[l_i].params
            numberTrain_ParamsLayer.append(self.networkLayers[l_i].numberOfTrainableParams) # TODO: Get this directly as len(self.networkLayers[l_i].params)

        return trainable_Params, numberTrain_ParamsLayer

# -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

    def initTrainingParameters(self,
                               costFunction,
                               L1_reg_C,
                               L2_reg_C,
                               learning_rate,
                               momentumType,
                               momentumValue,
                               momentumNormalized,
                               optimizerType,
                               rho_RMSProp,
                               epsilon_RMSProp
                               ) :

        print(" ------- Initializing network training parameters...........")
        self.numberOfEpochsTrained = 0

        self.costFunction = costFunction
        self.L1_reg_C = L1_reg_C
        self.L2_reg_C = L2_reg_C

        # Set the learning rate and store the last epoch where it was modified
        self.initialLearningRate = learning_rate

        # TODO: Check the shared variables from learning rates
        # learning_rate is given as a list; training starts from its first value.
        self.learning_rate.set_value(self.initialLearningRate[0])

        # Set momentum type and values
        self.momentumType = momentumType
        self.initialMomentumValue = momentumValue
        self.momentumNormalized = momentumNormalized
        self.momentum.set_value(self.initialMomentumValue)

        # Optimizers: type 2 is not implemented yet, so fall back to RMSProp
        if (optimizerType == 2):
            optimizerType = 1

        def SGD():
            print (" --- Optimizer: Stochastic gradient descent (SGD)")
            self.optimizerType = optimizerType

        def RMSProp():
            print (" --- Optimizer: RMS Prop")
            self.optimizerType = optimizerType
            self.rho_RMSProp = rho_RMSProp
            self.epsilon_RMSProp = epsilon_RMSProp

        # TODO. Include more optimizers here
        optionsOptimizer = {0 : SGD,
                            1 : RMSProp}

        optionsOptimizer[optimizerType]()

# -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

    def updateParams_BatchNorm(self) :
        # Collect the rolling-average updates of the batch-norm statistics from every layer
        updatesForBnRollingAverage = []
        for l_i in xrange(0, len(self.networkLayers) ) :
            currentLayer = self.networkLayers[l_i]
            updatesForBnRollingAverage.extend( currentLayer.getUpdatesForBnRollingAverage() )
        return updatesForBnRollingAverage

    # ------------------------------------------------------------------------------------ #
    # ---------------------------     Compile the Theano functions     ------------------- #
    # ------------------------------------------------------------------------------------ #
    def compileTheanoFunctions(self):
        print(" ----------------- Starting compilation process ----------------- ")

        # ------- Create and initialize the shared variables needed to compile the training function ------ #
        # -------------------------------------------------------------------------------------------------- #
        # For training
        self.trainingData_x = theano.shared(np.zeros([1,1,1,1,1], dtype="float32"), borrow = True)
        self.trainingData_y = theano.shared(np.zeros([1,1,1,1], dtype="float32"), borrow = True)

        # For testing
        self.testingData_x = theano.shared(np.zeros([1,1,1,1,1], dtype="float32"), borrow = True)

        x_Train = self.inputNetwork_Train
        x_Test  = self.inputNetwork_Test
        y_Train = T.itensor4('y')

        # Allocate symbolic variables for the data
        index_Train = T.lscalar()
        index_Test  = T.lscalar()

        # ------- Needed to compile the training function ------ #
        # ------------------------------------------------------ #
        trainingData_y_CastedToInt   = T.cast( self.trainingData_y, 'int32')

        # To accommodate the weights in the cost function to account for class imbalance
        weightsOfClassesInCostFunction = T.fvector()
        weightPerClass = T.fvector()

        # --------- Get trainable parameters (to be fit by gradient descent) ------- #
        # -------------------------------------------------------------------------- #

        [paramsTraining, numberParamsPerLayer] = self.getTrainable_Params()

        # ------------------ Define the cost function --------------------- #
        # ----------------------------------------------------------------- #
        def negLogLikelihood():
            print (" --- Cost function: negativeLogLikelihood")

            costInLastLayer = self.lastLayer.negativeLogLikelihoodWeighted(y_Train,weightPerClass)
            return costInLastLayer
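
        # Assuming negativeLogLikelihoodWeighted follows the standard definition,
        # the cost over the N classified voxels of a batch is
        #     L(theta) = -(1/N) * sum_i w_{y_i} * log p(y_i | x_i; theta)
        # with w_c the per-class weight used to counter class imbalance.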

        def NotDefined():
            print (" --- Cost function: Not defined!!!!!! WARNING!!!")
            raise ValueError("Cost function {} is not defined".format(self.costFunction))

        optionsCostFunction = {0 : negLogLikelihood,
                               1 : NotDefined}

        costInLastLayer = optionsCostFunction[self.costFunction]()

        # --------------------------- Get costs --------------------------- #
        # ----------------------------------------------------------------- #
        # Get the L1 and L2 weight regularization terms
        costL1 = 0
        costL2 = 0

        # Compute the regularization costs over all layers
        for l_i in xrange(0, len(self.networkLayers)) :
            costL1 += abs(self.networkLayers[l_i].W).sum()
            costL2 += (self.networkLayers[l_i].W ** 2).sum()

        # Combine the data term from the last layer with the regularization terms
        cost = (costInLastLayer
                + self.L1_reg_C * costL1
                + self.L2_reg_C * costL2)

        # --------------------- Include all trainable parameters in updates (for optimization) ---------------------- #
        # ----------------------------------------------------------------------------------------------------------- #
        updates = self.getUpdatesOfTrainableParameters(cost, paramsTraining, numberParamsPerLayer)

        # --------------------- Include batch normalization params ---------------------- #
        # ------------------------------------------------------------------------------- #
        updates = updates + self.updateParams_BatchNorm()

        # For the testing function we need to get the feature map activations
        featMapsActivations = []
        lower_act = 0
        upper_act = 9999

        # TODO: Change to output_Test
        for l_i in xrange(0,len(self.networkLayers)):
            featMapsActivations.append(self.networkLayers[l_i].outputTest[:, lower_act : upper_act, :, :, :])

        # For the last layer, get the predicted probabilities (p_y_given_x_test)
        featMapsActivations.append(self.lastLayer.p_y_given_x_test)
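
        # featMapsActivations now holds the test-time activations of every layer
        # plus, as its last element, the softmax probabilities; this list is what
        # networkModel_Test returns for each batch.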

        # --------------------- Preparing data to compile the functions ---------------------- #
        # ------------------------------------------------------------------------------------ #

        givensDataSet_Train = { x_Train: self.trainingData_x[index_Train * self.batch_Size: (index_Train + 1) * self.batch_Size],
                                y_Train: trainingData_y_CastedToInt[index_Train * self.batch_Size: (index_Train + 1) * self.batch_Size],
                                weightPerClass: weightsOfClassesInCostFunction }

        givensDataSet_Test  = { x_Test: self.testingData_x[index_Test * self.batch_Size: (index_Test + 1) * self.batch_Size] }
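
        # Each call to the compiled functions receives a minibatch index i and
        # slices [i * batch_Size : (i + 1) * batch_Size] out of the shared
        # variables above, so the data can stay on the GPU across iterations.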

        print(" ...Compiling the training function...")

        self.networkModel_Train = theano.function(
                                    [index_Train, weightsOfClassesInCostFunction],
                                    #[cost] + self.lastLayer.doEvaluation(y_Train),
                                    [cost],
                                    updates=updates,
                                    givens = givensDataSet_Train
                                    )

        print(" ...The training function was compiled...")

        #self.getProbabilities = theano.function(
                         #[index],
                         #self.lastLayer.p_y_given_x_Train,
                         #givens={
                            #x: self.trainingData_x[index * self.batch_size: (index + 1) * self.batch_size]
                         #}
         #)

        print(" ...Compiling the testing function...")
        self.networkModel_Test = theano.function(
                                  [index_Test],
                                  featMapsActivations,
                                  givens = givensDataSet_Test
                                  )
        print(" ...The testing function was compiled...")
# -------------------------------------------------------------------- END Function ------------------------------------------------------------------- #

####### Function to generate the CNN #########

    def createNetwork(self,
                      networkName,
                      folderName,
                      cnnLayers,
                      kernel_Shapes,
                      intermediate_ConnectedLayers,
                      n_classes,
                      sampleSize_Train,
                      sampleSize_Test,
                      batch_Size,
                      applyBatchNorm,
                      numberEpochToApplyBatchNorm,
                      activationType,
                      dropout_Rates,
                      pooling_Params,
                      weights_Initialization_CNN,
                      weights_Initialization_FCN,
                      weightsFolderName,
                      weightsTrainedIdx,
                      softmax_Temp
                      ):

        # ============= Model Parameters Passed as arguments ================
        # Assign parameters:
        self.networkName = networkName
        self.folderName = folderName
        self.cnnLayers = cnnLayers
        self.n_classes = n_classes
        self.kernel_Shapes = kernel_Shapes
        self.intermediate_ConnectedLayers = intermediate_ConnectedLayers
        self.pooling_scales = pooling_Params
        self.dropout_Rates = dropout_Rates
        self.activationType = activationType
        self.weight_Initialization_CNN = weights_Initialization_CNN
        self.weight_Initialization_FCN = weights_Initialization_FCN
        self.weightsFolderName = weightsFolderName
        self.weightsTrainedIdx = weightsTrainedIdx
        self.batch_Size = batch_Size
        self.sampleSize_Train = sampleSize_Train
        self.sampleSize_Test = sampleSize_Test
        self.applyBatchNorm = applyBatchNorm
        self.numberEpochToApplyBatchNorm = numberEpochToApplyBatchNorm
        self.softmax_Temp = softmax_Temp

        # Compute the CNN receptive field
        stride = 1
        self.receptiveField = computeReceptiveField(self.kernel_Shapes, stride)
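        # With unit stride and no pooling, the receptive field along each axis is
        # expected to grow as sum_l (k_l - 1) + 1 over the stacked conv kernels
        # (assuming computeReceptiveField implements this standard formula).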

        # ======== Calculated Attributes =========
        self.centralVoxelsTrain = getCentralVoxels(self.sampleSize_Train, self.receptiveField)
        self.centralVoxelsTest = getCentralVoxels(self.sampleSize_Test, self.receptiveField)

        # --------- Now we build the model -------- #

        print("...[STATUS]: Building the Network model...")

        # Define the symbolic variables used as input of the CNN
        # Define tensor5
        tensor5 = T.TensorType(dtype='float32', broadcastable=(False, False, False, False, False))
        self.inputNetwork_Train = tensor5()
        self.inputNetwork_Test = tensor5()
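        # The custom 5D TensorType stands for (batch, channel, x, y, z) volumes;
        # declaring every axis non-broadcastable matches the shape of the shared
        # data arrays used in compileTheanoFunctions.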

        # Define the input shapes to the network
        inputSampleShape_Train = (self.batch_Size, 1, self.sampleSize_Train[0], self.sampleSize_Train[1], self.sampleSize_Train[2])
        inputSampleShape_Test = (self.batch_Size, 1, self.sampleSize_Test[0], self.sampleSize_Test[1], self.sampleSize_Test[2])

        print (" - Shape of input subvolume (Training): {}".format(inputSampleShape_Train))
        print (" - Shape of input subvolume (Testing): {}".format(inputSampleShape_Test))

        inputSample_Train = self.inputNetwork_Train
        inputSample_Test = self.inputNetwork_Test

        # TODO: rename cnnLayers to networkLayers
        self.generateNetworkLayers(cnnLayers,
                                   kernel_Shapes,
                                   self.pooling_scales,
                                   inputSampleShape_Train,
                                   inputSampleShape_Test,
                                   inputSample_Train,
                                   inputSample_Test,
                                   intermediate_ConnectedLayers)

    # Release data from the GPU
    def releaseGPUData(self) :
        # GPU NOTE: Reset the shared variables to tiny dummy arrays so that the
        # large input volumes are freed from GPU memory.

        # Image data
        self.trainingData_x.set_value(np.zeros([1,1,1,1,1], dtype="float32"))
        self.testingData_x.set_value(np.zeros([1,1,1,1,1], dtype="float32"))

        # Labels
        self.trainingData_y.set_value(np.zeros([1,1,1,1], dtype="float32"))
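
# ---------------------------------------------------------------------------- #
# Minimal usage sketch (illustration only): the argument values below are
# assumptions for a small two-conv/two-FC network, NOT taken from the original
# LiviaNET configuration files.
#
#   net = LiviaNet3D()
#   net.createNetwork("liviaTest", "./outputs",
#                     cnnLayers = [10, 20, 50, 50],
#                     kernel_Shapes = [[3, 3, 3], [3, 3, 3], [1], [1]],
#                     intermediate_ConnectedLayers = [],
#                     n_classes = 4,
#                     sampleSize_Train = [25, 25, 25],
#                     sampleSize_Test = [45, 45, 45],
#                     batch_Size = 10,
#                     applyBatchNorm = False,
#                     numberEpochToApplyBatchNorm = 60,
#                     activationType = 0,
#                     dropout_Rates = [0.5, 0.5],
#                     pooling_Params = [],
#                     weights_Initialization_CNN = 0,
#                     weights_Initialization_FCN = 0,
#                     weightsFolderName = "",
#                     weightsTrainedIdx = [],
#                     softmax_Temp = 1.0)
#   net.initTrainingParameters(costFunction = 0, L1_reg_C = 1e-6, L2_reg_C = 1e-4,
#                              learning_rate = [0.001], momentumType = 0,
#                              momentumValue = 0.6, momentumNormalized = 1,
#                              optimizerType = 1, rho_RMSProp = 0.9,
#                              epsilon_RMSProp = 1e-6)
#   net.compileTheanoFunctions()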