[21363a]: / ensembling / NeuralNet.py

Download this file

145 lines (118 with data), 6.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# -*- coding: utf-8 -*-
"""
Created on Sat Aug 15 18:18:11 2015
@author: rc, alex
"""
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import roc_auc_score
from progressbar import Bar, ETA, Percentage, ProgressBar, RotatingMarker
from preprocessing.aux import delay_preds, delay_preds_2d
from utils.nn import buildNN
class NeuralNet(BaseEstimator, ClassifierMixin):
    """Ensembling with a Neural Network.

    The network is built by ``buildNN`` from ``architecture`` and
    ``training_params`` when ``fit`` is called.  Inputs are expanded with
    past time samples by ``delay_preds`` / ``delay_preds_2d`` before being
    fed to the model.  To limit RAM usage, training cycles through
    ``partsTrain`` interleaved portions of the data and prediction is done
    in ``partsTest`` consecutive chunks.
    """

    def __init__(self, ensemble, architecture, training_params,
                 partsTrain=1, partsTest=1,
                 delay=4000, skip=100, jump=None, subsample=1,
                 smallEpochs=2, majorEpochs=20, checkEveryEpochs=2,
                 verbose=True):
        """Store the hyper-parameters; the model itself is built in ``fit``.

        ensemble        : stored as-is; not read by any method of this class.
        architecture    : sequence of single-key mappings, one per layer; the
                          key names the layer type ('Conv', 'GRU' and 'LSTM'
                          select the data preparation scheme).  Passed on to
                          ``buildNN``.
        training_params : dict passed on to ``buildNN``.  For a 'Conv'
                          architecture ``fit`` writes 'num_strides' into it.
        """
        ### timecourse history parameters ###
        # how many past time samples to include along with the most recent sample
        self.delay = delay
        # subsampling step applied to the above samples
        self.skip = skip
        # optional custom subsampling scheme; when set it overrides the previous params
        self.jump = jump

        ### RAM saving ###
        # due to RAM limitations the model is trained in turn on
        # 'partsTrain' interleaved portions of the data
        self.partsTrain = partsTrain
        # also due to RAM limitations testing data is split into 'partsTest' parts
        self.partsTest = partsTest

        ### training ###
        # number of epochs to perform on the current portion of the training data
        self.smallEpochs = smallEpochs
        # number of major epochs to perform,
        # i.e. on each major epoch a new portion of training data is obtained
        self.majorEpochs = majorEpochs
        # print the AUC computed on the test set every 'checkEveryEpochs' major epochs
        self.checkEveryEpochs = checkEveryEpochs
        # whether to calculate and print results during training
        self.verbose = verbose
        # used in bagging to set different starting points when subsampling the data
        self.mdlNr = 0
        # extra stride applied to each training portion (and its targets) in fit
        self.subsample = subsample

        self.architecture = architecture
        self.ensemble = ensemble
        self.training_params = training_params

    def fit(self, X, y, Xtest=None, ytest=None):
        """Build the network and train it portion by portion.

        X, y         : training inputs (2-D, ``X.shape[1]`` features) and targets.
        Xtest, ytest : optional held-out data; when both are given and
                       ``verbose`` is set, the AUC on a subsampled version of
                       it is printed every ``checkEveryEpochs`` major epochs.

        Returns ``self`` so that calls can be chained.
        """
        input_dim = X.shape[1]

        # Pick the data preparation scheme from the layer types.  Each layer
        # spec is a single-key mapping; next(iter(...)) fetches that key on
        # both Python 2 and 3 (dict.keys() is not subscriptable on Python 3).
        layers = [next(iter(layer)) for layer in self.architecture]
        self.isCNN = 'Conv' in layers
        self.isRecurrent = 'GRU' in layers or 'LSTM' in layers

        if self.isCNN:
            self.addDelay = delay_preds
            # NOTE: this writes into the dict supplied by the caller.
            self.training_params['num_strides'] = self.delay // self.skip
        elif self.isRecurrent:
            self.addDelay = delay_preds_2d
        else:
            # Dense network: every delayed sample contributes its own features.
            # NOTE(review): '/' is true division on Python 3; the product is
            # truncated by int() below - confirm delay is a multiple of skip.
            input_dim *= self.delay / self.skip
            input_dim = int(input_dim)
            self.addDelay = delay_preds

        # create the model
        self.model = buildNN(self.architecture, self.training_params, input_dim)

        widgets = ['Training : ', Percentage(), ' ', Bar(marker=RotatingMarker()),
                   ' ', ETA(), ' ']
        pbar = ProgressBar(widgets=widgets, maxval=self.majorEpochs)
        pbar.start()

        # train the model on a portion of training data;
        # that portion is changed each major epoch
        for majorEpoch in range(self.majorEpochs):
            # When the epoch-based offset is 0 the model number decides the
            # offset, so bagged models start from different portions.
            startingPoint = (majorEpoch % self.partsTrain
                             or self.mdlNr % self.partsTrain)

            delay_kwargs = dict(delay=self.delay, skip=self.skip,
                                subsample=self.partsTrain, start=startingPoint)
            # 'jump' is only forwarded when a custom scheme was requested.
            if self.jump is not None:
                delay_kwargs['jump'] = self.jump
            trainData = self.addDelay(X, **delay_kwargs)

            if self.isCNN:
                trainData = trainData.reshape(
                    (trainData.shape[0], 1, trainData.shape[1], 1))

            # Targets are taken with the same offset and stride as the inputs.
            targets = y[startingPoint::self.partsTrain]

            trainData = trainData[::self.subsample]
            targets = targets[::self.subsample]

            self.model.fit(trainData, targets, nb_epoch=self.smallEpochs,
                           batch_size=512, verbose=0, show_accuracy=True)

            # drop the reference so the memory can be reclaimed before the next portion
            trainData = None
            pbar.update(majorEpoch)

            if self.verbose and majorEpoch % self.checkEveryEpochs == 0:
                print("Total epochs: %d" % (self.smallEpochs * (majorEpoch + 1)))
                if Xtest is not None and ytest is not None:
                    pred = self._predict_proba_train(Xtest)
                    score = np.mean(roc_auc_score(ytest[0::self.partsTest], pred))
                    print("Test AUC : %.5f" % (score))
                    pred = None

        # complete the bar, which is otherwise left unfinished
        pbar.finish()

        if self.verbose:
            # Use the configured total rather than the loop variable, which is
            # unbound when majorEpochs is 0.
            print('Training finished after %d epochs'
                  % (self.smallEpochs * self.majorEpochs))
        return self

    def predict_proba(self, X):
        """Get predictions for X, processed in ``partsTest`` consecutive chunks.

        Every chunk after the first is extended backwards so that its first
        rows have history available; the extra rows are dropped again after
        the delayed features are built.  Returns the concatenated output of
        ``self.model.predict_proba`` over all chunks.
        """
        pred = []
        n_samples = len(X)
        for part in range(self.partsTest):
            boundary = part * n_samples // self.partsTest
            stop = (part + 1) * n_samples // self.partsTest
            # Look back at most 'delay' rows, but never before the start of X:
            # a negative slice start would wrap around to the end of X.
            # The first part has boundary 0 and therefore no look-back.
            lead = min(self.delay, boundary)
            start = boundary - lead
            # assumes addDelay returns one row per input row when no
            # subsampling is requested, so trimming 'lead' rows re-aligns
            # the output with [boundary, stop)
            testData = self.addDelay(X[start:stop], delay=self.delay,
                                     skip=self.skip, jump=self.jump)[lead:]
            if self.isCNN:
                testData = testData.reshape(
                    (testData.shape[0], 1, testData.shape[1], 1))
            pred.append(self.model.predict_proba(testData, batch_size=512,
                                                 verbose=0))
            testData = None
        pred = np.concatenate(pred)
        return pred

    def _predict_proba_train(self, X):
        """Only used internally during training - subsamples test data for speed.

        Keeps every ``partsTest``-th sample starting at offset 0, matching the
        ``ytest[0::self.partsTest]`` targets used for scoring in ``fit``.
        """
        testData = self.addDelay(X, delay=self.delay, skip=self.skip,
                                 subsample=self.partsTest, start=0,
                                 jump=self.jump)
        if self.isCNN:
            testData = testData.reshape(
                (testData.shape[0], 1, testData.shape[1], 1))
        pred = self.model.predict_proba(testData, batch_size=512, verbose=0)
        testData = None
        return pred