# GP/CNN/cnn.py
from scipy.stats import pearsonr
import pandas as pd
import numpy as np
import tensorflow as tf  # required by correlation_coefficient_loss (tf.multiply)
from keras import backend as K
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout, Flatten, Conv1D, MaxPooling1D
from keras import regularizers
from keras.activations import relu, elu, linear, softmax, tanh, softplus
from keras.callbacks import EarlyStopping, Callback
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import Adam, Nadam, sgd, Adadelta, RMSprop
from keras.losses import mean_squared_error, categorical_crossentropy, logcosh
from keras.utils.np_utils import to_categorical
from keras import metrics

# talos drives the hyperparameter search
import talos as ta
import wrangle as wr
from talos.metrics.keras_metrics import fmeasure_acc
from talos import live
from talos.model import lr_normalizer, early_stopper, hidden_layers
import os

# Custom metric: Pearson correlation between predictions and targets
def acc_pearson_r(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x, axis=0)
    my = K.mean(y, axis=0)
    xm, ym = x - mx, y - my
    r_num = K.sum(xm * ym)
    x_square_sum = K.sum(xm * xm)
    y_square_sum = K.sum(ym * ym)
    r_den = K.sqrt(x_square_sum * y_square_sum)
    r = r_num / r_den
    return K.mean(r)


# Custom loss: 1 - r^2, so maximizing the correlation minimizes the loss
def correlation_coefficient_loss(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x)
    my = K.mean(y)
    xm, ym = x - mx, y - my
    r_num = K.sum(tf.multiply(xm, ym))
    r_den = K.sqrt(tf.multiply(K.sum(K.square(xm)), K.sum(K.square(ym))))
    r = r_num / r_den
    # clip to [-1, 1] to guard against numerical drift
    r = K.maximum(K.minimum(r, 1.0), -1.0)
    return 1 - K.square(r)
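

# Hypothetical sanity check (not in the original file): on random vectors the
# Keras-side acc_pearson_r should agree with scipy's pearsonr, and the loss
# should equal 1 - r^2. Assumes a backend where K.eval works on K.constant,
# as in the Keras-2/TF-1 setup this file targets.
def _check_custom_metrics(n=50):
    y_t = np.random.rand(n, 1).astype('float32')
    y_p = np.random.rand(n, 1).astype('float32')
    r_ref = pearsonr(y_t.ravel(), y_p.ravel())[0]
    r_keras = float(K.eval(acc_pearson_r(K.constant(y_t), K.constant(y_p))))
    loss_keras = float(K.eval(correlation_coefficient_loss(K.constant(y_t),
                                                           K.constant(y_p))))
    assert abs(r_keras - r_ref) < 1e-4
    assert abs(loss_keras - (1.0 - r_ref ** 2)) < 1e-4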


# nStride = 3   # stride between convolutions
# nFilter = 32  # no. of convolution filters

def cnn_main(x, y, x_val, y_val, params):
    # Build the model exactly as we would normally do with Keras
    model_cnn = Sequential()
    nSNP = x.shape[1]
    # y may be 1-D (single trait) or 2-D (multi-trait)
    try:
        out_c = y.shape[1]
    except IndexError:
        out_c = 1
    # Conv1D expects 3-D input: (samples, nSNP, channels)
    x = np.expand_dims(x, axis=2)
    x_val = np.expand_dims(x_val, axis=2)

    # add convolutional layer(s); the first one declares the input shape
    if params['nconv'] == 1:
        model_cnn.add(Conv1D(params['nFilter'], kernel_size=params['kernel_size'],
                             strides=params['nStride'], input_shape=(nSNP, 1),
                             kernel_regularizer=regularizers.l2(params['reg2']),
                             kernel_initializer='normal',
                             activity_regularizer=regularizers.l1(params['reg1']),
                             activation=params['activation_1']))
        model_cnn.add(MaxPooling1D(pool_size=params['pool']))
    else:
        for i in range(params['nconv']):
            if i == 0:
                model_cnn.add(Conv1D(params['nFilter'], kernel_size=params['kernel_size'],
                                     strides=params['nStride'], input_shape=(nSNP, 1),
                                     kernel_regularizer=regularizers.l2(params['reg2']),
                                     kernel_initializer='normal',
                                     activity_regularizer=regularizers.l1(params['reg1']),
                                     activation=params['activation_1']))
            else:
                model_cnn.add(Conv1D(params['nFilter'], kernel_size=params['kernel_size'],
                                     strides=params['nStride'],
                                     kernel_regularizer=regularizers.l2(params['reg2']),
                                     kernel_initializer='normal',
                                     activity_regularizer=regularizers.l1(params['reg1']),
                                     activation=params['activation_1']))
            model_cnn.add(MaxPooling1D(pool_size=params['pool']))

    # convolutional feature maps are linearized (flattened) to feed standard dense layers
    model_cnn.add(Flatten())

    if params['hidden_layers'] != 0:
        # if we want to also test for the number of hidden layers, that's possible
        for _ in range(params['hidden_layers']):
            model_cnn.add(Dense(params['hidden_neurons'],
                                activation=params['activation_2'],
                                kernel_regularizer=regularizers.l2(params['reg2'])))
            model_cnn.add(Dropout(params['dropout_2']))

    model_cnn.add(Dense(out_c, activation=params['last_activation'],
                        kernel_regularizer=regularizers.l2(params['reg3'])))

    # map optimizer names to classes so talos can pass plain strings
    if params['optimizer'] == 'Adam':
        params['optimizer'] = Adam
    if params['optimizer'] == 'Nadam':
        params['optimizer'] = Nadam
    if params['optimizer'] == 'sgd':
        params['optimizer'] = sgd

    model_cnn.compile(loss=mean_squared_error,
                      optimizer=params['optimizer'](lr=lr_normalizer(params['lr'],
                                                                     params['optimizer'])),
                      metrics=[acc_pearson_r])

    # simple early stopping:
    # if the monitored quantity is an accuracy-like metric (Pearson r here),
    # choose mode='max'; for a loss-like quantity it would be mode='min'
    # 7/08/2019: changed mean_squared_error to acc_pearson_r and mode='min' to mode='max'
    # es = EarlyStopping(monitor=acc_pearson_r, mode='max', verbose=1)

    # callbacks=[live()] plots training progress; use callbacks=[es] for EarlyStopping.
    # Note: x_val/y_val are passed in by talos, but fit() uses an internal split here.
    out_cnn = model_cnn.fit(x, y, validation_split=0.2,
                            verbose=0, batch_size=params['batch_size'],
                            epochs=params['epochs'], callbacks=[live()])

    return out_cnn, model_cnn
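

# Usage sketch (an assumption, not from the original file): cnn_main follows the
# talos model-function signature, so it can be driven by talos.Scan. The grid
# values and the toy SNP matrix below are illustrative placeholders only.
def _example_scan_cnn():
    p = {'nconv': [1], 'nFilter': [16], 'kernel_size': [3], 'nStride': [3],
         'pool': [2], 'reg1': [0.0], 'reg2': [0.01], 'reg3': [0.01],
         'activation_1': ['relu'], 'activation_2': ['relu'],
         'last_activation': ['linear'], 'hidden_layers': [1],
         'hidden_neurons': [32], 'dropout_2': [0.1], 'optimizer': ['Adam'],
         'lr': [1.0], 'batch_size': [16], 'epochs': [10]}
    X = np.random.binomial(2, 0.5, (100, 500)).astype('float32')  # toy genotypes
    y = np.random.rand(100).astype('float32')                     # toy phenotype
    return ta.Scan(x=X, y=y, params=p, model=cnn_main)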


# CNN model for categorical (multi-class) targets
def cnn_main_cat(x, y, x_val, y_val, params):
    # Build the model exactly as we would normally do with Keras
    model_cnn = Sequential()
    nSNP = x.shape[1]
    out_c = y.shape[1]  # y is expected to be one-hot encoded
    # Conv1D expects 3-D input: (samples, nSNP, channels)
    x = np.expand_dims(x, axis=2)
    x_val = np.expand_dims(x_val, axis=2)

    # add convolutional layer(s); the first one declares the input shape
    if params['nconv'] == 1:
        model_cnn.add(Conv1D(params['nFilter'], kernel_size=params['kernel_size'],
                             strides=params['nStride'], input_shape=(nSNP, 1),
                             kernel_regularizer=regularizers.l2(params['reg2']),
                             kernel_initializer='normal',
                             activity_regularizer=regularizers.l1(params['reg1']),
                             activation=params['activation_1']))
        model_cnn.add(MaxPooling1D(pool_size=params['pool']))
    else:
        for i in range(params['nconv']):
            if i == 0:
                model_cnn.add(Conv1D(params['nFilter'], kernel_size=params['kernel_size'],
                                     strides=params['nStride'], input_shape=(nSNP, 1),
                                     kernel_regularizer=regularizers.l2(params['reg2']),
                                     kernel_initializer='normal',
                                     activity_regularizer=regularizers.l1(params['reg1']),
                                     activation=params['activation_1']))
            else:
                model_cnn.add(Conv1D(params['nFilter'], kernel_size=params['kernel_size'],
                                     strides=params['nStride'],
                                     kernel_regularizer=regularizers.l2(params['reg2']),
                                     kernel_initializer='normal',
                                     activity_regularizer=regularizers.l1(params['reg1']),
                                     activation=params['activation_1']))
            model_cnn.add(MaxPooling1D(pool_size=params['pool']))

    # convolutional feature maps are linearized (flattened) to feed standard dense layers
    model_cnn.add(Flatten())

    if params['hidden_layers'] != 0:
        # if we want to also test for the number of hidden layers, that's possible
        for _ in range(params['hidden_layers']):
            model_cnn.add(Dense(params['hidden_neurons'],
                                activation=params['activation_1'],
                                activity_regularizer=regularizers.l1(params['reg1'])))
            model_cnn.add(Dropout(params['dropout']))

    # softmax output over the classes
    model_cnn.add(Dense(out_c, activation='softmax'))

    # map optimizer names to classes so talos can pass plain strings
    if params['optimizer'] == 'Adam':
        params['optimizer'] = Adam
    if params['optimizer'] == 'Nadam':
        params['optimizer'] = Nadam
    if params['optimizer'] == 'sgd':
        params['optimizer'] = sgd

    model_cnn.compile(loss='categorical_crossentropy',
                      optimizer=params['optimizer'](lr=lr_normalizer(params['lr'],
                                                                     params['optimizer'])),
                      metrics=['accuracy'])

    # simple early stopping:
    # if the monitored quantity is an accuracy-like metric, choose mode='max';
    # for a loss-like quantity it would be mode='min'
    # es = EarlyStopping(monitor=mean_squared_error, mode='min', verbose=1)

    # callbacks=[live()] plots training progress; use callbacks=[es] for EarlyStopping
    out_cnn = model_cnn.fit(x, y, validation_split=0.2,
                            verbose=0, batch_size=params['batch_size'],
                            epochs=params['epochs'])

    return out_cnn, model_cnn
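

# Usage sketch for the categorical model (an assumption, not from the original
# file): labels must be one-hot encoded, e.g. via to_categorical, so that
# y.shape[1] gives the number of classes. All grid values are placeholders.
def _example_scan_cnn_cat():
    p = {'nconv': [1], 'nFilter': [16], 'kernel_size': [3], 'nStride': [3],
         'pool': [2], 'reg1': [0.0], 'reg2': [0.01],
         'activation_1': ['relu'], 'hidden_layers': [1],
         'hidden_neurons': [32], 'dropout': [0.1], 'optimizer': ['Adam'],
         'lr': [1.0], 'batch_size': [16], 'epochs': [10]}
    X = np.random.binomial(2, 0.5, (100, 500)).astype('float32')  # toy genotypes
    y = to_categorical(np.random.randint(0, 3, 100))              # 3 toy classes
    return ta.Scan(x=X, y=y, params=p, model=cnn_main_cat)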