Switch to unified view

a b/PathCNN_GradCAM_modeling.py
1
from __future__ import print_function
2
import keras
3
from keras.datasets import mnist
4
from keras.models import Sequential
5
from keras.layers import Dense, Dropout, Flatten
6
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Input
7
from keras import backend as K
8
import pandas as pd
9
import numpy as np
10
from sklearn.metrics import roc_auc_score
11
from sklearn.utils import class_weight
12
#from matplotlib import pyplot as plt
13
from scipy.cluster.hierarchy import dendrogram, linkage
14
from keras import regularizers
15
from keras.layers.core import *
16
from keras.models import Model
17
from keras.optimizers import Adam
18
from sklearn.metrics import confusion_matrix
19
20
pca_exp = pd.read_excel("data/PCA_EXP.xlsx", header=None)
21
pca_cnv = pd.read_excel("data/PCA_CNV.xlsx", header=None)
22
pca_mt = pd.read_excel("data/PCA_MT.xlsx", header=None)
23
clinical = pd.read_excel("data/Clinical.xlsx")
24
25
n = len(pca_exp)  # sample size: number of Pts
26
path_n = 146  # number of pathways
27
pc = 5  # number of PCs
28
29
# data creation-EXP
30
pca_exp = pca_exp.to_numpy()
31
exp_data = np.zeros((n, path_n, pc))
32
for i in range(n):
33
    for j in range(path_n):
34
        exp_data[i, j, :] = pca_exp[i, j * pc:(j + 1) * pc]
35
36
# data creation-CNV
37
pca_cnv = pca_cnv.to_numpy()
38
cnv_data = np.zeros((n, path_n, pc))
39
for i in range(n):
40
    for j in range(path_n):
41
        cnv_data[i, j, :] = pca_cnv[i, j * pc:(j + 1) * pc]
42
43
# data creation-MT
44
pca_mt = pca_mt.to_numpy()
45
mt_data = np.zeros((n, path_n, pc))
46
for i in range(n):
47
    for j in range(path_n):
48
        mt_data[i, j, :] = pca_mt[i, j * pc:(j + 1) * pc]
49
50
# data merge: mRNA expression, CNV, and MT with a specific number of PCs
51
no_pc = 2  # use the first 2PCs among 5 PCs
52
all_data = np.zeros((n, path_n, no_pc * 3))
53
for i in range(n):
54
        all_data[i, :, :] = np.concatenate((exp_data[i, :, 0:no_pc], cnv_data[i, :, 0:no_pc], mt_data[i, :, 0:no_pc]),axis=1)
55
56
clinical = clinical.to_numpy()
57
age = clinical[:, 4]
58
survival = clinical[:, 5]
59
os_months = clinical[:, 6]
60
idx0 = np.where((survival == 1) & (os_months <= 24))
61
idx1 = np.where(os_months > 24)
62
63
bio_data=clinical[:, 7:10]
64
all_data = all_data[:, :, :]
65
66
data_0 = all_data[idx0, :, :]
67
data_0 = data_0[0, :, :, :]
68
data_1 = all_data[idx1, :, :]
69
data_1 = data_1[0, :, :, :]
70
71
age_0 = age[idx0]
72
age_1 = age[idx1]
73
74
bio_data_0 = bio_data[idx0, : ]
75
bio_data_0 = bio_data_0[0, :, :]
76
bio_data_1 = bio_data[idx1,:]
77
bio_data_1 = bio_data_1[0, :, :]
78
79
outcomes_0 = np.zeros(len(idx0[0]))
80
outcomes_1 = np.ones(len(idx1[0]))
81
82
## data merge
83
age_r = np.concatenate((age_0, age_1))
84
data = np.concatenate((data_0, data_1))
85
outcomes = np.concatenate((outcomes_0, outcomes_1))
86
bio_data_r=np.concatenate((bio_data_0, bio_data_1))
87
88
## DEEP LEARNING
89
batch_size = 64
90
epochs = 30
91
92
num_classes = 2
93
num_runs = 30
94
k_fold = 5
95
96
# input image dimension
97
img_rows, img_cols = 146, 6
98
99
# use all data for modeling
100
train1 = train = range(data.shape[0])
101
test = range(data.shape[0])
102
validation = range(data.shape[0])
103
104
x_train = data[train, :, :]
105
y_train = outcomes[train]
106
107
x_validation = data[validation, :, :]
108
y_validation = outcomes[validation]
109
110
x_test = data[test, :, :]
111
y_test = outcomes[test]
112
113
# age
114
age_r1=age_r[train1]
115
age_train = age_r1[train]
116
age_validation = age_r1[validation]
117
age_test = age_r[test]
118
119
# bio
120
bio_data_r1 = bio_data_r[train1, :]
121
bio_data_train = bio_data_r1[train,:]
122
bio_data_validation = bio_data_r1[validation, :]
123
bio_data_test = bio_data_r[test,:]
124
125
cli_train=np.concatenate((np.matrix([age_train]).T, bio_data_train), axis=1)
126
cli_validation = np.concatenate((np.matrix([age_validation]).T, bio_data_validation), axis=1)
127
cli_test = np.concatenate((np.matrix([age_test]).T, bio_data_test), axis=1)
128
129
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)  # rgb- add one more dimensionality
130
x_validation = x_validation.reshape(x_validation.shape[0], img_rows, img_cols, 1)
131
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
132
input_shape = (img_rows, img_cols, 1)
133
134
x_train = x_train.astype('float32')
135
x_validation = x_validation.astype('float32')
136
x_test = x_test.astype('float32')
137
138
print('x_train shape:', x_train.shape)
139
print(x_train.shape[0], 'train samples')
140
print(x_validation.shape[0], 'validation samples')
141
print(x_test.shape[0], 'test samples')
142
143
y_train = keras.utils.to_categorical(y_train, num_classes)
144
y_validation = keras.utils.to_categorical(y_validation, num_classes)
145
y_test = keras.utils.to_categorical(y_test, num_classes)
146
147
image_input = Input(shape=input_shape)
148
other_data_input = Input(shape=(1,))
149
150
# First convolution
151
conv1 = Conv2D(32, kernel_size=(3, 3),
152
                activation='relu', padding='same'
153
                )(image_input)
154
# Second Convolution
155
conv2 = Conv2D(64, (3, 3), activation='relu', padding='same'
156
                )(conv1)
157
conv2 = MaxPooling2D(pool_size=(4, 2))(conv2)
158
conv2 = Dropout(0.25)(conv2)
159
first_part_output = Flatten()(conv2)
160
161
#merged_model = keras.layers.concatenate([first_part_output, other_data_input])
162
# without age
163
merged_model = first_part_output
164
merged_model = Dense(64, activation='relu')(merged_model)
165
merged_model = Dropout(0.5)(merged_model)
166
167
predictions = Dense(num_classes, activation='softmax')(merged_model)
168
169
# Now create the model
170
model = Model(inputs=[image_input, other_data_input], outputs=predictions)
171
model.summary()
172
layers = model.layers
173
174
lr = 0.0001
175
beta_1 = 0.9
176
beta_2 = 0.999
177
optimizer = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2)
178
179
model.compile(optimizer=optimizer, loss='binary_crossentropy',
180
              metrics=['accuracy'])
181
182
class_weighting = {0: 1, 1: 4.2}  # class weight
183
model.fit([x_train, age_train], [y_train],
184
          batch_size=batch_size,
185
          epochs=epochs,
186
          verbose=1, class_weight=class_weighting,
187
          validation_data=([x_validation, age_validation], [y_validation]))
188
189
score = model.predict([x_test, age_test], verbose=0)
190
model.save('PathCNN_model.h5')
191
192
observed = y_test
193
predicted = score
194
195
observed = y_test
196
predicted = score
197
198
auc = roc_auc_score(observed[:,0], predicted[:,0])
199
tn, fp, fn, tp = confusion_matrix(observed[:,0]>0.5, predicted[:,0]>0.5).ravel()
200
sen = tn/(tn+fp)
201
spe = tp/(tp+fn)
202
acc = (tp+tn)/(tp+tn+fp+fn)
203
print(f"AUC  Sen. Spe. Acc.\n{auc:0.2f} {sen:0.2f} {spe:0.2f} {acc:0.2f}")