Diff of /PathCNN.py [000000] .. [295c34]

from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Input
from keras.layers.core import *
from keras import backend as K
from keras import regularizers
from keras.optimizers import Adam
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.utils import class_weight
#from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

pca_exp = pd.read_excel("data/PCA_EXP.xlsx", header=None)
pca_cnv = pd.read_excel("data/PCA_CNV.xlsx", header=None)
pca_mt = pd.read_excel("data/PCA_MT.xlsx", header=None)
clinical = pd.read_excel("data/Clinical.xlsx")

n = len(pca_exp)  # sample size: number of patients
path_n = 146  # number of pathways
pc = 5  # number of PCs per pathway
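
# Each PCA_*.xlsx sheet is assumed to hold one row per patient with
# path_n * pc = 146 * 5 = 730 columns: the 5 principal components of each
# pathway, stored pathway by pathway (pathway-major order).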

# data creation - EXP
pca_exp = pca_exp.to_numpy()
exp_data = np.zeros((n, path_n, pc))
for i in range(n):
    for j in range(path_n):
        exp_data[i, j, :] = pca_exp[i, j * pc:(j + 1) * pc]

# data creation - CNV
pca_cnv = pca_cnv.to_numpy()
cnv_data = np.zeros((n, path_n, pc))
for i in range(n):
    for j in range(path_n):
        cnv_data[i, j, :] = pca_cnv[i, j * pc:(j + 1) * pc]

# data creation - MT
pca_mt = pca_mt.to_numpy()
mt_data = np.zeros((n, path_n, pc))
for i in range(n):
    for j in range(path_n):
        mt_data[i, j, :] = pca_mt[i, j * pc:(j + 1) * pc]

# data merge: mRNA expression, CNV, and MT with a specific number of PCs
no_pc = 2  # use the first 2 PCs among the 5 PCs
all_data = np.zeros((n, path_n, no_pc * 3))
for i in range(n):
    all_data[i, :, :] = np.concatenate(
        (exp_data[i, :, 0:no_pc], cnv_data[i, :, 0:no_pc], mt_data[i, :, 0:no_pc]),
        axis=1)
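
# Equivalently, each per-omics loop above is a plain reshape of the flat PCA
# matrix (e.g. exp_data = pca_exp.reshape(n, path_n, pc)), assuming the
# pathway-major column order described above. all_data has shape (n, 146, 6);
# for each pathway its 6 columns are
# [EXP PC1, EXP PC2, CNV PC1, CNV PC2, MT PC1, MT PC2].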

clinical = clinical.to_numpy()
age = clinical[:, 4]
survival = clinical[:, 5]
os_months = clinical[:, 6]
idx0 = np.where((survival == 1) & (os_months <= 24))
idx1 = np.where(os_months > 24)
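# Class definitions (assuming survival == 1 marks a recorded death event):
#   class 0: death within 24 months of follow-up
#   class 1: overall survival longer than 24 months
# Patients censored before 24 months match neither index and are excluded.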

bio_data = clinical[:, 7:10]

data_0 = all_data[idx0, :, :]
data_0 = data_0[0, :, :, :]  # drop the extra axis introduced by np.where indexing
data_1 = all_data[idx1, :, :]
data_1 = data_1[0, :, :, :]

age_0 = age[idx0]
age_1 = age[idx1]

bio_data_0 = bio_data[idx0, :]
bio_data_0 = bio_data_0[0, :, :]
bio_data_1 = bio_data[idx1, :]
bio_data_1 = bio_data_1[0, :, :]

outcomes_0 = np.zeros(len(idx0[0]))
outcomes_1 = np.ones(len(idx1[0]))

## data merge
age_r = np.concatenate((age_0, age_1))
data = np.concatenate((data_0, data_1))
outcomes = np.concatenate((outcomes_0, outcomes_1))
bio_data_r = np.concatenate((bio_data_0, bio_data_1))
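# After this merge, data (samples x 146 x 6), outcomes, age_r, and bio_data_r are
# row-aligned: class-0 patients first, then class-1 patients.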

## DEEP LEARNING
batch_size = 64
epochs = 30

num_classes = 2
num_runs = 30
k_fold = 5

# input image dimensions: 146 pathways x 6 features (2 PCs x 3 omics types)
img_rows, img_cols = 146, 6

auc = []

all_observed = np.array([])
all_predicted = np.array([])

for i in range(num_runs):
    print(i)
    # 5-fold cross validation
    skf = StratifiedKFold(n_splits=k_fold)

    observed = np.array([])
    predicted = np.array([])

    for train1, test in skf.split(data, outcomes):
        # split the outer training fold again and keep only the first inner
        # fold as a train/validation split
        skf1 = StratifiedKFold(n_splits=k_fold)
        train_data = data[train1, :, :]
        train_outcomes = outcomes[train1]

        for train, validation in skf1.split(train_data, train_outcomes):
            break

        x_train = train_data[train, :, :]
        y_train = train_outcomes[train]

        x_validation = train_data[validation, :, :]
        y_validation = train_outcomes[validation]

        x_test = data[test, :, :]
        y_test = outcomes[test]

        # age
        age_r1 = age_r[train1]
        age_train = age_r1[train]
        age_validation = age_r1[validation]
        age_test = age_r[test]

        # bio data
        bio_data_r1 = bio_data_r[train1, :]
        bio_data_train = bio_data_r1[train, :]
        bio_data_validation = bio_data_r1[validation, :]
        bio_data_test = bio_data_r[test, :]

        # clinical features: age + bio data
        cli_train = np.concatenate((np.matrix([age_train]).T, bio_data_train), axis=1)
        cli_validation = np.concatenate((np.matrix([age_validation]).T, bio_data_validation), axis=1)
        cli_test = np.concatenate((np.matrix([age_test]).T, bio_data_test), axis=1)

        # add a channel dimension, as for a single-channel image
        x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
        x_validation = x_validation.reshape(x_validation.shape[0], img_rows, img_cols, 1)
        x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
        input_shape = (img_rows, img_cols, 1)

        x_train = x_train.astype('float32')
        x_validation = x_validation.astype('float32')
        x_test = x_test.astype('float32')

        print('x_train shape:', x_train.shape)
        print(x_train.shape[0], 'train samples')
        print(x_validation.shape[0], 'validation samples')
        print(x_test.shape[0], 'test samples')

        # one-hot encode the binary outcome
        y_train = keras.utils.to_categorical(y_train, num_classes)
        y_validation = keras.utils.to_categorical(y_validation, num_classes)
        y_test = keras.utils.to_categorical(y_test, num_classes)

        image_input = Input(shape=input_shape)
        other_data_input = Input(shape=(1,))  # clinical input: age only

        # first convolution
        conv1 = Conv2D(32, kernel_size=(3, 3),
                       activation='relu', padding='same')(image_input)
        # second convolution
        conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv1)
        conv2 = MaxPooling2D(pool_size=(4, 2))(conv2)
        conv2 = Dropout(0.25)(conv2)
        first_part_output = Flatten()(conv2)

        # merge the CNN branch with the clinical (age) input
        merged_model = keras.layers.concatenate([first_part_output, other_data_input])
        merged_model = Dense(64, activation='relu')(merged_model)
        merged_model = Dropout(0.5)(merged_model)

        predictions = Dense(num_classes, activation='softmax')(merged_model)
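        # With 146 x 6 inputs, MaxPooling2D((4, 2)) leaves a 36 x 3 x 64 feature
        # map, so first_part_output has 6,912 units; concatenating the scalar age
        # input gives 6,913 features feeding the Dense(64) layer.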

        # create and compile the model
        model = Model(inputs=[image_input, other_data_input], outputs=predictions)
        model.summary()

        lr = 0.0001
        beta_1 = 0.9
        beta_2 = 0.999
        optimizer = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2)

        model.compile(optimizer=optimizer, loss='binary_crossentropy',
                      metrics=['accuracy'])

        class_weighting = {0: 1, 1: 4.2}  # class weights to counter class imbalance
        model.fit([x_train, age_train], [y_train],
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1, class_weight=class_weighting,
                  validation_data=([x_validation, age_validation], [y_validation]))
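        # Note: only age is passed as the clinical input here; cli_train and
        # cli_validation (age + bio data) are built above but not used by this
        # model variant.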

        score = model.predict([x_test, age_test], verbose=0)

        # accumulate test-fold labels and predictions across the 5 folds
        if len(observed) == 0:
            observed = y_test
            predicted = score
        else:
            observed = np.concatenate((observed, y_test))
            predicted = np.concatenate((predicted, score))

    # AUC over the pooled out-of-fold predictions for this run
    auc.append(roc_auc_score(observed[:, 0], predicted[:, 0]))
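    # Column 0 is the one-hot label / softmax probability for class 0; because the
    # two softmax outputs sum to 1, this AUC equals the class-1 AUC.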

    if len(all_observed) == 0:
        all_observed = observed[:, 0]
        all_predicted = predicted[:, 0]
    else:
        all_observed = np.concatenate((all_observed, observed[:, 0]))
        all_predicted = np.concatenate((all_predicted, predicted[:, 0]))

    print(auc)

print(auc)
pd.DataFrame(auc).to_csv("results.csv")
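# results.csv holds one AUC value per run (num_runs rows); a cross-run summary
# could be added here, e.g. print(np.mean(auc), np.std(auc)).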