Switch to side-by-side view

--- a
+++ b/PathCNN_GradCAM_modeling.py
@@ -0,0 +1,203 @@
+from __future__ import print_function
+import keras
+from keras.datasets import mnist
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Flatten
+from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Input
+from keras import backend as K
+import pandas as pd
+import numpy as np
+from sklearn.metrics import roc_auc_score
+from sklearn.utils import class_weight
+#from matplotlib import pyplot as plt
+from scipy.cluster.hierarchy import dendrogram, linkage
+from keras import regularizers
+from keras.layers.core import *
+from keras.models import Model
+from keras.optimizers import Adam
+from sklearn.metrics import confusion_matrix
+
+pca_exp = pd.read_excel("data/PCA_EXP.xlsx", header=None)
+pca_cnv = pd.read_excel("data/PCA_CNV.xlsx", header=None)
+pca_mt = pd.read_excel("data/PCA_MT.xlsx", header=None)
+clinical = pd.read_excel("data/Clinical.xlsx")
+
+n = len(pca_exp)  # sample size: number of Pts
+path_n = 146  # number of pathways
+pc = 5  # number of PCs
+
+# data creation-EXP
+pca_exp = pca_exp.to_numpy()
+exp_data = np.zeros((n, path_n, pc))
+for i in range(n):
+    for j in range(path_n):
+        exp_data[i, j, :] = pca_exp[i, j * pc:(j + 1) * pc]
+
+# data creation-CNV
+pca_cnv = pca_cnv.to_numpy()
+cnv_data = np.zeros((n, path_n, pc))
+for i in range(n):
+    for j in range(path_n):
+        cnv_data[i, j, :] = pca_cnv[i, j * pc:(j + 1) * pc]
+
+# data creation-MT
+pca_mt = pca_mt.to_numpy()
+mt_data = np.zeros((n, path_n, pc))
+for i in range(n):
+    for j in range(path_n):
+        mt_data[i, j, :] = pca_mt[i, j * pc:(j + 1) * pc]
+
+# data merge: mRNA expression, CNV, and MT with a specific number of PCs
+no_pc = 2  # use the first 2PCs among 5 PCs
+all_data = np.zeros((n, path_n, no_pc * 3))
+for i in range(n):
+        all_data[i, :, :] = np.concatenate((exp_data[i, :, 0:no_pc], cnv_data[i, :, 0:no_pc], mt_data[i, :, 0:no_pc]),axis=1)
+
+clinical = clinical.to_numpy()
+age = clinical[:, 4]
+survival = clinical[:, 5]
+os_months = clinical[:, 6]
+idx0 = np.where((survival == 1) & (os_months <= 24))
+idx1 = np.where(os_months > 24)
+
+bio_data=clinical[:, 7:10]
+all_data = all_data[:, :, :]
+
+data_0 = all_data[idx0, :, :]
+data_0 = data_0[0, :, :, :]
+data_1 = all_data[idx1, :, :]
+data_1 = data_1[0, :, :, :]
+
+age_0 = age[idx0]
+age_1 = age[idx1]
+
+bio_data_0 = bio_data[idx0, : ]
+bio_data_0 = bio_data_0[0, :, :]
+bio_data_1 = bio_data[idx1,:]
+bio_data_1 = bio_data_1[0, :, :]
+
+outcomes_0 = np.zeros(len(idx0[0]))
+outcomes_1 = np.ones(len(idx1[0]))
+
+## data merge
+age_r = np.concatenate((age_0, age_1))
+data = np.concatenate((data_0, data_1))
+outcomes = np.concatenate((outcomes_0, outcomes_1))
+bio_data_r=np.concatenate((bio_data_0, bio_data_1))
+
+## DEEP LEARNING
+batch_size = 64
+epochs = 30
+
+num_classes = 2
+num_runs = 30
+k_fold = 5
+
+# input image dimension
+img_rows, img_cols = 146, 6
+
+# use all data for modeling
+train1 = train = range(data.shape[0])
+test = range(data.shape[0])
+validation = range(data.shape[0])
+
+x_train = data[train, :, :]
+y_train = outcomes[train]
+
+x_validation = data[validation, :, :]
+y_validation = outcomes[validation]
+
+x_test = data[test, :, :]
+y_test = outcomes[test]
+
+# age
+age_r1=age_r[train1]
+age_train = age_r1[train]
+age_validation = age_r1[validation]
+age_test = age_r[test]
+
+# bio
+bio_data_r1 = bio_data_r[train1, :]
+bio_data_train = bio_data_r1[train,:]
+bio_data_validation = bio_data_r1[validation, :]
+bio_data_test = bio_data_r[test,:]
+
+cli_train=np.concatenate((np.matrix([age_train]).T, bio_data_train), axis=1)
+cli_validation = np.concatenate((np.matrix([age_validation]).T, bio_data_validation), axis=1)
+cli_test = np.concatenate((np.matrix([age_test]).T, bio_data_test), axis=1)
+
+x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)  # rgb- add one more dimensionality
+x_validation = x_validation.reshape(x_validation.shape[0], img_rows, img_cols, 1)
+x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
+input_shape = (img_rows, img_cols, 1)
+
+x_train = x_train.astype('float32')
+x_validation = x_validation.astype('float32')
+x_test = x_test.astype('float32')
+
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_validation.shape[0], 'validation samples')
+print(x_test.shape[0], 'test samples')
+
+y_train = keras.utils.to_categorical(y_train, num_classes)
+y_validation = keras.utils.to_categorical(y_validation, num_classes)
+y_test = keras.utils.to_categorical(y_test, num_classes)
+
+image_input = Input(shape=input_shape)
+other_data_input = Input(shape=(1,))
+
+# First convolution
+conv1 = Conv2D(32, kernel_size=(3, 3),
+                activation='relu', padding='same'
+                )(image_input)
+# Second Convolution
+conv2 = Conv2D(64, (3, 3), activation='relu', padding='same'
+                )(conv1)
+conv2 = MaxPooling2D(pool_size=(4, 2))(conv2)
+conv2 = Dropout(0.25)(conv2)
+first_part_output = Flatten()(conv2)
+
+#merged_model = keras.layers.concatenate([first_part_output, other_data_input])
+# without age
+merged_model = first_part_output
+merged_model = Dense(64, activation='relu')(merged_model)
+merged_model = Dropout(0.5)(merged_model)
+
+predictions = Dense(num_classes, activation='softmax')(merged_model)
+
+# Now create the model
+model = Model(inputs=[image_input, other_data_input], outputs=predictions)
+model.summary()
+layers = model.layers
+
+lr = 0.0001
+beta_1 = 0.9
+beta_2 = 0.999
+optimizer = Adam(lr=lr, beta_1=beta_1, beta_2=beta_2)
+
+model.compile(optimizer=optimizer, loss='binary_crossentropy',
+              metrics=['accuracy'])
+
+class_weighting = {0: 1, 1: 4.2}  # class weight
+model.fit([x_train, age_train], [y_train],
+          batch_size=batch_size,
+          epochs=epochs,
+          verbose=1, class_weight=class_weighting,
+          validation_data=([x_validation, age_validation], [y_validation]))
+
+score = model.predict([x_test, age_test], verbose=0)
+model.save('PathCNN_model.h5')
+
+observed = y_test
+predicted = score
+
+observed = y_test
+predicted = score
+
+auc = roc_auc_score(observed[:,0], predicted[:,0])
+tn, fp, fn, tp = confusion_matrix(observed[:,0]>0.5, predicted[:,0]>0.5).ravel()
+sen = tn/(tn+fp)
+spe = tp/(tp+fn)
+acc = (tp+tn)/(tp+tn+fp+fn)
+print(f"AUC  Sen. Spe. Acc.\n{auc:0.2f} {sen:0.2f} {spe:0.2f} {acc:0.2f}")