# python-scripts/runSingleCNN.py
import numpy as np
import keras
from keras.layers import Input, Dense, Dropout, Reshape, Conv1D, MaxPool1D, Flatten, concatenate
from keras.models import Model
from sklearn.preprocessing import normalize
from sklearn.metrics import f1_score, accuracy_score, classification_report
from sklearn.model_selection import StratifiedKFold
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Let TensorFlow allocate GPU memory on demand instead of reserving it all at start-up.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

# Neural-network model builders.
# build_NN_model1: two-branch (late-fusion) CNN -- one Conv1D tower per omics
# matrix, with the flattened tower outputs concatenated into a dense classifier.
def build_NN_model1(omics, class_num):
    omics1 = omics[0]
    omics2 = omics[1]
    input1_dim = omics1.shape[1]
    input2_dim = omics2.shape[1]

    # omics1 tower
    input_factor1 = Input(shape=(input1_dim,), name='omics1')
    input_re1 = Reshape((-1, 1))(input_factor1)
    omics1_cnn = Conv1D(32, 300, activation='relu')(input_re1)
    omics1_cnn = MaxPool1D(100)(omics1_cnn)
    flatten1 = Flatten()(omics1_cnn)

    # omics2 tower
    input_factor2 = Input(shape=(input2_dim,), name='omics2')
    input_re2 = Reshape((-1, 1))(input_factor2)
    omics2_cnn = Conv1D(32, 100, activation='relu', name='omics2_cnn_1')(input_re2)
    omics2_cnn = MaxPool1D(50)(omics2_cnn)
    flatten2 = Flatten(name='flatten2')(omics2_cnn)

    mid_concat = concatenate([flatten1, flatten2])

    # classifier head
    nn_classifier = Dense(100, activation='relu')(mid_concat)
    nn_classifier = Dropout(0.1)(nn_classifier)
    nn_classifier = Dense(50, activation='relu')(nn_classifier)
    nn_classifier = Dropout(0.1)(nn_classifier)
    nn_classifier = Dense(10, activation='relu')(nn_classifier)
    nn_classifier = Dense(class_num, activation='softmax', name='classifier')(nn_classifier)

    my_metrics = {'classifier': ['acc']}
    my_loss = {'classifier': 'categorical_crossentropy'}
    adam = keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    zlyNN = Model(inputs=[input_factor1, input_factor2], outputs=nn_classifier)
    zlyNN.compile(optimizer=adam, loss=my_loss, metrics=my_metrics)
    return zlyNN

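
# A minimal, illustrative sanity check for build_NN_model1 (defined but never
# called by this script). All sizes are hypothetical stand-ins: 16 samples and
# feature widths 500/300, chosen only so the 300- and 100-wide convolution
# kernels and the 100/50 pooling windows fit.
def _demo_build_NN_model1():
    rng = np.random.RandomState(0)
    omics1 = rng.rand(16, 500)  # stand-in for the first omics matrix
    omics2 = rng.rand(16, 300)  # stand-in for the second omics matrix
    labels = keras.utils.to_categorical(rng.randint(0, 3, size=16), num_classes=3)
    model = build_NN_model1([omics1, omics2], class_num=3)
    model.fit([omics1, omics2], labels, epochs=1, batch_size=4, verbose=0)
    print(model.predict([omics1, omics2]).shape)  # expected: (16, 3)
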

# build_NN_model2: single-branch (early-fusion) CNN over one concatenated omics
# matrix -- two Conv1D/MaxPool stages and a dense head. A binary problem gets a
# single sigmoid unit with binary_crossentropy; otherwise a softmax over
# class_num units with categorical_crossentropy.
def build_NN_model2(omics, class_num):
    input_dim = omics.shape[1]

    input_factor1 = Input(shape=(input_dim,), name='omics')
    input_re = Reshape((-1, 1))(input_factor1)
    omics1_cnn = Conv1D(32, 1000, activation='relu')(input_re)
    omics1_cnn = MaxPool1D(100)(omics1_cnn)
    omics1_cnn = Conv1D(16, 50, activation='relu')(omics1_cnn)
    omics1_cnn = MaxPool1D(10)(omics1_cnn)
    flatten = Flatten()(omics1_cnn)

    nn_classifier = Dense(50, activation='relu')(flatten)
    if class_num == 2:
        nn_classifier = Dense(1, activation='sigmoid', name='classifier')(nn_classifier)
    else:
        nn_classifier = Dense(class_num, activation='softmax', name='classifier')(nn_classifier)

    my_metrics = {'classifier': ['acc']}
    my_loss_multi = {'classifier': 'categorical_crossentropy'}
    my_loss_bi = {'classifier': 'binary_crossentropy'}

    zlyNN = Model(inputs=[input_factor1], outputs=nn_classifier)
    if class_num == 2:
        zlyNN.compile(optimizer='adam', loss=my_loss_bi, metrics=my_metrics)
    else:
        zlyNN.compile(optimizer='adam', loss=my_loss_multi, metrics=my_metrics)
    return zlyNN

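
# A minimal, illustrative sanity check for build_NN_model2's binary branch
# (defined but never called by this script). The 7000-feature width and 16
# samples are hypothetical stand-ins, picked so the 1000-wide kernel and the
# 100/10 pooling windows leave a positive sequence length. With class_num=2 the
# labels stay a plain 0/1 vector, since the head is a single sigmoid unit.
def _demo_build_NN_model2():
    rng = np.random.RandomState(0)
    X = rng.rand(16, 7000)          # stand-in fused omics matrix
    y = rng.randint(0, 2, size=16)  # binary labels; no one-hot needed
    model = build_NN_model2(X, class_num=2)
    model.fit(X, y, epochs=1, batch_size=4, verbose=0)
    print(model.predict(X).shape)  # expected: (16, 1)
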

if __name__ == '__main__':

    # --- Simulation experiments (late-fusion NN), kept for reference ---
    # datatypes=["equal","heterogeneous"]
    # typenums=[5,10,15]
    # noise_factor=0.5
    # savepath='./result/simulations/lfnn_res.txt'
    # with open(savepath, 'w') as f2:
    #     for datatype in datatypes:
    #         f2.write(datatype+'\n')
    #         for typenum in typenums:
    #             f2.write(str(typenum)+'\n')
    #             datapath='data/simulations/{}/{}'.format(datatype, typenum)
    #             resultpath='result/simulations/{}/{}'.format(datatype, typenum)
    #             labels = np.loadtxt('{}/c.txt'.format(datapath))
    #
    #             omics1 = np.loadtxt('{}/o1.txt'.format(datapath))
    #             omics1 = np.transpose(omics1)
    #             omics1 = normalize(omics1, axis=0, norm='max')
    #
    #             omics2 = np.loadtxt('{}/o2.txt'.format(datapath))
    #             omics2 = np.transpose(omics2)
    #             omics2 = normalize(omics2, axis=0, norm='max')
    #
    #             omics3 = np.loadtxt('{}/o3.txt'.format(datapath))
    #             omics3 = np.transpose(omics3)
    #             omics3 = normalize(omics3, axis=0, norm='max')
    #
    #             omics = np.concatenate((omics1, omics2, omics3), axis=1)
    #
    #             # k-fold cross-validation
    #             all_acc = []
    #             all_f1_macro = []
    #             all_f1_weighted = []
    #
    #             kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    #             for train_ix, test_ix in kfold.split(omics, labels):
    #                 omics_tobuild = [omics1, omics2, omics3]
    #                 train_X_1 = omics1[train_ix]
    #                 train_X_2 = omics2[train_ix]
    #                 train_X_3 = omics3[train_ix]
    #
    #                 test_X_1 = omics1[test_ix]
    #                 test_X_2 = omics2[test_ix]
    #                 test_X_3 = omics3[test_ix]
    #                 # select rows
    #                 train_X, test_X = [train_X_1, train_X_2, train_X_3], [test_X_1, test_X_2, test_X_3]
    #                 train_y, test_y = labels[train_ix], labels[test_ix]
    #                 # summarize train and test composition
    #                 unique, count = np.unique(train_y, return_counts=True)
    #                 train_data_count = dict(zip(unique, count))
    #                 print('train:' + str(train_data_count))
    #                 unique, count = np.unique(test_y, return_counts=True)
    #                 test_data_count = dict(zip(unique, count))
    #                 print('test:' + str(test_data_count))
    #
    #                 class_num = typenum
    #                 # one-hot encode the labels (multi-class output)
    #                 train_y = list(np.int_(train_y))
    #                 y = []
    #                 num = len(train_y)
    #                 for i in range(num):
    #                     tmp = np.zeros(class_num, dtype='uint8')
    #                     tmp[train_y[i]] = 1
    #                     y.append(tmp)
    #                 train_y = np.array(y)
    #
    #                 test_y = list(np.int_(test_y))
    #                 y = []
    #                 num = len(test_y)
    #                 for i in range(num):
    #                     tmp = np.zeros(class_num, dtype='uint8')
    #                     tmp[test_y[i]] = 1
    #                     y.append(tmp)
    #                 test_y = np.array(y)
    #
    #                 model = build_NN_model1(omics_tobuild, class_num)
    #                 history = model.fit(train_X, train_y, epochs=50, verbose=2, batch_size=16, shuffle=True,
    #                                     validation_data=(test_X, test_y))
    #                 y_true = []
    #                 for i in range(len(test_y)):
    #                     y_true.append(np.argmax(test_y[i]))
    #                 predictions = model.predict(test_X)
    #                 y_pred = []
    #                 for i in range(len(predictions)):
    #                     y_pred.append(np.argmax(predictions[i]))
    #                 acc = accuracy_score(y_true, y_pred)
    #                 f1_macro = f1_score(y_true, y_pred, average='macro')
    #                 f1_weighted = f1_score(y_true, y_pred, average='weighted')
    #                 all_acc.append(acc)
    #                 all_f1_macro.append(f1_macro)
    #                 all_f1_weighted.append(f1_weighted)
    #
    #                 print(classification_report(y_true, y_pred))
    #             print('caicai' * 20)
    #             print('acc:{all_acc}\nf1_macro:{all_f1_macro}\nf1_weighted:{all_f1_weighted}\n'
    #                   .format(all_acc=all_acc, all_f1_macro=all_f1_macro, all_f1_weighted=all_f1_weighted))
    #             avg_acc = np.mean(all_acc)
    #             avg_f1_macro = np.mean(all_f1_macro)
    #             avg_f1_weighted = np.mean(all_f1_weighted)
    #
    #             print('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #                   .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
    #             f2.write('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #                      .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
    #         f2.write('*' * 20)

    # --- Single-cell experiment: early-fusion CNN (build_NN_model2) ---
    savepath = './result/single-cell/efcnn_res.txt'
    with open(savepath, 'w') as f2:
        datapath = 'data/single-cell/'
        resultpath = 'result/single-cell/'
        labels = np.loadtxt('{}/c.txt'.format(datapath))

        omics = np.loadtxt('{}/omics.txt'.format(datapath))
        omics = np.transpose(omics)
        omics1 = omics[0:206]
        omics2 = omics[206:412]
        omics1 = normalize(omics1, axis=0, norm='max')
        omics2 = normalize(omics2, axis=0, norm='max')
        omics = np.concatenate((omics1, omics2), axis=1)

        # k-fold cross-validation
        all_acc = []
        all_f1_macro = []
        all_f1_weighted = []

        kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
        for train_ix, test_ix in kfold.split(omics, labels):
            train_X, test_X = omics[train_ix], omics[test_ix]
            train_y, test_y = labels[train_ix], labels[test_ix]
            # summarize train and test composition
            unique, count = np.unique(train_y, return_counts=True)
            train_data_count = dict(zip(unique, count))
            print('train:' + str(train_data_count))
            unique, count = np.unique(test_y, return_counts=True)
            test_data_count = dict(zip(unique, count))
            print('test:' + str(test_data_count))

            class_num = 3
            # one-hot encode the labels (multi-class output)
            train_y = list(np.int_(train_y))
            y = []
            num = len(train_y)
            for i in range(num):
                tmp = np.zeros(class_num, dtype='uint8')
                tmp[train_y[i]] = 1
                y.append(tmp)
            train_y = np.array(y)

            test_y = list(np.int_(test_y))
            y = []
            num = len(test_y)
            for i in range(num):
                tmp = np.zeros(class_num, dtype='uint8')
                tmp[test_y[i]] = 1
                y.append(tmp)
            test_y = np.array(y)
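
            # The two encoding loops above match Keras's built-in one-hot helper;
            # an equivalent replacement (not applied here) would be:
            #   train_y = keras.utils.to_categorical(np.int_(labels[train_ix]), num_classes=class_num)
            #   test_y = keras.utils.to_categorical(np.int_(labels[test_ix]), num_classes=class_num)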

            model = build_NN_model2(omics, class_num)
            history = model.fit(train_X, train_y, epochs=50, verbose=2, batch_size=8, shuffle=True,
                                validation_data=(test_X, test_y))
            y_true = []
            for i in range(len(test_y)):
                y_true.append(np.argmax(test_y[i]))
            predictions = model.predict(test_X)
            y_pred = []
            for i in range(len(predictions)):
                y_pred.append(np.argmax(predictions[i]))
            acc = accuracy_score(y_true, y_pred)
            f1_macro = f1_score(y_true, y_pred, average='macro')
            f1_weighted = f1_score(y_true, y_pred, average='weighted')
            all_acc.append(acc)
            all_f1_macro.append(f1_macro)
            all_f1_weighted.append(f1_weighted)

            print(classification_report(y_true, y_pred))
        print('caicai' * 20)
        print('acc:{all_acc}\nf1_macro:{all_f1_macro}\nf1_weighted:{all_f1_weighted}\n'
              .format(all_acc=all_acc, all_f1_macro=all_f1_macro, all_f1_weighted=all_f1_weighted))
        avg_acc = np.mean(all_acc)
        avg_f1_macro = np.mean(all_f1_macro)
        avg_f1_weighted = np.mean(all_f1_weighted)

        print('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
              .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
        f2.write('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
                 .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))

    # --- Single-cell experiment: late-fusion CNN (build_NN_model1), kept for reference ---
    # savepath = './result/single-cell/lfcnn_res1.txt'
    # with open(savepath, 'w') as f2:
    #     datapath = 'data/single-cell/'
    #     resultpath = 'result/single-cell/'
    #     labels = np.loadtxt('{}/c.txt'.format(datapath))
    #
    #     omics = np.loadtxt('{}/omics.txt'.format(datapath))
    #     omics = np.transpose(omics)
    #     omics1 = omics[0:206]
    #     omics2 = omics[206:412]
    #     omics1 = normalize(omics1, axis=0, norm='max')
    #     omics2 = normalize(omics2, axis=0, norm='max')
    #     omics = np.concatenate((omics1, omics2), axis=1)
    #
    #     # k-fold cross-validation
    #     all_acc = []
    #     all_f1_macro = []
    #     all_f1_weighted = []
    #
    #     kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    #     for train_ix, test_ix in kfold.split(omics, labels):
    #         omics_tobuild = [omics1, omics2]
    #         train_X_1 = omics1[train_ix]
    #         train_X_2 = omics2[train_ix]
    #
    #         test_X_1 = omics1[test_ix]
    #         test_X_2 = omics2[test_ix]
    #
    #         # select rows
    #         train_X, test_X = [train_X_1, train_X_2], [test_X_1, test_X_2]
    #         train_y, test_y = labels[train_ix], labels[test_ix]
    #         # summarize train and test composition
    #         unique, count = np.unique(train_y, return_counts=True)
    #         train_data_count = dict(zip(unique, count))
    #         print('train:' + str(train_data_count))
    #         unique, count = np.unique(test_y, return_counts=True)
    #         test_data_count = dict(zip(unique, count))
    #         print('test:' + str(test_data_count))
    #
    #         class_num = 3
    #         # one-hot encode the labels (multi-class output)
    #         train_y = list(np.int_(train_y))
    #         y = []
    #         num = len(train_y)
    #         for i in range(num):
    #             tmp = np.zeros(class_num, dtype='uint8')
    #             tmp[train_y[i]] = 1
    #             y.append(tmp)
    #         train_y = np.array(y)
    #
    #         test_y = list(np.int_(test_y))
    #         y = []
    #         num = len(test_y)
    #         for i in range(num):
    #             tmp = np.zeros(class_num, dtype='uint8')
    #             tmp[test_y[i]] = 1
    #             y.append(tmp)
    #         test_y = np.array(y)
    #
    #         model = build_NN_model1(omics_tobuild, class_num)
    #         history = model.fit(train_X, train_y, epochs=50, verbose=2, batch_size=32, shuffle=True,
    #                             validation_data=(test_X, test_y))
    #         y_true = []
    #         for i in range(len(test_y)):
    #             y_true.append(np.argmax(test_y[i]))
    #         predictions = model.predict(test_X)
    #         y_pred = []
    #         for i in range(len(predictions)):
    #             y_pred.append(np.argmax(predictions[i]))
    #         acc = accuracy_score(y_true, y_pred)
    #         f1_macro = f1_score(y_true, y_pred, average='macro')
    #         f1_weighted = f1_score(y_true, y_pred, average='weighted')
    #         all_acc.append(acc)
    #         all_f1_macro.append(f1_macro)
    #         all_f1_weighted.append(f1_weighted)
    #
    #         print(classification_report(y_true, y_pred))
    #         break  # this run stopped after the first fold
    #     print('caicai' * 20)
    #     print('acc:{all_acc}\nf1_macro:{all_f1_macro}\nf1_weighted:{all_f1_weighted}\n'
    #           .format(all_acc=all_acc, all_f1_macro=all_f1_macro, all_f1_weighted=all_f1_weighted))
    #     avg_acc = np.mean(all_acc)
    #     avg_f1_macro = np.mean(all_f1_macro)
    #     avg_f1_weighted = np.mean(all_f1_weighted)
    #
    #     print('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #           .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
    #     f2.write('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #              .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))