python-scripts/runSingleDNN.py
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score, classification_report
import keras
from keras.layers import Input, Dense, concatenate, Dropout
from keras.models import Model

# Train two neural networks: build_NN_model1 is a late-fusion classifier over
# two omics inputs; build_NN_model2 (below) classifies a single concatenated
# feature matrix.
def build_NN_model1(omics, class_num):
    omics1 = omics[0]
    omics2 = omics[1]

    input1_dim = omics1.shape[1]
    input2_dim = omics2.shape[1]

    # omics1 branch
    input_factor1 = Input(shape=(input1_dim,), name='omics1')
    omics1_nn = Dense(1000, activation='relu')(input_factor1)
    omics1_nn = Dropout(0.1)(omics1_nn)
    omics1_nn = Dense(100, activation='relu')(omics1_nn)
    omics1_nn = Dropout(0.1)(omics1_nn)

    # omics2 branch
    input_factor2 = Input(shape=(input2_dim,), name='omics2')
    omics2_nn = Dense(1000, activation='relu')(input_factor2)
    omics2_nn = Dropout(0.1)(omics2_nn)
    omics2_nn = Dense(100, activation='relu')(omics2_nn)
    omics2_nn = Dropout(0.1)(omics2_nn)

    # late fusion: concatenate the branch embeddings, then classify
    mid_concat = concatenate([omics1_nn, omics2_nn])
    nn_classifier = Dense(100, activation='relu')(mid_concat)
    nn_classifier = Dropout(0.1)(nn_classifier)
    nn_classifier = Dense(50, activation='relu')(nn_classifier)
    nn_classifier = Dropout(0.1)(nn_classifier)
    nn_classifier = Dense(10, activation='relu')(nn_classifier)
    nn_classifier = Dense(class_num, activation='softmax', name='classifier')(nn_classifier)

    my_metrics = {'classifier': ['acc']}
    my_loss = {'classifier': 'categorical_crossentropy'}
    adam = keras.optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    zlyNN = Model(inputs=[input_factor1, input_factor2], outputs=nn_classifier)
    zlyNN.compile(optimizer=adam, loss=my_loss, metrics=my_metrics)
    return zlyNN

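# Minimal usage sketch (illustrative only; the shapes and names below are
# assumptions, not part of the original pipeline):
#   x1 = np.random.rand(120, 50)   # hypothetical omics-1: 120 samples x 50 features
#   x2 = np.random.rand(120, 30)   # hypothetical omics-2
#   m = build_NN_model1([x1, x2], class_num=3)
#   m.fit([x1, x2], y_onehot, epochs=5, batch_size=16)   # y_onehot: (120, 3)
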

def build_NN_model2(omics, class_num):
    input_dim = omics.shape[1]

    # single input over the concatenated omics features
    input_factor1 = Input(shape=(input_dim,), name='omics')
    omics1_nn = Dense(2000, activation='relu')(input_factor1)
    omics1_nn = Dropout(0.1)(omics1_nn)
    omics1_nn = Dense(500, activation='relu')(omics1_nn)
    omics1_nn = Dropout(0.1)(omics1_nn)
    omics1_nn = Dense(100, activation='relu')(omics1_nn)
    omics1_nn = Dropout(0.1)(omics1_nn)
    nn_classifier = Dense(50, activation='relu')(omics1_nn)
    # binary task: one sigmoid unit; multi-class: softmax over class_num units
    if class_num == 2:
        nn_classifier = Dense(1, activation='sigmoid', name='classifier')(nn_classifier)
    else:
        nn_classifier = Dense(class_num, activation='softmax', name='classifier')(nn_classifier)

    my_metrics = {'classifier': ['acc']}
    my_loss_bi = {'classifier': 'binary_crossentropy'}
    my_loss_multi = {'classifier': 'categorical_crossentropy'}
    zlyNN = Model(inputs=[input_factor1], outputs=nn_classifier)
    if class_num == 2:
        zlyNN.compile(optimizer='adam', loss=my_loss_bi, metrics=my_metrics)
    else:
        zlyNN.compile(optimizer='adam', loss=my_loss_multi, metrics=my_metrics)
    return zlyNN

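# Note: when class_num == 2 the sigmoid head expects 0/1 labels of shape
# (n_samples,) or (n_samples, 1), not the one-hot vectors used on the softmax
# path, so callers must encode labels accordingly.
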

if __name__ == '__main__':

    # Commented-out experiment: late-fusion NN on the simulated datasets
    # (three omics types, stratified k-fold cross-validation).
    # datatypes=["equal","heterogeneous"]
    # typenums=[5,10,15]
    # noise_factor=0.5
    # savepath='./result/simulations/lfnn_res.txt'
    # with open(savepath, 'w') as f2:
    #     for datatype in datatypes:
    #         f2.write(datatype+'\n')
    #         for typenum in typenums:
    #             f2.write(str(typenum)+'\n')
    #             datapath='data/simulations/{}/{}'.format(datatype, typenum)
    #             resultpath='result/simulations/{}/{}'.format(datatype, typenum)
    #             labels = np.loadtxt('{}/c.txt'.format(datapath))

    #             omics1 = np.loadtxt('{}/o1.txt'.format(datapath))
    #             omics1 = np.transpose(omics1)
    #             omics1 = normalize(omics1, axis=0, norm='max')

    #             omics2 = np.loadtxt('{}/o2.txt'.format(datapath))
    #             omics2 = np.transpose(omics2)
    #             omics2 = normalize(omics2, axis=0, norm='max')

    #             omics3 = np.loadtxt('{}/o3.txt'.format(datapath))
    #             omics3 = np.transpose(omics3)
    #             omics3 = normalize(omics3, axis=0, norm='max')

    #             omics = np.concatenate((omics1, omics2, omics3), axis=1)

    #             # k-fold cross-validation
    #             all_acc = []
    #             all_f1_macro = []
    #             all_f1_weighted = []

    #             kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    #             for train_ix, test_ix in kfold.split(omics, labels):
    #                 omics_tobuild=[omics1,omics2,omics3]
    #                 train_X_1=omics1[train_ix]
    #                 train_X_2=omics2[train_ix]
    #                 train_X_3=omics3[train_ix]

    #                 test_X_1=omics1[test_ix]
    #                 test_X_2=omics2[test_ix]
    #                 test_X_3=omics3[test_ix]
    #                 # select rows
    #                 train_X, test_X = [train_X_1,train_X_2,train_X_3],[test_X_1,test_X_2,test_X_3]
    #                 train_y, test_y = labels[train_ix], labels[test_ix]
    #                 # summarize train and test composition
    #                 unique, count = np.unique(train_y, return_counts=True)
    #                 train_data_count = dict(zip(unique, count))
    #                 print('train:' + str(train_data_count))
    #                 unique, count = np.unique(test_y, return_counts=True)
    #                 test_data_count = dict(zip(unique, count))
    #                 print('test:' + str(test_data_count))

    #                 class_num=typenum
    #                 # one-hot encode the labels for the multi-class output
    #                 train_y = list(np.int_(train_y))
    #                 y = []
    #                 num = len(train_y)
    #                 for i in range(num):
    #                     tmp = np.zeros(class_num, dtype='uint8')
    #                     tmp[train_y[i]] = 1
    #                     y.append(tmp)
    #                 train_y = np.array(y)

    #                 test_y = list(np.int_(test_y))
    #                 y = []
    #                 num = len(test_y)
    #                 for i in range(num):
    #                     tmp = np.zeros(class_num, dtype='uint8')
    #                     tmp[test_y[i]] = 1
    #                     y.append(tmp)
    #                 test_y = np.array(y)

    #                 model = build_NN_model1(omics_tobuild,class_num)
    #                 history = model.fit(train_X, train_y, epochs=50, verbose=2, batch_size=16, shuffle=True, validation_data=(test_X, test_y))
    #                 y_true = []
    #                 for i in range(len(test_y)):
    #                     y_true.append(np.argmax(test_y[i]))
    #                 predictions = model.predict(test_X)
    #                 y_pred = []
    #                 for i in range(len(predictions)):
    #                     y_pred.append(np.argmax(predictions[i]))
    #                 acc = accuracy_score(y_true, y_pred)
    #                 f1_macro = f1_score(y_true, y_pred, average='macro')
    #                 # f1_micro = f1_score(y_true, y_pred, average='micro')
    #                 f1_weighted = f1_score(y_true, y_pred, average='weighted')
    #                 all_acc.append(acc)
    #                 all_f1_macro.append(f1_macro)
    #                 all_f1_weighted.append(f1_weighted)

    #                 print(classification_report(y_true, y_pred))
    #             print('caicai' * 20)
    #             print('acc:{all_acc}\nf1_macro:{all_f1_macro}\nf1_weighted:{all_f1_weighted}\n'
    #                   .format(all_acc=all_acc, all_f1_macro=all_f1_macro, all_f1_weighted=all_f1_weighted))
    #             avg_acc = np.mean(all_acc)
    #             avg_f1_macro = np.mean(all_f1_macro)
    #             avg_f1_weighted = np.mean(all_f1_weighted)

    #             print('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #                   .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
    #             f2.write('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #                      .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
    #         f2.write('*'*20)

    # groundtruth = np.loadtxt('{}/c.txt'.format(datapath))
    # groundtruth = list(np.int_(groundtruth))

    # Commented-out experiment: early-fusion NN (build_NN_model2) on the
    # single-cell dataset with concatenated omics features.
    # savepath='./result/single-cell/efnn_res.txt'
    # with open(savepath, 'w') as f2:
    #     datapath = 'data/single-cell/'
    #     resultpath = 'result/single-cell/'
    #     labels = np.loadtxt('{}/c.txt'.format(datapath))

    #     omics = np.loadtxt('{}/omics.txt'.format(datapath))
    #     omics = np.transpose(omics)
    #     omics1=omics[0:206]
    #     omics2=omics[206:412]
    #     omics1 = normalize(omics1, axis=0, norm='max')
    #     omics2 = normalize(omics2, axis=0, norm='max')
    #     omics = np.concatenate((omics1, omics2), axis=1)

    #     # k-fold cross-validation
    #     all_acc = []
    #     all_f1_macro = []
    #     all_f1_weighted = []

    #     kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
    #     for train_ix, test_ix in kfold.split(omics, labels):
    #         train_X, test_X = omics[train_ix], omics[test_ix]
    #         train_y, test_y = labels[train_ix], labels[test_ix]
    #         # summarize train and test composition
    #         unique, count = np.unique(train_y, return_counts=True)
    #         train_data_count = dict(zip(unique, count))
    #         print('train:' + str(train_data_count))
    #         unique, count = np.unique(test_y, return_counts=True)
    #         test_data_count = dict(zip(unique, count))
    #         print('test:' + str(test_data_count))

    #         class_num=3
    #         # one-hot encode the labels for the multi-class output
    #         train_y = list(np.int_(train_y))
    #         y = []
    #         num = len(train_y)
    #         for i in range(num):
    #             tmp = np.zeros(class_num, dtype='uint8')
    #             tmp[train_y[i]] = 1
    #             y.append(tmp)
    #         train_y = np.array(y)

    #         test_y = list(np.int_(test_y))
    #         y = []
    #         num = len(test_y)
    #         for i in range(num):
    #             tmp = np.zeros(class_num, dtype='uint8')
    #             tmp[test_y[i]] = 1
    #             y.append(tmp)
    #         test_y = np.array(y)

    #         model = build_NN_model2(omics, class_num)
    #         history = model.fit(train_X, train_y, epochs=50, verbose=2, batch_size=8, shuffle=True,
    #                             validation_data=(test_X, test_y))
    #         y_true = []
    #         for i in range(len(test_y)):
    #             y_true.append(np.argmax(test_y[i]))
    #         predictions = model.predict(test_X)
    #         y_pred = []
    #         for i in range(len(predictions)):
    #             y_pred.append(np.argmax(predictions[i]))
    #         acc = accuracy_score(y_true, y_pred)
    #         f1_macro = f1_score(y_true, y_pred, average='macro')
    #         # f1_micro = f1_score(y_true, y_pred, average='micro')
    #         f1_weighted = f1_score(y_true, y_pred, average='weighted')
    #         all_acc.append(acc)
    #         all_f1_macro.append(f1_macro)
    #         all_f1_weighted.append(f1_weighted)

    #         print(classification_report(y_true, y_pred))
    #     print('caicai' * 20)
    #     print('acc:{all_acc}\nf1_macro:{all_f1_macro}\nf1_weighted:{all_f1_weighted}\n'
    #           .format(all_acc=all_acc, all_f1_macro=all_f1_macro, all_f1_weighted=all_f1_weighted))
    #     avg_acc = np.mean(all_acc)
    #     avg_f1_macro = np.mean(all_f1_macro)
    #     avg_f1_weighted = np.mean(all_f1_weighted)

    #     print('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #           .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
    #     f2.write('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
    #              .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))

    # Active experiment: late-fusion NN (build_NN_model1) on the single-cell
    # dataset.
    savepath = './result/single-cell/lfnn_res1.txt'
    with open(savepath, 'w') as f2:
        datapath = 'data/single-cell/'
        resultpath = 'result/single-cell/'
        labels = np.loadtxt('{}/c.txt'.format(datapath))

        # rows 0-205 and rows 206-411 of the transposed matrix appear to hold
        # the two omics views of the same 206 cells
        omics = np.loadtxt('{}/omics.txt'.format(datapath))
        omics = np.transpose(omics)
        omics1 = omics[0:206]
        omics2 = omics[206:412]
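        # sklearn's normalize with axis=0 and norm='max' rescales each feature
        # column by its maximum absolute value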
        omics1 = normalize(omics1, axis=0, norm='max')
        omics2 = normalize(omics2, axis=0, norm='max')
        omics = np.concatenate((omics1, omics2), axis=1)

        # k-fold cross-validation
        all_acc = []
        all_f1_macro = []
        all_f1_weighted = []

        kfold = StratifiedKFold(n_splits=4, shuffle=True, random_state=1)
        for train_ix, test_ix in kfold.split(omics, labels):
            omics_tobuild = [omics1, omics2]
            train_X_1 = omics1[train_ix]
            train_X_2 = omics2[train_ix]

            test_X_1 = omics1[test_ix]
            test_X_2 = omics2[test_ix]

            # select rows
            train_X, test_X = [train_X_1, train_X_2], [test_X_1, test_X_2]
            train_y, test_y = labels[train_ix], labels[test_ix]
            # summarize train and test composition
            unique, count = np.unique(train_y, return_counts=True)
            train_data_count = dict(zip(unique, count))
            print('train:' + str(train_data_count))
            unique, count = np.unique(test_y, return_counts=True)
            test_data_count = dict(zip(unique, count))
            print('test:' + str(test_data_count))

            class_num = 3
            # one-hot encode the labels for the multi-class output
            train_y = list(np.int_(train_y))
            y = []
            num = len(train_y)
            for i in range(num):
                tmp = np.zeros(class_num, dtype='uint8')
                tmp[train_y[i]] = 1
                y.append(tmp)
            train_y = np.array(y)

            test_y = list(np.int_(test_y))
            y = []
            num = len(test_y)
            for i in range(num):
                tmp = np.zeros(class_num, dtype='uint8')
                tmp[test_y[i]] = 1
                y.append(tmp)
            test_y = np.array(y)
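            # equivalent shortcut: the two encoding loops above do the same
            # thing as keras.utils.to_categorical, e.g.
            #   train_y = keras.utils.to_categorical(train_y, class_num)
            #   test_y = keras.utils.to_categorical(test_y, class_num)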
            model = build_NN_model1(omics_tobuild, class_num)
            history = model.fit(train_X, train_y, epochs=50, verbose=2, batch_size=32,
                                shuffle=True, validation_data=(test_X, test_y))
            y_true = []
            for i in range(len(test_y)):
                y_true.append(np.argmax(test_y[i]))
            predictions = model.predict(test_X)
            y_pred = []
            for i in range(len(predictions)):
                y_pred.append(np.argmax(predictions[i]))
            acc = accuracy_score(y_true, y_pred)
            f1_macro = f1_score(y_true, y_pred, average='macro')
            # f1_micro = f1_score(y_true, y_pred, average='micro')
            f1_weighted = f1_score(y_true, y_pred, average='weighted')
            all_acc.append(acc)
            all_f1_macro.append(f1_macro)
            all_f1_weighted.append(f1_weighted)

            print(classification_report(y_true, y_pred))
            # run a single fold only (hence "Single" in the script name); the
            # means below therefore summarize just this one fold
            break
        print('caicai' * 20)
        print('acc:{all_acc}\nf1_macro:{all_f1_macro}\nf1_weighted:{all_f1_weighted}\n'
              .format(all_acc=all_acc, all_f1_macro=all_f1_macro, all_f1_weighted=all_f1_weighted))
        avg_acc = np.mean(all_acc)
        avg_f1_macro = np.mean(all_f1_macro)
        avg_f1_weighted = np.mean(all_f1_weighted)

        print('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
              .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
        f2.write('acc:{avg_acc}\nf1_macro:{avg_f1_macro}\nf1_weighted:{avg_f1_weighted}\n'
                 .format(avg_acc=avg_acc, avg_f1_macro=avg_f1_macro, avg_f1_weighted=avg_f1_weighted))
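
# Usage (assumption: run from the repository root so the relative paths resolve):
#   python python-scripts/runSingleDNN.py
# Reads data/single-cell/c.txt and data/single-cell/omics.txt; writes metrics
# to result/single-cell/lfnn_res1.txt.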