# lstm_predict.py
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 12 08:51:35 2019

@author: aaq109
"""

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, LSTM, TimeDistributed
from keras import backend as K
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc, precision_recall_curve, confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
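
# Each experiment below is labelled with a four-character flag string in the
# order HDF / MDF / LSTM / CA, e.g. '1111' = HDF+MDF+LSTM+CA and
# '0110' = MDF+LSTM (see the per-experiment comments further down).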

# Read data
N_visits = 15  # Maximum number of inpatient visits in the dataset


def read_data(exp, N_visits):
    label = 'sampledata_lstm_' + str(N_visits) + '.csv'
    print('Reading File: ', label)
    pidAdmMap = {}
    admDetailMap = {}
    output = []
    Weights = []
    VisitIds = []
    if exp[0:2] == '11':    # HDF and MDF features
        ind1 = 6
        ind2 = 202
    elif exp[0:2] == '10':  # HDF features only
        ind1 = 6
        ind2 = 17
    else:                   # MDF features only
        ind1 = 17
        ind2 = 202
    with open(label, 'r') as infd:
        infd.readline()  # skip the header row
        for line in infd:
            tokens = line.strip().split(',')
            pid = int(tokens[0])
            admId = tokens[1]
            det = tokens[ind1:ind2]  # 200 if 185 d2v vector is used
            output.append(tokens[5])
            Weights.append(tokens[203])
            VisitIds.append(tokens[1])
            admDetailMap[admId] = det  # one feature vector per admission id
            if pid in pidAdmMap:
                pidAdmMap[pid].append(admId)
            else:
                pidAdmMap[pid] = [admId]
    _list = []
    for patient in pidAdmMap.keys():
        a = [admDetailMap[xx] for xx in pidAdmMap[patient]]
        _list.append(a)
    X = np.array([np.array(xi) for xi in _list])
    Y = np.array(output)
    Sample_weight = np.array(Weights)
    X = X.astype(float)
    Y = Y.astype(float)
    Sample_weight = Sample_weight.astype(float)
    Y = Y.reshape(X.shape[0], N_visits, 1)
    return X, Y, Sample_weight, VisitIds
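
# read_data returns X of shape (n_patients, N_visits, n_features), Y of shape
# (n_patients, N_visits, 1), and a per-visit weight vector coded as
# 0 = dummy/padded visit, 1 = no readmission, 2 = readmission; the codes are
# remapped to actual training weights (W_classA/B/C) before each experiment.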


def ppv(y_true, y_pred):
    # Positive predictive value (precision), computed batch-wise
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    ppv = true_positives / (predicted_positives + K.epsilon())
    return ppv


def npv(y_true, y_pred):
    # Negative predictive value, computed batch-wise
    true_negatives = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))
    predicted_negatives = K.sum(K.round(K.clip(1 - y_pred, 0, 1)))
    npv = true_negatives / (predicted_negatives + K.epsilon())
    return npv


def sensitivity(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())


def specificity(y_true, y_pred):
    true_negatives = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))
    possible_negatives = K.sum(K.round(K.clip(1 - y_true, 0, 1)))
    return true_negatives / (possible_negatives + K.epsilon())


def recall(y_true, y_pred):
    # Identical to sensitivity; kept under its conventional name
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
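
# The metrics above operate on Keras tensors so they can be passed directly to
# model.compile(metrics=...); they are evaluated batch-wise, and K.epsilon()
# guards against division by zero in batches without positives (or negatives).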


def model_eval(model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train):
    from sklearn.preprocessing import binarize
    from sklearn.metrics import f1_score
    import operator
    # Pick the decision threshold on the training set by maximising the
    # expected cost saving, then evaluate on the test set.
    y_pred = model.predict(X_train).ravel()
    y_test = Y_train.ravel()
    g = (Sample_weight_train.ravel() > 0).astype(float)  # mask: 1 = real visit, 0 = dummy visit
    indices = np.where(g == 0)
    y_pred = np.delete(y_pred, indices, 0)
    y_test = np.delete(y_test, indices, 0)
    score = {}
    for thresh in np.arange(0.001, 1, 0.001):
        y_pred_class = binarize([y_pred], threshold=thresh)[0]
        cm = confusion_matrix(y_test, y_pred_class)
        score[thresh] = (48000 * cm[1, 1] * 0.5) - (7000 * (cm[1, 1] + cm[0, 1]))
    thresh = max(score.items(), key=operator.itemgetter(1))[0]

    y_pred = model.predict(X_test).ravel()
    y_test = Y_test.ravel()
    g = (Sample_weight_test.ravel() > 0).astype(float)
    if exp[2] == '1':  # LSTM experiments: mask out the padded dummy visits
        fpr, tpr, thetas = roc_curve(y_test, y_pred, sample_weight=g, pos_label=1)
        prc, recal, thetas = precision_recall_curve(y_test, y_pred, sample_weight=g)
        indices = np.where(g == 0)
        y_pred = np.delete(y_pred, indices, 0)
        y_test = np.delete(y_test, indices, 0)
    else:
        fpr, tpr, thetas = roc_curve(y_test, y_pred, pos_label=1)
        prc, recal, thetas = precision_recall_curve(y_test, y_pred)

    AUC_test = auc(fpr, tpr)
    PR_auc = auc(recal, prc)

    y_pred = binarize([y_pred], threshold=thresh)[0]
    cm = confusion_matrix(y_test, y_pred)
    cost_saved = (48000 * cm[1, 1] * 0.5) - (7000 * (cm[1, 1] + cm[0, 1]))
    Accuracy = (cm[0, 0] + cm[1, 1]) / cm.sum()
    Sensitivity_test = cm[1, 1] / (cm[1, 0] + cm[1, 1])
    Specificity_test = cm[0, 0] / (cm[0, 0] + cm[0, 1])
    F1_score = f1_score(y_test, y_pred)
    cost_saved = cost_saved / (np.sum(y_test) * (48000 - 7000) * 0.5)

    return Accuracy, AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved
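
# Threshold selection uses a cost heuristic: assuming each readmission costs
# 48000, interventions prevent about half of them, and each intervention costs
# 7000, the saving at threshold t is 48000*TP*0.5 - 7000*(TP+FP); cost_saved
# is then normalised by the maximum achievable saving,
# np.sum(y_test)*(48000-7000)*0.5.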


def save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp):
    label1 = 'AUC_test_' + exp + '.npy'
    label2 = 'Sensitivity_test_' + exp + '.npy'
    label3 = 'Specificity_test_' + exp + '.npy'
    label4 = 'PR_auc_' + exp + '.npy'
    label5 = 'f1_score_' + exp + '.npy'
    label6 = 'cost_saved_' + exp + '.npy'
    np.save(label1, AUC_test)
    np.save(label2, Sensitivity_test)
    np.save(label3, Specificity_test)
    np.save(label4, PR_auc)
    np.save(label5, F1_score)
    np.save(label6, cost_saved)
    val1 = np.fromiter(AUC_test.values(), dtype=float)
    val2 = np.fromiter(Sensitivity_test.values(), dtype=float)
    val3 = np.fromiter(Specificity_test.values(), dtype=float)
    val4 = np.fromiter(PR_auc.values(), dtype=float)
    val5 = np.fromiter(F1_score.values(), dtype=float)
    val6 = np.fromiter(cost_saved.values(), dtype=float)

    # Print mean and standard deviation over iterations, ignoring zero entries
    print(label1, [np.mean(val1[np.nonzero(val1)]), np.std(val1[np.nonzero(val1)])])
    print(label2, [np.mean(val2[np.nonzero(val2)]), np.std(val2[np.nonzero(val2)])])
    print(label3, [np.mean(val3[np.nonzero(val3)]), np.std(val3[np.nonzero(val3)])])
    print(label4, [np.mean(val4[np.nonzero(val4)]), np.std(val4[np.nonzero(val4)])])
    print(label5, [np.mean(val5[np.nonzero(val5)]), np.std(val5[np.nonzero(val5)])])
    print(label6, [np.mean(val6[np.nonzero(val6)]), np.std(val6[np.nonzero(val6)])])

    return None
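
# np.save pickles each metric dictionary as a 0-d object array, so reload with
# e.g. np.load('AUC_test_1111.npy', allow_pickle=True).item(), as done in the
# analysis section at the bottom of this script.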

## Define different experiments
# 1111 - HDF+MDF+LSTM+CA
exp = '1111'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 3  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 32, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Sample_weight = Sample_weight.reshape(X.shape[0], N_visits, 1)
Visits = np.array(VisitIds)
Visits = Visits.reshape(X.shape[0], N_visits, 1)
es = EarlyStopping(monitor='val_loss', patience=20, mode='min')
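
# Architecture for the LSTM experiments: per-visit feature vectors are embedded
# by a TimeDistributed Dense layer, fed through an LSTM with
# return_sequences=True, and mapped to one sigmoid output per visit, giving a
# readmission prediction after every admission in the sequence.
# sample_weight_mode='temporal' lets the per-visit weights silence the padded
# dummy visits during training.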

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    Sample_weight_train = Sample_weight_train.reshape(len(Sample_weight_train), N_visits)
    model = Sequential()
    model.add(TimeDistributed(Dense(NN_nodes[0], activation='sigmoid'), input_shape=(N_visits, X.shape[2])))
    model.add(LSTM(NN_nodes[1], return_sequences=True))
    model.add(TimeDistributed(Dense(NN_nodes[2], activation='sigmoid')))
    model.add(TimeDistributed(Dense(NN_nodes[3], activation='sigmoid')))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode='temporal',
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    # print(model.summary())
    # np.random.seed(1337)
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.3, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)


## Define different experiments
# 1110 - HDF+MDF+LSTM
exp = '1110'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 1  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 32, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Sample_weight = Sample_weight.reshape(X.shape[0], N_visits, 1)
Visits = np.array(VisitIds)
Visits = Visits.reshape(X.shape[0], N_visits, 1)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    Sample_weight_train = Sample_weight_train.reshape(len(Sample_weight_train), N_visits)
    model = Sequential()
    model.add(TimeDistributed(Dense(NN_nodes[0], activation='sigmoid'), input_shape=(N_visits, X.shape[2])))
    model.add(LSTM(NN_nodes[1], return_sequences=True))
    model.add(TimeDistributed(Dense(NN_nodes[2], activation='sigmoid')))
    model.add(TimeDistributed(Dense(NN_nodes[3], activation='sigmoid')))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode='temporal',
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    # print(model.summary())
    # np.random.seed(1337)
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)


## Define different experiments
# 0111 - MDF+LSTM+CA
exp = '0111'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}

# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 3  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 32, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Sample_weight = Sample_weight.reshape(X.shape[0], N_visits, 1)
Visits = np.array(VisitIds)
Visits = Visits.reshape(X.shape[0], N_visits, 1)
es = EarlyStopping(monitor='val_loss', patience=20, mode='min')

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    Sample_weight_train = Sample_weight_train.reshape(len(Sample_weight_train), N_visits)
    model = Sequential()
    model.add(TimeDistributed(Dense(NN_nodes[0], activation='sigmoid'), input_shape=(N_visits, X.shape[2])))
    model.add(LSTM(NN_nodes[1], return_sequences=True))
    model.add(TimeDistributed(Dense(NN_nodes[2], activation='sigmoid')))
    model.add(TimeDistributed(Dense(NN_nodes[3], activation='sigmoid')))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode='temporal',
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print(model.summary())
    # np.random.seed(1337)
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 0110 - MDF+LSTM
exp = '0110'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 1  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 32, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Sample_weight = Sample_weight.reshape(X.shape[0], N_visits, 1)
Visits = np.array(VisitIds)
Visits = Visits.reshape(X.shape[0], N_visits, 1)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    Sample_weight_train = Sample_weight_train.reshape(len(Sample_weight_train), N_visits)
    model = Sequential()
    model.add(TimeDistributed(Dense(NN_nodes[0], activation='sigmoid'), input_shape=(N_visits, X.shape[2])))
    model.add(LSTM(NN_nodes[1], return_sequences=True))
    model.add(TimeDistributed(Dense(NN_nodes[2], activation='sigmoid')))
    model.add(TimeDistributed(Dense(NN_nodes[3], activation='sigmoid')))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode='temporal',
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print(model.summary())
    # np.random.seed(1337)
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 1011 - HDF+LSTM+CA
exp = '1011'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}

# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 3  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [6, 3, 1]  # Number of nodes per layer (smaller net for HDF only)
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Sample_weight = Sample_weight.reshape(X.shape[0], N_visits, 1)
Visits = np.array(VisitIds)
Visits = Visits.reshape(X.shape[0], N_visits, 1)
es = EarlyStopping(monitor='val_loss', patience=20, mode='min')

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    Sample_weight_train = Sample_weight_train.reshape(len(Sample_weight_train), N_visits)
    model = Sequential()
    model.add(TimeDistributed(Dense(NN_nodes[0], activation='sigmoid'), input_shape=(N_visits, X.shape[2])))
    model.add(LSTM(NN_nodes[1], return_sequences=True))
    model.add(TimeDistributed(Dense(NN_nodes[2], activation='sigmoid')))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode='temporal',
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print(model.summary())
    # np.random.seed(1337)
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 1010 - HDF+LSTM
exp = '1010'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}

# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 1  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [6, 3, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Sample_weight = Sample_weight.reshape(X.shape[0], N_visits, 1)
Visits = np.array(VisitIds)
Visits = Visits.reshape(X.shape[0], N_visits, 1)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    Sample_weight_train = Sample_weight_train.reshape(len(Sample_weight_train), N_visits)
    model = Sequential()
    model.add(TimeDistributed(Dense(NN_nodes[0], activation='sigmoid'), input_shape=(N_visits, X.shape[2])))
    model.add(LSTM(NN_nodes[1], return_sequences=True))
    model.add(TimeDistributed(Dense(NN_nodes[2], activation='sigmoid')))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode='temporal',
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print(model.summary())
    # np.random.seed(1337)
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 1101 - HDF+MDF+CA
exp = '1101'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 3  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32 * N_visits  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Visits = np.array(VisitIds)
a, b, c = X.shape
X = X.reshape(a * b, c)
Y = Y.reshape(a * b, 1)
Sample_weight = Sample_weight.ravel()
Visits = Visits.reshape(a * N_visits, 1)
ind = np.where(Sample_weight == 0)
X = np.delete(X, ind, 0)
Y = np.delete(Y, ind, 0)
Sample_weight = np.delete(Sample_weight, ind, 0)
Visits = np.delete(Visits, ind, 0)
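
# For the baselines without an LSTM, the (patients x visits x features) tensor
# is flattened to one row per visit and the zero-weight dummy visits are
# dropped, so a plain feed-forward network is trained on individual admissions.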

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    model = Sequential()
    model.add(Dense(NN_nodes[0], activation='sigmoid', input_dim=c))
    model.add(Dense(NN_nodes[1], activation='sigmoid'))
    model.add(Dense(NN_nodes[2], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode=None,
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print(model.summary())
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 1100 - HDF+MDF
exp = '1100'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 1  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32 * N_visits  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Visits = np.array(VisitIds)
a, b, c = X.shape
X = X.reshape(a * b, c)
Y = Y.reshape(a * b, 1)
Sample_weight = Sample_weight.ravel()
Visits = Visits.reshape(a * N_visits, 1)
ind = np.where(Sample_weight == 0)
X = np.delete(X, ind, 0)
Y = np.delete(Y, ind, 0)
Sample_weight = np.delete(Sample_weight, ind, 0)
Visits = np.delete(Visits, ind, 0)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    model = Sequential()
    model.add(Dense(NN_nodes[0], activation='sigmoid', input_dim=c))
    model.add(Dense(NN_nodes[1], activation='sigmoid'))
    model.add(Dense(NN_nodes[2], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode=None,
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.3, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 1001 - HDF+CA
exp = '1001'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 3  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32 * N_visits  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [6, 3, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Visits = np.array(VisitIds)
a, b, c = X.shape
X = X.reshape(a * b, c)
Y = Y.reshape(a * b, 1)
Sample_weight = Sample_weight.ravel()
Visits = Visits.reshape(a * N_visits, 1)
ind = np.where(Sample_weight == 0)
X = np.delete(X, ind, 0)
Y = np.delete(Y, ind, 0)
Sample_weight = np.delete(Sample_weight, ind, 0)
Visits = np.delete(Visits, ind, 0)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    model = Sequential()
    model.add(Dense(NN_nodes[0], activation='sigmoid', input_dim=c))
    model.add(Dense(NN_nodes[1], activation='sigmoid'))
    model.add(Dense(NN_nodes[2], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode=None,
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 1000 - HDF only
exp = '1000'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 1  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32 * N_visits  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [6, 3, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Visits = np.array(VisitIds)
a, b, c = X.shape
X = X.reshape(a * b, c)
Y = Y.reshape(a * b, 1)
Sample_weight = Sample_weight.ravel()
Visits = Visits.reshape(a * N_visits, 1)
ind = np.where(Sample_weight == 0)
X = np.delete(X, ind, 0)
Y = np.delete(Y, ind, 0)
Sample_weight = np.delete(Sample_weight, ind, 0)
Visits = np.delete(Visits, ind, 0)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    model = Sequential()
    model.add(Dense(NN_nodes[0], activation='sigmoid', input_dim=c))
    model.add(Dense(NN_nodes[1], activation='sigmoid'))
    model.add(Dense(NN_nodes[2], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode=None,
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 0100 - MDF only
exp = '0100'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 1  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32 * N_visits  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [128, 64, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Visits = np.array(VisitIds)
a, b, c = X.shape
X = X.reshape(a * b, c)
Y = Y.reshape(a * b, 1)
Sample_weight = Sample_weight.ravel()
Visits = Visits.reshape(a * N_visits, 1)
ind = np.where(Sample_weight == 0)
X = np.delete(X, ind, 0)
Y = np.delete(Y, ind, 0)
Sample_weight = np.delete(Sample_weight, ind, 0)
Visits = np.delete(Visits, ind, 0)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    model = Sequential()
    model.add(Dense(NN_nodes[0], activation='sigmoid', input_dim=c))
    model.add(Dense(NN_nodes[1], activation='sigmoid'))
    model.add(Dense(NN_nodes[2], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode=None,
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)

## Define different experiments
# 0101 - MDF + CA only
exp = '0101'
AUC_test = {}
Accuracy_test = {}
PR_auc = {}
Sensitivity_test = {}
Specificity_test = {}
average_precision = {}
F1_score = {}
cost_saved = {}
# Set params
W_classA = 0  # Dummy visit weight
W_classB = 1  # No-readmission class weight
W_classC = 3  # Readmission class weight
E_pochs = 80  # Training epochs
B_size = 32 * N_visits  # Batch size
T_size = 0.3  # Fraction of samples used for testing
NN_nodes = [6, 3, 1]  # Number of nodes per layer
N_iter = 10

X, Y, Sample_weight, VisitIds = read_data(exp, N_visits)
Sample_weight[Sample_weight == 0] = W_classA
Sample_weight[Sample_weight == 1] = W_classB
Sample_weight[Sample_weight == 2] = W_classC
Visits = np.array(VisitIds)
a, b, c = X.shape
X = X.reshape(a * b, c)
Y = Y.reshape(a * b, 1)
Sample_weight = Sample_weight.ravel()
Visits = Visits.reshape(a * N_visits, 1)
ind = np.where(Sample_weight == 0)
X = np.delete(X, ind, 0)
Y = np.delete(Y, ind, 0)
Sample_weight = np.delete(Sample_weight, ind, 0)
Visits = np.delete(Visits, ind, 0)

for iter_nm in range(0, N_iter):
    print('Iteration ', iter_nm)
    X_train, X_test, Y_train, Y_test, Sample_weight_train, Sample_weight_test, Visit_train, Visit_test = \
        train_test_split(X, Y, Sample_weight, Visits, test_size=T_size, shuffle=True)
    model = Sequential()
    model.add(Dense(NN_nodes[0], activation='sigmoid', input_dim=c))
    model.add(Dense(NN_nodes[1], activation='sigmoid'))
    model.add(Dense(NN_nodes[2], activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', sample_weight_mode=None,
                  metrics=[sensitivity, specificity, ppv, npv, 'accuracy'])
    print('Training start', 'for iteration ', iter_nm)
    model.fit(X_train, Y_train, epochs=E_pochs, batch_size=B_size, verbose=0,
              sample_weight=Sample_weight_train, shuffle=True, validation_split=0.2, callbacks=[es])
    print('Training complete', 'for iteration ', iter_nm)
    print('Evaluation', 'for iteration ', iter_nm)
    (Accuracy_test[iter_nm], AUC_test[iter_nm], Sensitivity_test[iter_nm], Specificity_test[iter_nm],
     PR_auc[iter_nm], F1_score[iter_nm], cost_saved[iter_nm]) = model_eval(
        model, X_test, Y_test, Sample_weight_test, exp, X_train, Y_train, Sample_weight_train)
    print('Evaluation complete', 'for iteration ', iter_nm)

save_print(AUC_test, Sensitivity_test, Specificity_test, PR_auc, F1_score, cost_saved, exp)
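
# Compare the complete model (1111) against the ablations by reloading the
# saved per-iteration metric dictionaries and plotting mean +/- std bars.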
# print([np.mean(np.fromiter(np.load('cost_saved_1111.npy', allow_pickle=True).item().values(), dtype=float)),
#        np.std(np.fromiter(np.load('cost_saved_1111.npy', allow_pickle=True).item().values(), dtype=float))])

AUC_1111 = np.fromiter(np.load('AUC_test_1111.npy', allow_pickle=True).item().values(), dtype=float)
AUC_1110 = np.fromiter(np.load('AUC_test_1110.npy', allow_pickle=True).item().values(), dtype=float)
AUC_1011 = np.fromiter(np.load('AUC_test_1011.npy', allow_pickle=True).item().values(), dtype=float)
AUC_0111 = np.fromiter(np.load('AUC_test_0111.npy', allow_pickle=True).item().values(), dtype=float)
AUC_1101 = np.fromiter(np.load('AUC_test_1101.npy', allow_pickle=True).item().values(), dtype=float)

cs_1111 = np.fromiter(np.load('cost_saved_1111.npy', allow_pickle=True).item().values(), dtype=float)
cs_1110 = np.fromiter(np.load('cost_saved_1110.npy', allow_pickle=True).item().values(), dtype=float)
cs_1011 = np.fromiter(np.load('cost_saved_1011.npy', allow_pickle=True).item().values(), dtype=float)
cs_0111 = np.fromiter(np.load('cost_saved_0111.npy', allow_pickle=True).item().values(), dtype=float)
cs_1101 = np.fromiter(np.load('cost_saved_1101.npy', allow_pickle=True).item().values(), dtype=float)

f1_1111 = np.fromiter(np.load('f1_score_1111.npy', allow_pickle=True).item().values(), dtype=float)
f1_1110 = np.fromiter(np.load('f1_score_1110.npy', allow_pickle=True).item().values(), dtype=float)
f1_1011 = np.fromiter(np.load('f1_score_1011.npy', allow_pickle=True).item().values(), dtype=float)
f1_0111 = np.fromiter(np.load('f1_score_0111.npy', allow_pickle=True).item().values(), dtype=float)
f1_1101 = np.fromiter(np.load('f1_score_1101.npy', allow_pickle=True).item().values(), dtype=float)

# aucs_mean = [np.mean(AUC_1111), np.mean(AUC_1110)]
# aucs_std = [np.std(AUC_1111), np.std(AUC_1110)]

df_results = pd.DataFrame(np.array([[np.mean(AUC_1111), np.mean(AUC_1110), np.mean(AUC_0111), np.mean(AUC_1011), np.mean(AUC_1101)],
                                    [np.mean(f1_1111), np.mean(f1_1110), np.mean(f1_0111), np.mean(f1_1011), np.mean(f1_1101)],
                                    [np.mean(cs_1111), np.mean(cs_1110), np.mean(cs_0111), np.mean(cs_1011), np.mean(cs_1101)]]))
df_std = pd.DataFrame(np.array([[np.std(AUC_1111), np.std(AUC_1110), np.std(AUC_0111), np.std(AUC_1011), np.std(AUC_1101)],
                                [np.std(f1_1111), np.std(f1_1110), np.std(f1_0111), np.std(f1_1011), np.std(f1_1101)],
                                [np.std(cs_1111), np.std(cs_1110), np.std(cs_0111), np.std(cs_1011), np.std(cs_1101)]]))
df_results.index = ['ROC AUC', 'F1 score', 'Cost saved']
df_results.columns = ['Complete Model', 'Without CA', 'Without HDF', 'Without MDF', 'Without LSTM']

# patterns = (('/'), ('o'))
colors = ['blue', 'skyblue', 'silver', 'gray', 'black']
fig, ax = plt.subplots()
# plt.rcParams.update({'figure.figsize': [5, 5], 'font.size': 22})
plt.rcParams.update({'font.size': 20, 'figure.figsize': [10, 8]})
ax = df_results[::-1].plot.barh(ax=ax, xerr=np.array(df_std[::-1]).transpose(), color=colors, width=0.7, capsize=5)
ax.legend(bbox_to_anchor=(0.95, 0.30))
# ax.set_xlabel('F1 score / Cost Savings')
plt.tight_layout()
plt.savefig('fig3.pdf', format='pdf', dpi=1000)  # save before plt.show(), which clears the figure
plt.show()

# SIGNIFICANCE TESTS
# label = 'cost_saved'
# c = np.fromiter(np.load(label + '_1110.npy', allow_pickle=True).item().values(), dtype=float)
# d = np.fromiter(np.load(label + '_1111.npy', allow_pickle=True).item().values(), dtype=float)
# a, b = stats.ttest_ind(c, d)
# print(a, b)

# import scipy.io as sio
# y_pred = model.predict(X_test).ravel()
# sio.savemat('y_pred_new_review2.mat', {'y_pred': y_pred, 'Visit_ID': Visit_test.ravel(),
#                                        'Y_test': Y_test.ravel(), 'Sample': Sample_weight_test.ravel()})