tensorflow/my_dnn_mitdb.py

""" 
2
Author: Mondejar Guerra
3
VARPA
4
University of A Coruna
5
April 2017
6
7
Description: Train and evaluate mitdb with interpatient split (train/test)
8
Uses my own model clasifier with weights for imbalanced class
9
"""
10

import collections

import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib

tf.logging.set_verbosity(tf.logging.INFO)
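
# Note: tf.contrib.learn is the old contrib Estimator API (removed in
# TensorFlow 2.x); this script assumes a TensorFlow 1.x installation.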


def compute_accuracy(m):
  # Accuracy by column: columns hold the true labels (see the confusion
  # matrix construction in main), so acc[c] is the recall of class c.
  classes = m.shape[0]
  acc = np.zeros(classes)
  acc_global = 0
  for c in range(classes):
    if sum(m[:, c]) > 0:
      acc[c] = float(m[c, c]) / float(sum(m[:, c]))
    acc_global = acc_global + m[c, c]

  acc_global = float(acc_global) / float(sum(sum(m)))
  return acc, acc_global
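
# Quick sanity check (hypothetical 2-class matrix, rows = predicted, cols = true):
#   compute_accuracy(np.array([[8, 2],
#                              [1, 9]]))
# returns acc = [8/9, 9/11] per class and 17/20 = 0.85 global accuracy.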


def load_data(output_path, window_size, compute_RR_interval_feature, compute_wavelets, binary_problem):
  extension = '_' + str(window_size)
  if compute_wavelets:
    extension = extension + '_wv'
  if compute_RR_interval_feature:
    extension = extension + '_RR'
  extension = extension + '.csv'

  # Load training and eval data
  train_data = np.loadtxt(output_path + 'train_data' + extension, delimiter=",", dtype=float)
  train_labels = np.loadtxt(output_path + 'train_label' + extension, delimiter=",", dtype=np.int32)
  eval_data = np.loadtxt(output_path + 'eval_data' + extension, delimiter=",", dtype=float)
  eval_labels = np.loadtxt(output_path + 'eval_label' + extension, delimiter=",", dtype=np.int32)

  if binary_problem:
    # Collapse to two classes:
    #   [0]: N (0)
    #   [1]: SVEB, VEB, F, Q (1, 2, 3, 4)
    train_labels[train_labels > 0] = 1
    eval_labels[eval_labels > 0] = 1

  return (train_data, train_labels, eval_data, eval_labels)
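
# Filename example: with window_size=160, compute_wavelets=True and
# compute_RR_interval_feature=True, this reads e.g.
#   output_path + 'train_data_160_wv_RR.csv'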


# Normalize data features: waveform values & RR intervals.
def normalize_data(train_data, eval_data, compute_RR_interval_feature):
  feature_size = len(train_data[0])
  if compute_RR_interval_feature:
    feature_size = feature_size - 4

  # Min-max scale the waveform features jointly over train and eval
  max_wav = np.amax(np.vstack((train_data[:, 0:feature_size], eval_data[:, 0:feature_size])))
  min_wav = np.amin(np.vstack((train_data[:, 0:feature_size], eval_data[:, 0:feature_size])))

  train_data[:, 0:feature_size] = (train_data[:, 0:feature_size] - min_wav) / (max_wav - min_wav)
  eval_data[:, 0:feature_size] = (eval_data[:, 0:feature_size] - min_wav) / (max_wav - min_wav)

  # Normalize the last part of the features: the RR intervals
  if compute_RR_interval_feature:
    max_rr = np.amax(np.vstack((train_data[:, feature_size:], eval_data[:, feature_size:])))
    min_rr = np.amin(np.vstack((train_data[:, feature_size:], eval_data[:, feature_size:])))

    train_data[:, feature_size:] = (train_data[:, feature_size:] - min_rr) / (max_rr - min_rr)
    eval_data[:, feature_size:] = (eval_data[:, feature_size:] - min_rr) / (max_rr - min_rr)
  return (train_data, eval_data)
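
# Min-max scaling maps each feature block into [0, 1]: x' = (x - min) / (max - min).
# Note the min/max are computed over train and eval jointly, so the eval set
# influences the scaling.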


def my_model_fn(features, targets, mode, params):
  """Model function for the Estimator."""

  targets_onehot = tf.one_hot(indices=targets, depth=params["num_classes"], on_value=1.0)

  # Three hidden layers on top of the input features
  # (fully_connected applies a ReLU activation by default)
  first_hidden_layer = tf.contrib.layers.fully_connected(features, params["h1"])
  second_hidden_layer = tf.contrib.layers.fully_connected(first_hidden_layer, params["h2"])
  third_hidden_layer = tf.contrib.layers.relu(second_hidden_layer, params["h3"])

  # Connect the output layer to the third hidden layer (no activation fn)
  output_layer = tf.contrib.layers.linear(third_hidden_layer, params["num_classes"])

  # During training, weight each example by its class weight to compensate
  # for the class imbalance; otherwise weight every example equally.
  if mode == 'train' and params["weight_imbalanced"]:
    weights_tf = tf.constant(params["weights"])
  else:
    weights_tf = tf.ones([features.shape[0].value], tf.float32)

  loss = tf.losses.softmax_cross_entropy(targets_onehot, output_layer, weights=weights_tf)
  train_op = tf.contrib.layers.optimize_loss(
    loss=loss,
    global_step=tf.contrib.framework.get_global_step(),
    learning_rate=params["learning_rate"],
    optimizer="SGD")

  correct_prediction = tf.equal(tf.argmax(targets_onehot, 1), tf.argmax(output_layer, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  eval_metric_ops = {"accuracy": accuracy}

  return model_fn_lib.ModelFnOps(
    mode=mode,
    predictions=output_layer,
    loss=loss,
    train_op=train_op,
    eval_metric_ops=eval_metric_ops)
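
# tf.losses.softmax_cross_entropy scales each example's cross-entropy term by
# its weight before reducing; with TF 1.x's default SUM_BY_NONZERO_WEIGHTS
# reduction and hypothetical per-example weights [9.0, 1.0], the batch loss is
#   (9.0 * ce_0 + 1.0 * ce_1) / 2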


def main():
  window_size = 160
  compute_RR_interval_feature = True
  compute_wavelets = True
  dataset = '/home/mondejar/dataset/ECG/mitdb/'
  output_path = dataset + 'm_learning/'

  binary_problem = False
  weight_imbalanced = True

  # 0 Load data
  train_data, train_labels, eval_data, eval_labels = load_data(output_path, window_size, compute_RR_interval_feature, compute_wavelets, binary_problem)

  # 1 TODO Preprocess the data? If the RR interval feature is used, the last
  # 4 features are the pre, post, local and global RR intervals.
  # Apply some norm? convolution? another approach?
  normalize = False
  if normalize:
    train_data, eval_data = normalize_data(train_data, eval_data, compute_RR_interval_feature)

  # 2 Create my own model
  # Imbalanced classes are handled with per-example loss weights:
  # https://www.tensorflow.org/api_guides/python/contrib.losses

  # Learning rate for the model
  LEARNING_RATE = 0.001
  if binary_problem:
    num_classes = 2
  else:
    num_classes = 5

  # Set model params
  count = collections.Counter(train_labels)
  total = 0
  max_class = 0
  for c in range(num_classes):
    total = count[c] + total
    if count[c] > max_class:
      max_class = count[c]

  # The most frequent class gets weight 1; each other class is weighted by
  # how many times rarer it is (max_class / count[c]).
  # Alternative: class_weight[c] = 1 - float(count[c]) / float(total)
  class_weight = np.zeros(num_classes)
  for c in range(num_classes):
    if count[c] > 0:
      class_weight[c] = float(max_class) / float(count[c])

  # TODO give more weight to the anomaly classes? We always want to detect
  # these dangerous anomalies.
  weights = np.zeros(len(train_labels), dtype='float')
  for i in range(len(train_labels)):
    weights[i] = class_weight[train_labels[i]]
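
  # Hypothetical example: with counts {0: 900, 1: 60, 2: 40}, max_class = 900
  # and class_weight = [1.0, 15.0, 22.5]; each training example then carries
  # the weight of its class.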

  hn_1 = [128, 64, 32]
  hn_2 = [64, 32, 16]
  hn_3 = [32, 16, 8]
  steps = [500, 1000, 2000, 8000]
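
  # Exhaustive grid search over the three hidden-layer sizes and the step
  # counts: 3 * 3 * 3 * 4 = 108 train/evaluate runs.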
  for h1 in hn_1:
    for h2 in hn_2:
      for h3 in hn_3:
        for s in steps:
          model_params = {
            "learning_rate": LEARNING_RATE,
            "num_classes": num_classes,
            "weights": weights,
            "weight_imbalanced": weight_imbalanced,
            "h1": h1,
            "h2": h2,
            "h3": h3}

          nn = tf.contrib.learn.Estimator(model_fn=my_model_fn, params=model_params)

          def get_train_inputs():
            x = tf.constant(train_data)
            y = tf.constant(train_labels)
            return x, y

          # Fit
          nn.fit(input_fn=get_train_inputs, steps=s)

          # Score accuracy
          def get_test_inputs():
            x = tf.constant(eval_data)
            y = tf.constant(eval_labels)
            return x, y

          ev = nn.evaluate(input_fn=get_test_inputs, steps=1)["accuracy"]

          # Compute the confusion matrix (rows = predicted class, columns = true class)
          predictions = list(nn.predict(input_fn=get_test_inputs))

          confusion_matrix = np.zeros((num_classes, num_classes), dtype='int')
          for p in range(len(predictions)):
            ind_p = np.argmax(predictions[p])
            confusion_matrix[ind_p][eval_labels[p]] = confusion_matrix[ind_p][eval_labels[p]] + 1

          acc, acc_g = compute_accuracy(confusion_matrix)
          np.savetxt('nn_' + str(h1) + '_' + str(h2) + '_' + str(h3) + '_' + str(s) + '_cm.txt', confusion_matrix, fmt='%-7.0f')
          np.savetxt('nn_' + str(h1) + '_' + str(h2) + '_' + str(h3) + '_' + str(s) + '_acc.txt', acc, fmt='%-7.2f')
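
          # For example, h1=128, h2=64, h3=32, s=500 writes nn_128_64_32_500_cm.txt
          # and nn_128_64_32_500_acc.txt to the working directory.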


if __name__ == "__main__":
  main()