python/load_MITBIH.py
#!/usr/bin/env python

"""
load_MITBIH.py

Download .csv files and annotations from:
kaggle.com/mondejar/mitbih-database

VARPA, University of Coruna
Mondejar Guerra, Victor M.
23 Oct 2017
"""

import os
import csv
import gc
import operator  # used by operator.itemgetter in load_signal
import cPickle as pickle

import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import medfilt
import scipy.signal  # scipy.signal.resample is used below
import scipy.stats
import pywt
import time
import sklearn
from sklearn import decomposition
from sklearn.decomposition import PCA, IncrementalPCA

from features_ECG import *

from numpy.polynomial.hermite import hermfit, hermval


def create_features_labels_name(DS, winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph, db_path, reduced_DS, leads_flag):

    features_labels_name = db_path + 'features/' + 'w_' + str(winL) + '_' + str(winR) + '_' + DS

    if do_preprocess:
        features_labels_name += '_rm_bsline'

    if maxRR:
        features_labels_name += '_maxRR'

    if use_RR:
        features_labels_name += '_RR'

    if norm_RR:
        features_labels_name += '_norm_RR'

    for descp in compute_morph:
        features_labels_name += '_' + descp

    if reduced_DS:
        features_labels_name += '_reduced'

    if leads_flag[0] == 1:
        features_labels_name += '_MLII'

    if leads_flag[1] == 1:
        features_labels_name += '_V1'

    features_labels_name += '.p'

    return features_labels_name

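# Example output of create_features_labels_name (hypothetical arguments): with winL=90, winR=90,
# DS='DS1', do_preprocess=True, maxRR=True, use_RR=True, norm_RR=False, compute_morph=['wvlt'],
# reduced_DS=False and leads_flag=[1, 0], the generated name is:
#   <db_path>features/w_90_90_DS1_rm_bsline_maxRR_RR_wvlt_MLII.p
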
def save_wvlt_PCA(PCA, pca_k, family, level):
    f = open('Wvlt_' + family + '_' + str(level) + '_PCA_' + str(pca_k) + '.p', 'wb')
    pickle.dump(PCA, f, 2)
    f.close()

def load_wvlt_PCA(pca_k, family, level):
    f = open('Wvlt_' + family + '_' + str(level) + '_PCA_' + str(pca_k) + '.p', 'rb')
    # Disable the garbage collector: this improves the loading time
    gc.disable()
    PCA = pickle.load(f)
    gc.enable()
    f.close()

    return PCA


# Load the data with the configuration and features selected
# Params:
# - leads_flag = [MLII, V1]: set each value to 0 or 1 to indicate whether that lead is used
# - reduced_DS = load the full DS1, DS2 patient division (Chazal) or the reduced version,
#   i.e., only the patients in common that contain both MLII and V1
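#
# Example call (hypothetical paths and window sizes):
#   features, labels, patient_num_beats = load_mit_db(
#       'DS1', winL=90, winR=90, do_preprocess=True, maxRR=True,
#       use_RR=True, norm_RR=True, compute_morph=['wvlt', 'HOS'],
#       db_path='/home/user/dataset/ECG/mitdb/', reduced_DS=False, leads_flag=[1, 0])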
def load_mit_db(DS, winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph, db_path, reduced_DS, leads_flag):

    features_labels_name = create_features_labels_name(DS, winL, winR, do_preprocess, maxRR, use_RR, norm_RR, compute_morph, db_path, reduced_DS, leads_flag)

    if os.path.isfile(features_labels_name):
        print("Loading pickle: " + features_labels_name + "...")
        f = open(features_labels_name, 'rb')
        # Disable the garbage collector: this improves the loading time
        gc.disable()
        features, labels, patient_num_beats = pickle.load(f)
        gc.enable()
        f.close()

    else:
        print("Loading MIT BIH arr (" + DS + ") ...")

        # ML-II
        if not reduced_DS:
            DS1 = [101, 106, 108, 109, 112, 114, 115, 116, 118, 119, 122, 124, 201, 203, 205, 207, 208, 209, 215, 220, 223, 230]
            DS2 = [100, 103, 105, 111, 113, 117, 121, 123, 200, 202, 210, 212, 213, 214, 219, 221, 222, 228, 231, 232, 233, 234]

        # ML-II + V1
        else:
            DS1 = [101, 106, 108, 109, 112, 115, 118, 119, 201, 203, 205, 207, 208, 209, 215, 220, 223, 230]
            DS2 = [105, 111, 113, 121, 200, 202, 210, 212, 213, 214, 219, 221, 222, 228, 231, 232, 233, 234]

        mit_pickle_name = db_path + 'python_mit'
        if reduced_DS:
            mit_pickle_name = mit_pickle_name + '_reduced_'

        if do_preprocess:
            mit_pickle_name = mit_pickle_name + '_rm_bsline'

        mit_pickle_name = mit_pickle_name + '_wL_' + str(winL) + '_wR_' + str(winR)
        mit_pickle_name = mit_pickle_name + '_' + DS + '.p'

        # If the data with that configuration has already been computed, load the pickle
        if os.path.isfile(mit_pickle_name):
            f = open(mit_pickle_name, 'rb')
            # Disable the garbage collector: this improves the loading time
            gc.disable()
            my_db = pickle.load(f)
            gc.enable()
            f.close()
        else:  # Load data and compute the RR features
            if DS == 'DS1':
                my_db = load_signal(DS1, winL, winR, do_preprocess)
            else:
                my_db = load_signal(DS2, winL, winR, do_preprocess)

            print("Saving signal processed data ...")
            # Save data
            # Protocol version 0 is the original ASCII protocol and is backwards compatible with earlier versions of Python.
            # Protocol version 1 is the old binary format which is also compatible with earlier versions of Python.
            # Protocol version 2 was introduced in Python 2.3. It provides much more efficient pickling of new-style classes.
            f = open(mit_pickle_name, 'wb')
            pickle.dump(my_db, f, 2)
            f.close()

        features = np.array([], dtype=float)
        labels = np.array([], dtype=np.int32)

        # This array contains the number of beats for each patient (for cross_val)
        patient_num_beats = np.array([], dtype=np.int32)
        for p in range(len(my_db.beat)):
            patient_num_beats = np.append(patient_num_beats, len(my_db.beat[p]))

        # Compute RR features
        if use_RR or norm_RR:
            if DS == 'DS1':
                RR = [RR_intervals() for i in range(len(DS1))]
            else:
                RR = [RR_intervals() for i in range(len(DS2))]

            print("Computing RR intervals ...")

            for p in range(len(my_db.beat)):
                if maxRR:
                    RR[p] = compute_RR_intervals(my_db.R_pos[p])
                else:
                    RR[p] = compute_RR_intervals(my_db.orig_R_pos[p])

                RR[p].pre_R = RR[p].pre_R[(my_db.valid_R[p] == 1)]
                RR[p].post_R = RR[p].post_R[(my_db.valid_R[p] == 1)]
                RR[p].local_R = RR[p].local_R[(my_db.valid_R[p] == 1)]
                RR[p].global_R = RR[p].global_R[(my_db.valid_R[p] == 1)]

        if use_RR:
            f_RR = np.empty((0, 4))
            for p in range(len(RR)):
                row = np.column_stack((RR[p].pre_R, RR[p].post_R, RR[p].local_R, RR[p].global_R))
                f_RR = np.vstack((f_RR, row))

            features = np.column_stack((features, f_RR)) if features.size else f_RR

        if norm_RR:
            f_RR_norm = np.empty((0, 4))
            for p in range(len(RR)):
                # Compute avg values!
                avg_pre_R = np.average(RR[p].pre_R)
                avg_post_R = np.average(RR[p].post_R)
                avg_local_R = np.average(RR[p].local_R)
                avg_global_R = np.average(RR[p].global_R)

                row = np.column_stack((RR[p].pre_R / avg_pre_R, RR[p].post_R / avg_post_R, RR[p].local_R / avg_local_R, RR[p].global_R / avg_global_R))
                f_RR_norm = np.vstack((f_RR_norm, row))

            features = np.column_stack((features, f_RR_norm)) if features.size else f_RR_norm

        #########################################################################################
        # Compute morphological features
        print("Computing morphological features (" + DS + ") ...")

        num_leads = np.sum(leads_flag)

        # Raw
        if 'resample_10' in compute_morph:
            print("Resample_10 ...")
            start = time.time()

            f_raw = np.empty((0, 10 * num_leads))

            for p in range(len(my_db.beat)):
                for beat in my_db.beat[p]:
                    f_raw_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            resamp_beat = scipy.signal.resample(beat[s], 10)
                            if f_raw_lead.size == 1:
                                f_raw_lead = resamp_beat
                            else:
                                f_raw_lead = np.hstack((f_raw_lead, resamp_beat))
                    f_raw = np.vstack((f_raw, f_raw_lead))

            features = np.column_stack((features, f_raw)) if features.size else f_raw

            end = time.time()
            print("Time resample: " + str(format(end - start, '.2f')) + " sec")

        if 'raw' in compute_morph:
            print("Raw ...")
            start = time.time()

            f_raw = np.empty((0, (winL + winR) * num_leads))

            for p in range(len(my_db.beat)):
                for beat in my_db.beat[p]:
                    f_raw_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_raw_lead.size == 1:
                                f_raw_lead = beat[s]
                            else:
                                f_raw_lead = np.hstack((f_raw_lead, beat[s]))
                    f_raw = np.vstack((f_raw, f_raw_lead))

            features = np.column_stack((features, f_raw)) if features.size else f_raw

            end = time.time()
            print("Time raw: " + str(format(end - start, '.2f')) + " sec")

        # LBP 1D
        # "1D-local binary pattern based feature extraction for classification of epileptic EEG signals":
        # 2014, about 55 citations, Q2-Q1 Mathematics
        # https://ac.els-cdn.com/S0096300314008285/1-s2.0-S0096300314008285-main.pdf?_tid=8a8433a6-e57f-11e7-98ec-00000aab0f6c&acdnat=1513772341_eb5d4d26addb6c0b71ded4fd6cc23ed5

        # The 1D-LBP method, derived from the implementation steps of 2D-LBP, was first proposed by
        # Chatlani et al. for the detection of speech signals, which are non-stationary in nature [23]

        # From the raw signal

        # TODO: Some kind of preprocessing or cleaning of high frequency noise?

        # Compute 2 histograms: LBP or Uniform LBP
        # LBP 8 = 0-255
        # U-LBP 8 = 0-58
        # Uniform LBP are only those patterns which present at most 2 transitions from 0-1 or 1-0
        # All the non-uniform patterns are assigned to the same value in the histogram

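        # For example, the 8-bit pattern 00011100 has 2 transitions (uniform),
        # while 01010000 has 4 transitions (non-uniform, so it falls into the shared bin).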
        if 'u-lbp' in compute_morph:
            print("u-lbp ...")

            f_lbp = np.empty((0, 59 * num_leads))

            for p in range(len(my_db.beat)):
                for beat in my_db.beat[p]:
                    f_lbp_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_lbp_lead.size == 1:
                                f_lbp_lead = compute_Uniform_LBP(beat[s], 8)
                            else:
                                f_lbp_lead = np.hstack((f_lbp_lead, compute_Uniform_LBP(beat[s], 8)))
                    f_lbp = np.vstack((f_lbp, f_lbp_lead))

            features = np.column_stack((features, f_lbp)) if features.size else f_lbp
            print(features.shape)

        if 'lbp' in compute_morph:
            print("lbp ...")

            f_lbp = np.empty((0, 16 * num_leads))

            for p in range(len(my_db.beat)):
                for beat in my_db.beat[p]:
                    f_lbp_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_lbp_lead.size == 1:
                                f_lbp_lead = compute_LBP(beat[s], 4)
                            else:
                                f_lbp_lead = np.hstack((f_lbp_lead, compute_LBP(beat[s], 4)))
                    f_lbp = np.vstack((f_lbp, f_lbp_lead))

            features = np.column_stack((features, f_lbp)) if features.size else f_lbp
            print(features.shape)

        if 'hbf5' in compute_morph:
            print("hbf ...")

            f_hbf = np.empty((0, 15 * num_leads))

            for p in range(len(my_db.beat)):
                for beat in my_db.beat[p]:
                    f_hbf_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_hbf_lead.size == 1:
                                f_hbf_lead = compute_HBF(beat[s])
                            else:
                                f_hbf_lead = np.hstack((f_hbf_lead, compute_HBF(beat[s])))
                    f_hbf = np.vstack((f_hbf, f_hbf_lead))

            features = np.column_stack((features, f_hbf)) if features.size else f_hbf
            print(features.shape)

        # Wavelets
        if 'wvlt' in compute_morph:
            print("Wavelets ...")

            f_wav = np.empty((0, 23 * num_leads))

            for p in range(len(my_db.beat)):
                for b in my_db.beat[p]:
                    f_wav_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_wav_lead.size == 1:
                                f_wav_lead = compute_wavelet_descriptor(b[s], 'db1', 3)
                            else:
                                f_wav_lead = np.hstack((f_wav_lead, compute_wavelet_descriptor(b[s], 'db1', 3)))
                    f_wav = np.vstack((f_wav, f_wav_lead))
                    #f_wav = np.vstack((f_wav, compute_wavelet_descriptor(b, 'db1', 3)))

            features = np.column_stack((features, f_wav)) if features.size else f_wav

        # Wavelets + PCA
        if 'wvlt+pca' in compute_morph:
            pca_k = 7
            print("Wavelets + PCA (" + str(pca_k) + ") ...")

            family = 'db1'
            level = 3

            f_wav = np.empty((0, 23 * num_leads))

            for p in range(len(my_db.beat)):
                for b in my_db.beat[p]:
                    f_wav_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_wav_lead.size == 1:
                                f_wav_lead = compute_wavelet_descriptor(b[s], family, level)
                            else:
                                f_wav_lead = np.hstack((f_wav_lead, compute_wavelet_descriptor(b[s], family, level)))
                    f_wav = np.vstack((f_wav, f_wav_lead))
                    #f_wav = np.vstack((f_wav, compute_wavelet_descriptor(b, 'db1', 3)))

            if DS == 'DS1':
                # Compute PCA
                #PCA = sklearn.decomposition.KernelPCA(pca_k) # gamma_pca
                IPCA = IncrementalPCA(n_components=pca_k, batch_size=10)  # NOTE: due to memory errors, we employ IncrementalPCA
                IPCA.fit(f_wav)

                # Save PCA
                save_wvlt_PCA(IPCA, pca_k, family, level)
            else:
                # Load PCA
                IPCA = load_wvlt_PCA(pca_k, family, level)

            # Extract the PCA
            #f_wav_PCA = np.empty((0, pca_k * num_leads))
            f_wav_PCA = IPCA.transform(f_wav)
            features = np.column_stack((features, f_wav_PCA)) if features.size else f_wav_PCA

        # HOS
        if 'HOS' in compute_morph:
            print("HOS ...")
            n_intervals = 6
            lag = int(round((winL + winR) / n_intervals))

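            # e.g. with winL = winR = 90 (hypothetical window sizes), lag = 30 samples;
            # each lead contributes (n_intervals - 1) * 2 = 10 HOS values (see f_HOS below)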
            f_HOS = np.empty((0, (n_intervals - 1) * 2 * num_leads))
            for p in range(len(my_db.beat)):
                for b in my_db.beat[p]:
                    f_HOS_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_HOS_lead.size == 1:
                                f_HOS_lead = compute_hos_descriptor(b[s], n_intervals, lag)
                            else:
                                f_HOS_lead = np.hstack((f_HOS_lead, compute_hos_descriptor(b[s], n_intervals, lag)))
                    f_HOS = np.vstack((f_HOS, f_HOS_lead))
                    #f_HOS = np.vstack((f_HOS, compute_hos_descriptor(b, n_intervals, lag)))

            features = np.column_stack((features, f_HOS)) if features.size else f_HOS
            print(features.shape)

        # My morphological descriptor
        if 'myMorph' in compute_morph:
            print("My Descriptor ...")
            f_myMorph = np.empty((0, 4 * num_leads))
            for p in range(len(my_db.beat)):
                for b in my_db.beat[p]:
                    f_myMorph_lead = np.empty([])
                    for s in range(2):
                        if leads_flag[s] == 1:
                            if f_myMorph_lead.size == 1:
                                f_myMorph_lead = compute_my_own_descriptor(b[s], winL, winR)
                            else:
                                f_myMorph_lead = np.hstack((f_myMorph_lead, compute_my_own_descriptor(b[s], winL, winR)))
                    f_myMorph = np.vstack((f_myMorph, f_myMorph_lead))
                    #f_myMorph = np.vstack((f_myMorph, compute_my_own_descriptor(b, winL, winR)))

            features = np.column_stack((features, f_myMorph)) if features.size else f_myMorph

        # Set the labels array
        labels = np.array(sum(my_db.class_ID, [])).flatten()
        print("labels")

        # Save the features, labels and per-patient beat counts to a pickle
        print('writing pickle: ' + features_labels_name + '...')
        f = open(features_labels_name, 'wb')
        pickle.dump([features, labels, patient_num_beats], f, 2)
        f.close()

    return features, labels, patient_num_beats


# DS: contains the patient list to load
# winL, winR: indicate the size of the window centred at the R-peak (left and right side)
# do_preprocess: indicates whether baseline-removal preprocessing is applied to the signal
def load_signal(DS, winL, winR, do_preprocess):

    class_ID = [[] for i in range(len(DS))]
    beat = [[] for i in range(len(DS))]  # record, beat, lead
    R_poses = [np.array([]) for i in range(len(DS))]
    Original_R_poses = [np.array([]) for i in range(len(DS))]
    valid_R = [np.array([]) for i in range(len(DS))]
    my_db = mit_db()
    patients = []

    # Lists
    # beats = []
    # classes = []
    # valid_R = np.empty([])
    # R_poses = np.empty([])
    # Original_R_poses = np.empty([])

    size_RR_max = 20

    pathDB = '/home/mondejar/dataset/ECG/'
    DB_name = 'mitdb'
    fs = 360
    jump_lines = 1

    # Read files: signal (.csv), annotations (.txt)
    fRecords = list()
    fAnnotations = list()

    lst = os.listdir(pathDB + DB_name + "/csv")
    lst.sort()
    for file in lst:
        if file.endswith(".csv"):
            if int(file[0:3]) in DS:
                fRecords.append(file)
        elif file.endswith(".txt"):
            if int(file[0:3]) in DS:
                fAnnotations.append(file)

    MITBIH_classes = ['N', 'L', 'R', 'e', 'j', 'A', 'a', 'J', 'S', 'V', 'E', 'F']  #, 'P', '/', 'f', 'u'
    AAMI_classes = []
    AAMI_classes.append(['N', 'L', 'R'])                 # N
    AAMI_classes.append(['A', 'a', 'J', 'S', 'e', 'j'])  # SVEB
    AAMI_classes.append(['V', 'E'])                      # VEB
    AAMI_classes.append(['F'])                           # F
    #AAMI_classes.append(['P', '/', 'f', 'u'])           # Q

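    # The index into AAMI_classes is used as the label below: 0 = N, 1 = SVEB, 2 = VEB, 3 = F
    # (the Q group is commented out; its symbols are also excluded from MITBIH_classes above)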
    RAW_signals = []
    r_index = 0

    #for r, a in zip(fRecords, fAnnotations):
    for r in range(0, len(fRecords)):

        print("Processing signal " + str(r) + " / " + str(len(fRecords)) + "...")

        # 1. Read signal
        filename = pathDB + DB_name + "/csv/" + fRecords[r]
        print(filename)
        f = open(filename, 'rb')
        reader = csv.reader(f, delimiter=',')
        next(reader)  # skip first line!

        MLII_index = 1
        V1_index = 2
        if int(fRecords[r][0:3]) == 114:
            MLII_index = 2
            V1_index = 1

        MLII = []
        V1 = []
        for row in reader:
            MLII.append(int(row[MLII_index]))
            V1.append(int(row[V1_index]))
        f.close()

        RAW_signals.append((MLII, V1))  ## NOTE a copy must be created in order to preserve the original signal
        # display_signal(MLII)

        # 2. Read annotations
        filename = pathDB + DB_name + "/csv/" + fAnnotations[r]
        print(filename)
        f = open(filename, 'rb')
        next(f)  # skip first line!

        annotations = []
        for line in f:
            annotations.append(line)
        f.close()

        # 3. Preprocessing signal!
        if do_preprocess:
            #scipy.signal
            # median_filter1D
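            # Two-pass median filtering to estimate the baseline: kernel sizes of
            # 71 and 215 samples correspond to roughly 200 ms and 600 ms at fs = 360 Hz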
            baseline = medfilt(MLII, 71)
            baseline = medfilt(baseline, 215)

            # Remove Baseline
            for i in range(0, len(MLII)):
                MLII[i] = MLII[i] - baseline[i]

            # TODO Remove High Freqs

            # median_filter1D
            baseline = medfilt(V1, 71)
            baseline = medfilt(baseline, 215)

            # Remove Baseline
            for i in range(0, len(V1)):
                V1[i] = V1[i] - baseline[i]

        # Extract the R-peaks from annotations
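        # Each annotation line is expected to follow the WFDB rdann text layout,
        # e.g. "0:00.214  77  N  0  0  0": aS[1] is the R-peak sample index and aS[2] the beat symbol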
        for a in annotations:
            aS = a.split()

            pos = int(aS[1])
            originalPos = int(aS[1])
            classAnttd = aS[2]
            if pos > size_RR_max and pos < (len(MLII) - size_RR_max):
                index, value = max(enumerate(MLII[pos - size_RR_max : pos + size_RR_max]), key=operator.itemgetter(1))
                pos = (pos - size_RR_max) + index

            peak_type = 0
            #pos = pos-1

            if classAnttd in MITBIH_classes:
                if pos > winL and pos < (len(MLII) - winR):
                    beat[r].append((MLII[pos - winL : pos + winR], V1[pos - winL : pos + winR]))
                    for i in range(0, len(AAMI_classes)):
                        if classAnttd in AAMI_classes[i]:
                            class_AAMI = i
                            break  # exit loop
                    # convert class
                    class_ID[r].append(class_AAMI)

                    valid_R[r] = np.append(valid_R[r], 1)
                else:
                    valid_R[r] = np.append(valid_R[r], 0)
            else:
                valid_R[r] = np.append(valid_R[r], 0)

            R_poses[r] = np.append(R_poses[r], pos)
            Original_R_poses[r] = np.append(Original_R_poses[r], originalPos)

        #R_poses[r] = R_poses[r][(valid_R[r] == 1)]
        #Original_R_poses[r] = Original_R_poses[r][(valid_R[r] == 1)]

    # Set the data into a bigger struct that keeps all the records!
    my_db.filename = fRecords

    my_db.raw_signal = RAW_signals
    my_db.beat = beat  # record, beat, lead
    my_db.class_ID = class_ID
    my_db.valid_R = valid_R
    my_db.R_pos = R_poses
    my_db.orig_R_pos = Original_R_poses

    return my_db