deeplearn-approach/cincset_files2matrix.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Convert multiple records from the PhysioNet/Computing in Cardiology challenge
into a single matrix file. The directory containing the challenge .mat records
is set via the dataDir parameter below.

For more information visit: https://github.com/fernandoandreotti/cinc-challenge2017

Referencing this work
Andreotti, F., Carr, O., Pimentel, M.A.F., Mahdi, A., & De Vos, M. (2017). Comparing Feature-Based
Classifiers and Convolutional Neural Networks to Detect Arrhythmia from Short Segments of ECG. In
Computing in Cardiology. Rennes (France).

--
cinc-challenge2017, version 1.0, Sept 2017
Last updated : 27-09-2017
Released under the GNU General Public License

Copyright (C) 2017 Fernando Andreotti, Oliver Carr, Marco A.F. Pimentel, Adam Mahdi, Maarten De Vos
University of Oxford, Department of Engineering Science, Institute of Biomedical Engineering
fernando.andreotti@eng.ox.ac.uk

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''

import scipy.io
import numpy as np
import glob
import csv

# Parameters
dataDir = 'training2017/'  # <---- change to the folder containing the challenge .mat records
FS = 300                   # sampling frequency of the challenge recordings (Hz)
WINDOW_SIZE = 60*FS        # fixed row length: 60 s * 300 Hz = 18000 samples

## Loading time series signals
files = sorted(glob.glob(dataDir + "*.mat"))
trainset = np.zeros((len(files), WINDOW_SIZE))
count = 0
for f in files:
    record = f[:-4]       # drop the .mat extension
    record = record[-6:]  # keep the 6-character record name
    # Loading
    mat_data = scipy.io.loadmat(f[:-4] + ".mat")
    print('Loading record {}'.format(record))
    data = mat_data['val'].squeeze()
    # Preprocessing
    print('Preprocessing record {}'.format(record))
    data = np.nan_to_num(data)   # removing NaNs and Infs
    data = data - np.mean(data)  # zero mean
    data = data/np.std(data)     # unit variance
    # Zero-pad short records / truncate long ones to WINDOW_SIZE samples
    trainset[count, :min(WINDOW_SIZE, len(data))] = data[:min(WINDOW_SIZE, len(data))].T
    count += 1

## Loading labels
csvfile = list(csv.reader(open(dataDir + 'REFERENCE.csv')))
traintarget = np.zeros((trainset.shape[0], 4))
classes = ['A', 'N', 'O', '~']  # atrial fibrillation, normal, other rhythm, noisy
for row in range(len(csvfile)):
    traintarget[row, classes.index(csvfile[row][1])] = 1
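# Illustrative example (hypothetical record name): a REFERENCE.csv row such as
# ['A00001', 'N'] produces the one-hot target [0, 1, 0, 0] for that row of traintarget.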

# Saving both
scipy.io.savemat('trainingset.mat', mdict={'trainset': trainset, 'traintarget': traintarget})
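
# Optional sanity check, a minimal sketch: reload the saved file and confirm there is
# one row per record and a one-hot label of width 4 per row.
check = scipy.io.loadmat('trainingset.mat')
print('trainset shape: {}'.format(check['trainset'].shape))        # (n_records, WINDOW_SIZE)
print('traintarget shape: {}'.format(check['traintarget'].shape))  # (n_records, 4)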