#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Convert multiple files from the Physionet/Computing in Cardiology challenge
into a single matrix file. Set the data directory below before running.

For more information visit: https://github.com/fernandoandreotti/cinc-challenge2017

Referencing this work
  Andreotti, F., Carr, O., Pimentel, M.A.F., Mahdi, A., & De Vos, M. (2017). Comparing Feature Based
  Classifiers and Convolutional Neural Networks to Detect Arrhythmia from Short Segments of ECG. In
  Computing in Cardiology. Rennes (France).

--
cinc-challenge2017, version 1.0, Sept 2017
Last updated : 27-09-2017
Released under the GNU General Public License

Copyright (C) 2017  Fernando Andreotti, Oliver Carr, Marco A.F. Pimentel, Adam Mahdi, Maarten De Vos
University of Oxford, Department of Engineering Science, Institute of Biomedical Engineering
fernando.andreotti@eng.ox.ac.uk

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import csv
import glob

import numpy as np
import scipy.io
# Parameters
# dataDir = '/some_path/'  # <---- change to your local copy of the data if needed
# (the placeholder above was a dead store: it was immediately overwritten below,
#  so it is kept only as a comment)
dataDir = 'training2017/'  # directory holding the challenge .mat recordings
FS = 300                   # sampling frequency in Hz of the challenge recordings
WINDOW_SIZE = 60 * FS      # fixed row length: 60 seconds of signal (18000 samples)
## Loading time series signals
# Each .mat file holds one ECG record under the key 'val'.  Every record is
# mean-removed and scaled by its standard deviation, then written into a
# fixed-size row of `trainset`: truncated to WINDOW_SIZE samples if longer,
# implicitly zero-padded (rows start as zeros) if shorter.
files = sorted(glob.glob(dataDir + "*.mat"))
trainset = np.zeros((len(files), WINDOW_SIZE))
for count, f in enumerate(files):       # enumerate replaces the manual counter
    record = f[:-4][-6:]                # record name, e.g. 'A00001'
    # Loading (glob matched '*.mat', so f is already the full .mat path)
    mat_data = scipy.io.loadmat(f)
    print('Loading record {}'.format(record))
    data = mat_data['val'].squeeze()
    # Preprocessing
    print('Preprocessing record {}'.format(record))
    data = np.nan_to_num(data)          # removing NaNs and Infs
    data = data - np.mean(data)
    # NOTE(review): a constant record gives np.std(data) == 0 and an inf/nan
    # row here — unchanged from the original behavior; confirm inputs vary.
    data = data / np.std(data)
    n = min(WINDOW_SIZE, len(data))     # hoisted: was computed twice per record
    trainset[count, :n] = data[:n].T    # padding sequence
## Loading labels
# REFERENCE.csv maps record name -> class letter; build a one-hot target
# matrix with one row per loaded record and one column per class.
# (import csv moved to the top-of-file import block; the file handle is now
#  closed via a context manager — the original left it open.)
with open(dataDir + 'REFERENCE.csv') as fh:
    labels = list(csv.reader(fh))
traintarget = np.zeros((trainset.shape[0], 4))
classes = ['A', 'N', 'O', '~']  # challenge class letters; column order of traintarget
for row, entry in enumerate(labels):
    traintarget[row, classes.index(entry[1])] = 1

# Saving both
scipy.io.savemat('trainingset.mat', mdict={'trainset': trainset, 'traintarget': traintarget})