#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
Convert multiple files from the Physionet/Computing in Cardiology challenge
into a single matrix file. Set the data directory below before running.

For more information visit: https://github.com/fernandoandreotti/cinc-challenge2017

Referencing this work
  Andreotti, F., Carr, O., Pimentel, M.A.F., Mahdi, A., & De Vos, M. (2017). Comparing Feature Based
  Classifiers and Convolutional Neural Networks to Detect Arrhythmia from Short Segments of ECG. In
  Computing in Cardiology. Rennes (France).

--
cinc-challenge2017, version 1.0, Sept 2017
Last updated : 27-09-2017
Released under the GNU General Public License

Copyright (C) 2017  Fernando Andreotti, Oliver Carr, Marco A.F. Pimentel, Adam Mahdi, Maarten De Vos
University of Oxford, Department of Engineering Science, Institute of Biomedical Engineering
fernando.andreotti@eng.ox.ac.uk

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import csv
import glob

import numpy as np
import scipy.io
# Parameters
# dataDir = '/some_path/'  # <---- change to your local copy of the data if needed
# (the placeholder above was a dead store: it was immediately overwritten below,
#  so it is kept only as a comment)
dataDir = 'training2017/'  # directory holding the challenge .mat recordings
FS = 300                   # sampling frequency in Hz of the challenge recordings
WINDOW_SIZE = 60 * FS      # fixed row length: 60 seconds of signal (18000 samples)
## Loading time series signals
# Each .mat file holds one ECG record under the key 'val'.  Every record is
# mean-removed and scaled by its standard deviation, then written into a
# fixed-size row of `trainset`: truncated to WINDOW_SIZE samples if longer,
# implicitly zero-padded (rows start as zeros) if shorter.
files = sorted(glob.glob(dataDir + "*.mat"))
trainset = np.zeros((len(files), WINDOW_SIZE))
for count, f in enumerate(files):       # enumerate replaces the manual counter
    record = f[:-4][-6:]                # record name, e.g. 'A00001'
    # Loading (glob matched '*.mat', so f is already the full .mat path)
    mat_data = scipy.io.loadmat(f)
    print('Loading record {}'.format(record))
    data = mat_data['val'].squeeze()
    # Preprocessing
    print('Preprocessing record {}'.format(record))
    data = np.nan_to_num(data)          # removing NaNs and Infs
    data = data - np.mean(data)
    # NOTE(review): a constant record gives np.std(data) == 0 and an inf/nan
    # row here — unchanged from the original behavior; confirm inputs vary.
    data = data / np.std(data)
    n = min(WINDOW_SIZE, len(data))     # hoisted: was computed twice per record
    trainset[count, :n] = data[:n].T    # padding sequence
## Loading labels
# REFERENCE.csv maps record name -> class letter; build a one-hot target
# matrix with one row per loaded record and one column per class.
# (import csv moved to the top-of-file import block; the file handle is now
#  closed via a context manager — the original left it open.)
with open(dataDir + 'REFERENCE.csv') as fh:
    labels = list(csv.reader(fh))
traintarget = np.zeros((trainset.shape[0], 4))
classes = ['A', 'N', 'O', '~']  # challenge class letters; column order of traintarget
for row, entry in enumerate(labels):
    traintarget[row, classes.index(entry[1])] = 1

# Saving both
scipy.io.savemat('trainingset.mat', mdict={'trainset': trainset, 'traintarget': traintarget})