--- a +++ b/deeplearn-approach/cincset_files2matrix.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +''' +Convert multiple files from the Physionet/Computing in Cardiology challenge into +a single matrix file. The input directory is set through the dataDir parameter below. + +For more information visit: https://github.com/fernandoandreotti/cinc-challenge2017 + + Referencing this work + Andreotti, F., Carr, O., Pimentel, M.A.F., Mahdi, A., & De Vos, M. (2017). Comparing Feature Based + Classifiers and Convolutional Neural Networks to Detect Arrhythmia from Short Segments of ECG. In + Computing in Cardiology. Rennes (France). + +-- + cinc-challenge2017, version 1.0, Sept 2017 + Last updated : 27-09-2017 + Released under the GNU General Public License + + Copyright (C) 2017 Fernando Andreotti, Oliver Carr, Marco A.F. Pimentel, Adam Mahdi, Maarten De Vos + University of Oxford, Department of Engineering Science, Institute of Biomedical Engineering + fernando.andreotti@eng.ox.ac.uk + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +''' + + +import scipy.io +import numpy as np +import glob + +# Parameters +dataDir = '/some_path/' # <---- change!! 
# Standard-library helpers used only by the conversion code below.
import csv
import os

# Parameters
dataDir = 'training2017/'  # folder holding the *.mat records and REFERENCE.csv
FS = 300  # presumably the sampling rate in Hz -- confirm against the dataset
WINDOW_SIZE = 60*FS  # number of samples stored per record (row width)
classes = ['A', 'N', 'O', '~']  # column order of the one-hot target matrix


def normalize_record(data):
    """Return *data* with NaN/Inf replaced, zero mean and unit variance.

    A constant (or empty) signal has zero standard deviation; it is returned
    centred but unscaled so the output never contains NaN from a 0/0 division.
    """
    data = np.nan_to_num(np.asarray(data, dtype=float))  # removing NaNs and Infs
    if data.size == 0:
        return data
    data = data - np.mean(data)
    spread = np.std(data)
    if spread > 0:
        data = data / spread
    return data


def pad_or_truncate(data, length):
    """Return a 1-D array of exactly *length* samples.

    Longer signals are cut after *length* samples, shorter ones are padded
    with trailing zeros.
    """
    window = np.zeros(length)
    kept = min(length, len(data))
    window[:kept] = data[:kept]
    return window


def load_signals(data_dir, window_size):
    """Load every ``*.mat`` record found in *data_dir*.

    Returns a ``(records, trainset)`` pair: ``records`` lists the record
    names (file name without extension) in sorted file order, and
    ``trainset`` is a ``(len(records), window_size)`` array holding one
    normalized, zero-padded signal per row.
    """
    # os.path.join works whether or not data_dir ends with a separator.
    files = sorted(glob.glob(os.path.join(data_dir, "*.mat")))
    records = []
    trainset = np.zeros((len(files), window_size))
    for row, path in enumerate(files):
        record = os.path.splitext(os.path.basename(path))[0]
        print('Loading record {}'.format(record))
        mat_data = scipy.io.loadmat(path)
        # atleast_1d keeps a single-sample record indexable after squeeze().
        data = np.atleast_1d(mat_data['val'].squeeze())
        print('Preprocessing record {}'.format(record))
        trainset[row] = pad_or_truncate(normalize_record(data), window_size)
        records.append(record)
    return records, trainset


def load_labels(reference_path):
    """Read the reference CSV into a ``{record name: label}`` dictionary.

    NOTE(review): assumes the first column is the record name and the second
    the class label, as in the challenge's REFERENCE.csv -- confirm.
    Blank or single-column rows are skipped.
    """
    with open(reference_path, newline='') as handle:
        return {row[0].strip(): row[1].strip()
                for row in csv.reader(handle) if len(row) >= 2}


def one_hot_targets(records, labels, class_names):
    """Build the ``(len(records), len(class_names))`` one-hot target matrix.

    Labels are looked up by record name, so the CSV row order does not need
    to match the file order.

    Raises:
        KeyError: a record has no entry in *labels*.
        ValueError: a label is not listed in *class_names*.
    """
    target = np.zeros((len(records), len(class_names)))
    for row, record in enumerate(records):
        if record not in labels:
            raise KeyError('No reference label for record {}'.format(record))
        target[row, class_names.index(labels[record])] = 1
    return target


def main():
    """Convert the records in ``dataDir`` and save them to trainingset.mat."""
    ## Loading time series signals
    records, trainset = load_signals(dataDir, WINDOW_SIZE)

    ## Loading labels
    labels = load_labels(os.path.join(dataDir, 'REFERENCE.csv'))
    traintarget = one_hot_targets(records, labels, classes)

    # Saving both
    scipy.io.savemat('trainingset.mat',
                     mdict={'trainset': trainset, 'traintarget': traintarget})


if __name__ == "__main__":
    main()