Switch to unified view

a b/featurebased-approach/ExtractFeatures.m
1
function ExtractFeatures(dbpath,varargin)
2
% This function extracts features for each record present  in a folder
3
%
4
%  Input:
5
%       - dbpath:         directory where database is
6
%       (optional inputs)
7
%           - useSegments:       segment signals into windows (bool)?
8
%           - windowSize:        size of window used in segmenting record
9
%           - percentageOverlap: overlap between windows
10
%
11
% --
12
% ECG classification from single-lead segments using Deep Convolutional Neural 
13
% Networks and Feature-Based Approaches - December 2017
14
% 
15
% Released under the GNU General Public License
16
%
17
% Copyright (C) 2017  Fernando Andreotti, Oliver Carr
18
% University of Oxford, Insitute of Biomedical Engineering, CIBIM Lab - Oxford 2017
19
% fernando.andreotti@eng.ox.ac.uk
20
%
21
% 
22
% For more information visit: https://github.com/fernandoandreotti/cinc-challenge2017
23
% 
24
% Referencing this work
25
%
26
% Andreotti, F., Carr, O., Pimentel, M.A.F., Mahdi, A., & De Vos, M. (2017). 
27
% Comparing Feature Based Classifiers and Convolutional Neural Networks to Detect 
28
% Arrhythmia from Short Segments of ECG. In Computing in Cardiology. Rennes (France).
29
%
30
% Last updated : December 2017
31
% 
32
% This program is free software: you can redistribute it and/or modify
33
% it under the terms of the GNU General Public License as published by
34
% the Free Software Foundation, either version 3 of the License, or
35
% (at your option) any later version.
36
% 
37
% This program is distributed in the hope that it will be useful,
38
% but WITHOUT ANY WARRANTY; without even the implied warranty of
39
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
40
% GNU General Public License for more details.
41
% 
42
% You should have received a copy of the GNU General Public License
43
% along with this program.  If not, see <http://www.gnu.org/licenses/>.
44
45
46
% Default arguments
47
slashchar = char('/'*isunix + '\'*(~isunix));
48
if ~strcmp(dbpath(end),slashchar)
49
    dbpath = [dbpath slashchar];
50
end
51
optargs = {1 10 0.8};  % default values for input arguments
52
newVals = cellfun(@(x) ~isempty(x), varargin);
53
optargs(newVals) = varargin(newVals);
54
[useSegments, windowSize, percentageOverlap] = optargs{:};
55
clear optargs newVals
56
57
% Parameters
58
NFEAT = 169; % number of features used
59
NFEAT_hrv = 113;
60
61
fs = 300;       % sampling frequency [Hz]
62
63
% Add subfunctions to matlab path
64
mainpath = (strrep(which(mfilename),['preparation' slashchar mfilename '.m'],''));
65
addpath(genpath([mainpath(1:end-length(mfilename)-2) 'subfunctions' slashchar])) % add subfunctions folder to path
66
67
68
% Find recordings
69
mkdir([dbpath 'featextract'])
70
cd([dbpath 'featextract' slashchar])
71
disp('Loading reference from Physionet..')
72
ref_filename = [dbpath 'REFERENCE.csv'];
73
websave(ref_filename,'https://physionet.org/challenge/2017/REFERENCE-v3.csv');
74
reference_tab = readtable(ref_filename,'ReadVariableNames',false);
75
fls = reference_tab{:,1};
76
clear dataArray delimiter ref_filename formatSpec fileID
77
78
79
%% Initialize loop
80
% Wide BP
81
Fhigh = 5;  % highpass frequency [Hz]
82
Flow = 45;   % low pass frequency [Hz]
83
Nbut = 10;     % order of Butterworth filter
84
d_bp= design(fdesign.bandpass('N,F3dB1,F3dB2',Nbut,Fhigh,Flow,fs),'butter');
85
[b_bp,a_bp] = tf(d_bp);
86
87
% Narrow BP
88
Fhigh = 1;  % highpass frequency [Hz]
89
Flow = 100;   % low pass frequency [Hz]
90
Nbut = 10;     % order of Butterworth filter
91
d_bp= design(fdesign.bandpass('N,F3dB1,F3dB2',Nbut,Fhigh,Flow,fs),'butter');
92
[b_bp2,a_bp2] = tf(d_bp);
93
clear Fhigh Flow Nbut d_bp
94
95
%% Run through files
96
allfeats = cell2table(cell(0,NFEAT+2));
97
for f = 1:length(fls)
98
    %% Loading data
99
    data = load([dbpath fls{f} '.mat']);
100
    fname = fls{f};
101
    signal = data.val;
102
    if size(signal,1)<size(signal,2), signal = signal'; end % make sure it's column vector
103
    signalraw =  signal;
104
    
105
    %% Preprocessing
106
    signal = filtfilt(b_bp,a_bp,signal);             % filtering narrow
107
    signal = detrend(signal);                        % detrending (optional)
108
    signal = signal - mean(signal);
109
    signal = signal/std(signal);                     % standardizing
110
    signalraw = filtfilt(b_bp2,a_bp2,signalraw);     % filtering wide
111
    signalraw = detrend(signalraw);                  % detrending (optional)
112
    signalraw = signalraw - mean(signalraw);
113
    signalraw = signalraw/std(signalraw);        % standardizing
114
    disp(['Preprocessed ' fname '...'])
115
    
116
    % Figuring out if segmentation is used
117
    if useSegments==1
118
        WINSIZE = windowSize; % window size (in sec)
119
        OLAP = percentageOverlap;
120
    else
121
        WINSIZE = length(signal)/fs;
122
        OLAP=0;
123
    end
124
    startp = 1;
125
    endp = WINSIZE*fs;
126
    looptrue = true;
127
    nseg = 1;
128
    while looptrue
129
        % Conditions to stop loop
130
        if length(signal) < WINSIZE*fs
131
            endp = length(signal);
132
            looptrue = false;
133
            continue
134
        end
135
        if nseg > 1
136
            startp(nseg) = startp(nseg-1) + round((1-OLAP)*WINSIZE*fs);
137
            if length(signal) - endp(nseg-1) < 0.5*WINSIZE*fs
138
                endp(nseg) = length(signal);
139
            else
140
                endp(nseg) = startp(nseg) + WINSIZE*fs -1;
141
            end
142
        end
143
        if endp(nseg) == length(signal)
144
            looptrue = false;
145
            nseg = nseg - 1;
146
        end
147
        nseg = nseg + 1;
148
    end
149
    
150
    % Obtain features for each available segment
151
    fetbag = {};
152
    feat_hrv = [];
153
    parfor n = 1:nseg
154
        % Get signal of interest
155
        sig_seg = signal(startp(n):endp(n));
156
        sig_segraw = signalraw(startp(n):endp(n));
157
        
158
        % QRS detect
159
        [qrsseg,featqrs] = multi_qrsdetect(sig_seg,fs,[fname '_s' num2str(n)]);
160
        
161
        % HRV features
162
        if length(qrsseg{end})>5 % if too few detections, returns zeros
163
            try
164
                feat_basic=HRV_features(sig_seg,qrsseg{end}./fs,fs);
165
                feats_poincare = get_poincare(qrsseg{end}./fs,fs);
166
                feat_hrv = [feat_basic, feats_poincare];
167
                feat_hrv(~isreal(feat_hrv)|isnan(feat_hrv)|isinf(feat_hrv)) = 0; % removing not numbers
168
            catch
169
                warning('Some HRV code failed.')
170
                feat_hrv = zeros(1,NFEAT_hrv);
171
            end
172
        else
173
            disp('Skipping HRV analysis due to shortage of peaks..')
174
            feat_hrv = zeros(1,NFEAT_hrv);
175
        end
176
        
177
        % Heart Rate features
178
        HRbpm = median(60./(diff(qrsseg{end}./fs)));
179
        %obvious cases: tachycardia ( > 100 beats per minute (bpm) in adults)
180
        feat_tachy = normcdf(HRbpm,120,20); % sampling from normal CDF
181
        %See e.g.   x = 10:10:200; p = normcdf(x,120,20); plot(x,p)
182
        
183
        %obvious cases: bradycardia ( < 60 bpm in adults)
184
        feat_brady = 1-normcdf(HRbpm,60,20);
185
        
186
        % SQI metrics
187
        feats_sqi = ecgsqi(sig_seg,qrsseg,fs);
188
        
189
        % Features on residual
190
        featsres = residualfeats(sig_segraw,fs,qrsseg{end});
191
        
192
        % Morphological features
193
        feats_morph = morphofeatures(sig_segraw,fs,qrsseg,[fname '_s' num2str(n)]);
194
        
195
        
196
        feat_fer=[featqrs,feat_tachy,feat_brady,double(feats_sqi),featsres,feats_morph];
197
        feat_fer(~isreal(feat_fer)|isnan(feat_fer)|isinf(feat_fer)) = 0; % removing not numbers
198
        
199
        % Save features to table for training
200
        feats = [feat_hrv,feat_fer];
201
        fetbag{n} = feats;
202
    end
203
    thisfeats = array2table([repmat(f,nseg,1),[1:nseg]',cell2mat(fetbag')]);%#ok<NBRAK>
204
    allfeats = [allfeats;thisfeats];
205
    
206
end
207
delete('gqrsdet*.*')
208
% diary off
209
%% Saving Output
210
% hardcoded feature names
211
names = {'rec_number' 'seg_number' 'sample_AFEv' 'meanRR' 'medianRR' 'SDNN' 'RMSSD' 'SDSD' 'NN50' 'pNN50' 'LFpeak' 'HFpeak' 'totalpower' 'LFpower' ...
212
    'HFpower' 'nLF' 'nHF' 'LFHF' 'PoincareSD1' 'PoincareSD2' 'SampEn' 'ApEn'  ...
213
    'RR' 'DET' 'ENTR' 'L' 'TKEO1'  'DAFa2' 'LZ' ...
214
    'Clvl1' 'Clvl2' 'Clvl3' 'Clvl4' 'Clvl5' 'Clvl6' 'Clvl7' 'Clvl8' 'Clvl9' ...
215
    'Clvl10' 'Dlvl1' 'Dlvl2' 'Dlvl3' 'Dlvl4' ...
216
    'Dlvl5' 'Dlvl6' 'Dlvl7' 'Dlvl8' 'Dlvl9' 'Dlvl10' ...
217
    'percR50' 'percR100' 'percR200' 'percR300' 'medRR' 'meddRR' 'iqrRR' 'iqrdRR' 'bins1' 'bins2' 'bins1nL' 'bins2nL' 'bins1nS' 'bins2nS' ...
218
    'edgebins1' 'edgebins2' 'edgebins1nL' 'edgebins2nL' 'edgebins1nS' 'edgebins2nS' 'minArea' 'minAreanL' 'minAreanS' ...
219
    'minCArea' 'minCAreanL' 'minCAreanS' 'Perim' 'PerimnL' 'PerimnS' 'PerimC' 'PerimCnL' 'PerimCnS' ...
220
    'DistCen' 'DistCennL' 'DistCennS' 'DistNN' 'DistNNnL' 'DistNNnS' 'DistNext' 'DistNextnL' 'DistNextnS' 'ClustDistMax' 'ClustDistMin' ...
221
    'ClustDistMean' 'ClustDistSTD' 'ClustDistMed' 'MajorAxis' 'percR3' 'percR5' 'percR10' 'percR20' 'percR30' 'percR40' ...
222
    'Xcent' 'Ycent' 'rad1' 'rad2' 'rad1rad2' 'theta' 'NoClust1' 'NoClust2' 'NoClust3' 'NoClust4' 'NoClust5' 'NoClust6' 'NoClust7'};
223
names = [names 'amp_varsqi' 'amp_stdsqi' 'amp_mean'];
224
names = [names 'tachy' 'brady' 'stdsqi' 'ksqi' 'ssqi' 'psqi'];
225
combs = nchoosek(1:5,2);
226
combs = num2cell(combs,2);
227
names = [names cellfun(@(x) strtrim(['bsqi_',num2str(x(1)) num2str(x(2))]),combs,'UniformOutput',false)'];
228
names = [names arrayfun(@(x) strtrim(['rsqi_',num2str(x)]),1:5,'UniformOutput',false)];
229
names = [names arrayfun(@(x) strtrim(['csqi_',num2str(x)]),1:5,'UniformOutput',false)];
230
names = [names arrayfun(@(x) strtrim(['xsqi_',num2str(x)]),1:5,'UniformOutput',false)];
231
names = [names 'res1' 'res2' 'res3' 'res4' 'res5'];
232
names = [names 'QRSheight','QRSwidth','QRSpow','noPwave','Pheight','Pwidth','Ppow','Theight',...
233
    'Twidth','Tpow','Theightnorm','Pheightnorm','Prelpow','PTrelpow','Trelpow','QTlen','PRlen'];
234
allfeats.Properties.VariableNames = names;
235
save(['allfeatures_olap' num2str(OLAP) '_win' num2str(WINSIZE) '.mat'],'allfeats','reference_tab');
236