|
a |
|
b/featurebased-approach/ExtractFeatures.m |
|
|
1 |
function ExtractFeatures(dbpath,varargin) |
|
|
2 |
% This function extracts features for each record present in a folder |
|
|
3 |
% |
|
|
4 |
% Input: |
|
|
5 |
% - dbpath: directory where database is |
|
|
6 |
% (optional inputs) |
|
|
7 |
% - useSegments: segment signals into windows (bool)? |
|
|
8 |
% - windowSize: size of window used in segmenting record |
|
|
9 |
% - percentageOverlap: overlap between windows |
|
|
10 |
% |
|
|
11 |
% -- |
|
|
12 |
% ECG classification from single-lead segments using Deep Convolutional Neural |
|
|
13 |
% Networks and Feature-Based Approaches - December 2017 |
|
|
14 |
% |
|
|
15 |
% Released under the GNU General Public License |
|
|
16 |
% |
|
|
17 |
% Copyright (C) 2017 Fernando Andreotti, Oliver Carr |
|
|
18 |
% University of Oxford, Insitute of Biomedical Engineering, CIBIM Lab - Oxford 2017 |
|
|
19 |
% fernando.andreotti@eng.ox.ac.uk |
|
|
20 |
% |
|
|
21 |
% |
|
|
22 |
% For more information visit: https://github.com/fernandoandreotti/cinc-challenge2017 |
|
|
23 |
% |
|
|
24 |
% Referencing this work |
|
|
25 |
% |
|
|
26 |
% Andreotti, F., Carr, O., Pimentel, M.A.F., Mahdi, A., & De Vos, M. (2017). |
|
|
27 |
% Comparing Feature Based Classifiers and Convolutional Neural Networks to Detect |
|
|
28 |
% Arrhythmia from Short Segments of ECG. In Computing in Cardiology. Rennes (France). |
|
|
29 |
% |
|
|
30 |
% Last updated : December 2017 |
|
|
31 |
% |
|
|
32 |
% This program is free software: you can redistribute it and/or modify |
|
|
33 |
% it under the terms of the GNU General Public License as published by |
|
|
34 |
% the Free Software Foundation, either version 3 of the License, or |
|
|
35 |
% (at your option) any later version. |
|
|
36 |
% |
|
|
37 |
% This program is distributed in the hope that it will be useful, |
|
|
38 |
% but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
39 |
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
40 |
% GNU General Public License for more details. |
|
|
41 |
% |
|
|
42 |
% You should have received a copy of the GNU General Public License |
|
|
43 |
% along with this program. If not, see <http://www.gnu.org/licenses/>. |
|
|
44 |
|
|
|
45 |
|
|
|
46 |
% Default arguments |
|
|
47 |
slashchar = char('/'*isunix + '\'*(~isunix)); |
|
|
48 |
if ~strcmp(dbpath(end),slashchar) |
|
|
49 |
dbpath = [dbpath slashchar]; |
|
|
50 |
end |
|
|
51 |
optargs = {1 10 0.8}; % default values for input arguments |
|
|
52 |
newVals = cellfun(@(x) ~isempty(x), varargin); |
|
|
53 |
optargs(newVals) = varargin(newVals); |
|
|
54 |
[useSegments, windowSize, percentageOverlap] = optargs{:}; |
|
|
55 |
clear optargs newVals |
|
|
56 |
|
|
|
57 |
% Parameters |
|
|
58 |
NFEAT = 169; % number of features used |
|
|
59 |
NFEAT_hrv = 113; |
|
|
60 |
|
|
|
61 |
fs = 300; % sampling frequency [Hz] |
|
|
62 |
|
|
|
63 |
% Add subfunctions to matlab path |
|
|
64 |
mainpath = (strrep(which(mfilename),['preparation' slashchar mfilename '.m'],'')); |
|
|
65 |
addpath(genpath([mainpath(1:end-length(mfilename)-2) 'subfunctions' slashchar])) % add subfunctions folder to path |
|
|
66 |
|
|
|
67 |
|
|
|
68 |
% Find recordings |
|
|
69 |
mkdir([dbpath 'featextract']) |
|
|
70 |
cd([dbpath 'featextract' slashchar]) |
|
|
71 |
disp('Loading reference from Physionet..') |
|
|
72 |
ref_filename = [dbpath 'REFERENCE.csv']; |
|
|
73 |
websave(ref_filename,'https://physionet.org/challenge/2017/REFERENCE-v3.csv'); |
|
|
74 |
reference_tab = readtable(ref_filename,'ReadVariableNames',false); |
|
|
75 |
fls = reference_tab{:,1}; |
|
|
76 |
clear dataArray delimiter ref_filename formatSpec fileID |
|
|
77 |
|
|
|
78 |
|
|
|
79 |
%% Initialize loop |
|
|
80 |
% Wide BP |
|
|
81 |
Fhigh = 5; % highpass frequency [Hz] |
|
|
82 |
Flow = 45; % low pass frequency [Hz] |
|
|
83 |
Nbut = 10; % order of Butterworth filter |
|
|
84 |
d_bp= design(fdesign.bandpass('N,F3dB1,F3dB2',Nbut,Fhigh,Flow,fs),'butter'); |
|
|
85 |
[b_bp,a_bp] = tf(d_bp); |
|
|
86 |
|
|
|
87 |
% Narrow BP |
|
|
88 |
Fhigh = 1; % highpass frequency [Hz] |
|
|
89 |
Flow = 100; % low pass frequency [Hz] |
|
|
90 |
Nbut = 10; % order of Butterworth filter |
|
|
91 |
d_bp= design(fdesign.bandpass('N,F3dB1,F3dB2',Nbut,Fhigh,Flow,fs),'butter'); |
|
|
92 |
[b_bp2,a_bp2] = tf(d_bp); |
|
|
93 |
clear Fhigh Flow Nbut d_bp |
|
|
94 |
|
|
|
95 |
%% Run through files |
|
|
96 |
allfeats = cell2table(cell(0,NFEAT+2)); |
|
|
97 |
for f = 1:length(fls) |
|
|
98 |
%% Loading data |
|
|
99 |
data = load([dbpath fls{f} '.mat']); |
|
|
100 |
fname = fls{f}; |
|
|
101 |
signal = data.val; |
|
|
102 |
if size(signal,1)<size(signal,2), signal = signal'; end % make sure it's column vector |
|
|
103 |
signalraw = signal; |
|
|
104 |
|
|
|
105 |
%% Preprocessing |
|
|
106 |
signal = filtfilt(b_bp,a_bp,signal); % filtering narrow |
|
|
107 |
signal = detrend(signal); % detrending (optional) |
|
|
108 |
signal = signal - mean(signal); |
|
|
109 |
signal = signal/std(signal); % standardizing |
|
|
110 |
signalraw = filtfilt(b_bp2,a_bp2,signalraw); % filtering wide |
|
|
111 |
signalraw = detrend(signalraw); % detrending (optional) |
|
|
112 |
signalraw = signalraw - mean(signalraw); |
|
|
113 |
signalraw = signalraw/std(signalraw); % standardizing |
|
|
114 |
disp(['Preprocessed ' fname '...']) |
|
|
115 |
|
|
|
116 |
% Figuring out if segmentation is used |
|
|
117 |
if useSegments==1 |
|
|
118 |
WINSIZE = windowSize; % window size (in sec) |
|
|
119 |
OLAP = percentageOverlap; |
|
|
120 |
else |
|
|
121 |
WINSIZE = length(signal)/fs; |
|
|
122 |
OLAP=0; |
|
|
123 |
end |
|
|
124 |
startp = 1; |
|
|
125 |
endp = WINSIZE*fs; |
|
|
126 |
looptrue = true; |
|
|
127 |
nseg = 1; |
|
|
128 |
while looptrue |
|
|
129 |
% Conditions to stop loop |
|
|
130 |
if length(signal) < WINSIZE*fs |
|
|
131 |
endp = length(signal); |
|
|
132 |
looptrue = false; |
|
|
133 |
continue |
|
|
134 |
end |
|
|
135 |
if nseg > 1 |
|
|
136 |
startp(nseg) = startp(nseg-1) + round((1-OLAP)*WINSIZE*fs); |
|
|
137 |
if length(signal) - endp(nseg-1) < 0.5*WINSIZE*fs |
|
|
138 |
endp(nseg) = length(signal); |
|
|
139 |
else |
|
|
140 |
endp(nseg) = startp(nseg) + WINSIZE*fs -1; |
|
|
141 |
end |
|
|
142 |
end |
|
|
143 |
if endp(nseg) == length(signal) |
|
|
144 |
looptrue = false; |
|
|
145 |
nseg = nseg - 1; |
|
|
146 |
end |
|
|
147 |
nseg = nseg + 1; |
|
|
148 |
end |
|
|
149 |
|
|
|
150 |
% Obtain features for each available segment |
|
|
151 |
fetbag = {}; |
|
|
152 |
feat_hrv = []; |
|
|
153 |
parfor n = 1:nseg |
|
|
154 |
% Get signal of interest |
|
|
155 |
sig_seg = signal(startp(n):endp(n)); |
|
|
156 |
sig_segraw = signalraw(startp(n):endp(n)); |
|
|
157 |
|
|
|
158 |
% QRS detect |
|
|
159 |
[qrsseg,featqrs] = multi_qrsdetect(sig_seg,fs,[fname '_s' num2str(n)]); |
|
|
160 |
|
|
|
161 |
% HRV features |
|
|
162 |
if length(qrsseg{end})>5 % if too few detections, returns zeros |
|
|
163 |
try |
|
|
164 |
feat_basic=HRV_features(sig_seg,qrsseg{end}./fs,fs); |
|
|
165 |
feats_poincare = get_poincare(qrsseg{end}./fs,fs); |
|
|
166 |
feat_hrv = [feat_basic, feats_poincare]; |
|
|
167 |
feat_hrv(~isreal(feat_hrv)|isnan(feat_hrv)|isinf(feat_hrv)) = 0; % removing not numbers |
|
|
168 |
catch |
|
|
169 |
warning('Some HRV code failed.') |
|
|
170 |
feat_hrv = zeros(1,NFEAT_hrv); |
|
|
171 |
end |
|
|
172 |
else |
|
|
173 |
disp('Skipping HRV analysis due to shortage of peaks..') |
|
|
174 |
feat_hrv = zeros(1,NFEAT_hrv); |
|
|
175 |
end |
|
|
176 |
|
|
|
177 |
% Heart Rate features |
|
|
178 |
HRbpm = median(60./(diff(qrsseg{end}./fs))); |
|
|
179 |
%obvious cases: tachycardia ( > 100 beats per minute (bpm) in adults) |
|
|
180 |
feat_tachy = normcdf(HRbpm,120,20); % sampling from normal CDF |
|
|
181 |
%See e.g. x = 10:10:200; p = normcdf(x,120,20); plot(x,p) |
|
|
182 |
|
|
|
183 |
%obvious cases: bradycardia ( < 60 bpm in adults) |
|
|
184 |
feat_brady = 1-normcdf(HRbpm,60,20); |
|
|
185 |
|
|
|
186 |
% SQI metrics |
|
|
187 |
feats_sqi = ecgsqi(sig_seg,qrsseg,fs); |
|
|
188 |
|
|
|
189 |
% Features on residual |
|
|
190 |
featsres = residualfeats(sig_segraw,fs,qrsseg{end}); |
|
|
191 |
|
|
|
192 |
% Morphological features |
|
|
193 |
feats_morph = morphofeatures(sig_segraw,fs,qrsseg,[fname '_s' num2str(n)]); |
|
|
194 |
|
|
|
195 |
|
|
|
196 |
feat_fer=[featqrs,feat_tachy,feat_brady,double(feats_sqi),featsres,feats_morph]; |
|
|
197 |
feat_fer(~isreal(feat_fer)|isnan(feat_fer)|isinf(feat_fer)) = 0; % removing not numbers |
|
|
198 |
|
|
|
199 |
% Save features to table for training |
|
|
200 |
feats = [feat_hrv,feat_fer]; |
|
|
201 |
fetbag{n} = feats; |
|
|
202 |
end |
|
|
203 |
thisfeats = array2table([repmat(f,nseg,1),[1:nseg]',cell2mat(fetbag')]);%#ok<NBRAK> |
|
|
204 |
allfeats = [allfeats;thisfeats]; |
|
|
205 |
|
|
|
206 |
end |
|
|
207 |
delete('gqrsdet*.*') |
|
|
208 |
% diary off |
|
|
209 |
%% Saving Output |
|
|
210 |
% hardcoded feature names |
|
|
211 |
names = {'rec_number' 'seg_number' 'sample_AFEv' 'meanRR' 'medianRR' 'SDNN' 'RMSSD' 'SDSD' 'NN50' 'pNN50' 'LFpeak' 'HFpeak' 'totalpower' 'LFpower' ... |
|
|
212 |
'HFpower' 'nLF' 'nHF' 'LFHF' 'PoincareSD1' 'PoincareSD2' 'SampEn' 'ApEn' ... |
|
|
213 |
'RR' 'DET' 'ENTR' 'L' 'TKEO1' 'DAFa2' 'LZ' ... |
|
|
214 |
'Clvl1' 'Clvl2' 'Clvl3' 'Clvl4' 'Clvl5' 'Clvl6' 'Clvl7' 'Clvl8' 'Clvl9' ... |
|
|
215 |
'Clvl10' 'Dlvl1' 'Dlvl2' 'Dlvl3' 'Dlvl4' ... |
|
|
216 |
'Dlvl5' 'Dlvl6' 'Dlvl7' 'Dlvl8' 'Dlvl9' 'Dlvl10' ... |
|
|
217 |
'percR50' 'percR100' 'percR200' 'percR300' 'medRR' 'meddRR' 'iqrRR' 'iqrdRR' 'bins1' 'bins2' 'bins1nL' 'bins2nL' 'bins1nS' 'bins2nS' ... |
|
|
218 |
'edgebins1' 'edgebins2' 'edgebins1nL' 'edgebins2nL' 'edgebins1nS' 'edgebins2nS' 'minArea' 'minAreanL' 'minAreanS' ... |
|
|
219 |
'minCArea' 'minCAreanL' 'minCAreanS' 'Perim' 'PerimnL' 'PerimnS' 'PerimC' 'PerimCnL' 'PerimCnS' ... |
|
|
220 |
'DistCen' 'DistCennL' 'DistCennS' 'DistNN' 'DistNNnL' 'DistNNnS' 'DistNext' 'DistNextnL' 'DistNextnS' 'ClustDistMax' 'ClustDistMin' ... |
|
|
221 |
'ClustDistMean' 'ClustDistSTD' 'ClustDistMed' 'MajorAxis' 'percR3' 'percR5' 'percR10' 'percR20' 'percR30' 'percR40' ... |
|
|
222 |
'Xcent' 'Ycent' 'rad1' 'rad2' 'rad1rad2' 'theta' 'NoClust1' 'NoClust2' 'NoClust3' 'NoClust4' 'NoClust5' 'NoClust6' 'NoClust7'}; |
|
|
223 |
names = [names 'amp_varsqi' 'amp_stdsqi' 'amp_mean']; |
|
|
224 |
names = [names 'tachy' 'brady' 'stdsqi' 'ksqi' 'ssqi' 'psqi']; |
|
|
225 |
combs = nchoosek(1:5,2); |
|
|
226 |
combs = num2cell(combs,2); |
|
|
227 |
names = [names cellfun(@(x) strtrim(['bsqi_',num2str(x(1)) num2str(x(2))]),combs,'UniformOutput',false)']; |
|
|
228 |
names = [names arrayfun(@(x) strtrim(['rsqi_',num2str(x)]),1:5,'UniformOutput',false)]; |
|
|
229 |
names = [names arrayfun(@(x) strtrim(['csqi_',num2str(x)]),1:5,'UniformOutput',false)]; |
|
|
230 |
names = [names arrayfun(@(x) strtrim(['xsqi_',num2str(x)]),1:5,'UniformOutput',false)]; |
|
|
231 |
names = [names 'res1' 'res2' 'res3' 'res4' 'res5']; |
|
|
232 |
names = [names 'QRSheight','QRSwidth','QRSpow','noPwave','Pheight','Pwidth','Ppow','Theight',... |
|
|
233 |
'Twidth','Tpow','Theightnorm','Pheightnorm','Prelpow','PTrelpow','Trelpow','QTlen','PRlen']; |
|
|
234 |
allfeats.Properties.VariableNames = names; |
|
|
235 |
save(['allfeatures_olap' num2str(OLAP) '_win' num2str(WINSIZE) '.mat'],'allfeats','reference_tab'); |
|
|
236 |
|