|
a |
|
b/Inference/preprocess_data.py |
|
|
1 |
import wfdb as wf |
|
|
2 |
import numpy as np |
|
|
3 |
import glob |
|
|
4 |
import os |
|
|
5 |
import matplotlib.pyplot as plt |
|
|
6 |
from tqdm import tqdm as tqdm |
|
|
7 |
from scipy import signal |
|
|
8 |
from sklearn.preprocessing import StandardScaler |
|
|
9 |
### Use standard scaler |
|
|
10 |
|
|
|
11 |
def data_read(args):
    """Load the ECG/respiration signals and annotations of a single infant.

    Parameters
    ----------
    args : object
        Must expose ``patient_no`` (1-based index of the infant to load) and
        ``path_dir`` (directory containing the ``infant*`` files).

    Returns
    -------
    dict
        Keys ``'ecg'``, ``'resp'``, ``'r_peaks'``, ``'resp_peaks'``,
        ``'brad_onset'``, ``'ecg_fs'`` and ``'resp_fs'``. Each value is a list
        holding one entry for the requested infant, or an empty list when
        ``patient_no`` does not correspond to any group of files.
    """
    import re  # local import: only needed for the natural-sort key below

    files_per_patient = 7
    patient_idx = args.patient_no - 1

    def _natural_key(path):
        # Split the file name into text / digit runs so that "infant2" sorts
        # before "infant10" (plain string sorting puts "infant10" first).
        parts = re.split(r'(\d+)', os.path.basename(path))
        return [int(part) if idx % 2 else part for idx, part in enumerate(parts)]

    file_dirs = sorted(
        glob.glob(os.path.join(args.path_dir, 'infant*')),
        key=lambda path: (_natural_key(path), path),
    )
    patient = {'ecg': [], 'resp': [], 'r_peaks': [], 'resp_peaks': [],
               'brad_onset': [], 'ecg_fs': [], 'resp_fs': []}

    # Files are consumed in groups of seven per infant; jump straight to the
    # requested group instead of scanning every group for a match.
    first = files_per_patient * patient_idx
    if not 0 <= first < len(file_dirs):
        return patient

    print('-------- Acquiring data from patient {} --------'.format(patient_idx + 1))

    # Offsets inside the sorted group of seven files.
    # NOTE(review): presumably the layout is .atr, ecg .dat, ecg .hea, .qrsc,
    # resp .dat, resp .hea, .resp -- confirm against the dataset on disk.
    ecg_file_name = os.path.splitext(file_dirs[first + 1])[0]
    resp_file_name = os.path.splitext(file_dirs[first + 5])[0]
    qrsc_ext = os.path.splitext(file_dirs[first + 3])[1][1:]
    resp_ext = os.path.splitext(file_dirs[first + 6])[1][1:]
    atr_ext = os.path.splitext(file_dirs[first])[1][1:]

    # Read each record once; the Record object carries both the physical
    # signal and the sampling frequency.
    ecg_record = wf.io.rdrecord(ecg_file_name)
    resp_record = wf.io.rdrecord(resp_file_name)

    r_peaks_loc = wf.rdann(ecg_file_name, qrsc_ext).sample
    resp_peak_loc = wf.rdann(resp_file_name, resp_ext).sample
    brad_onset = wf.rdann(ecg_file_name, atr_ext).sample

    patient['ecg'].append(ecg_record.p_signal)
    patient['resp'].append(resp_record.p_signal)
    # R-peak locations come from the ECG annotation, not the respiration one.
    patient['r_peaks'].append(r_peaks_loc)
    patient['resp_peaks'].append(resp_peak_loc)
    patient['ecg_fs'].append(ecg_record.fs)
    patient['resp_fs'].append(resp_record.fs)
    patient['brad_onset'].append(brad_onset)

    return patient
|
|
50 |
|
|
|
51 |
def windowing_and_resampling_hr(patient):
    """Cut ECG and respiration recordings into 10-second windows.

    ECG is windowed twice: with a one-second overlap between consecutive
    windows and without overlap. Respiration is windowed without overlap
    only. Windows whose length differs from the target length (500 Hz for
    ECG, 50 Hz for respiration, times 10 s) are resampled; ECG through
    ``scipy.signal.resample`` and respiration through ``custom_resample``.

    Only complete windows are produced: a trailing stretch shorter than a
    full window is dropped, so every returned window covers exactly 10 s.

    Parameters
    ----------
    patient : dict
        Needs the keys ``'ecg'``, ``'resp'``, ``'ecg_fs'`` and ``'resp_fs'``,
        each a list with one entry per recording (signal array indexed by
        sample along axis 0, and its sampling frequency).

    Returns
    -------
    tuple
        ``(windowed_patient_overlap, windowed_patient)``. Both are dicts with
        the keys ``'ecg'`` and ``'resp'`` mapping to a list of per-recording
        window lists (at least 10 slots). ``windowed_patient_overlap['resp']``
        is never filled.
    """
    no_sec = 10
    final_ecg_sample_rate = 500
    final_resp_sample_rate = 50
    target_ecg_len = final_ecg_sample_rate * no_sec
    target_resp_len = final_resp_sample_rate * no_sec

    # Keep the historical 10 slots, but grow if more recordings are supplied.
    n_slots = max(10, len(patient['ecg']), len(patient['resp']))
    windowed_patient = {'ecg': [[] for _ in range(n_slots)],
                        'resp': [[] for _ in range(n_slots)]}
    windowed_patient_overlap = {'ecg': [[] for _ in range(n_slots)],
                                'resp': [[] for _ in range(n_slots)]}

    for infant_no, ecg in enumerate(patient['ecg']):
        fs = patient['ecg_fs'][infant_no]
        window_len = int(no_sec * fs)
        step = window_len - int(fs)  # consecutive windows share one second
        needs_resample = window_len != target_ecg_len
        last_start = len(ecg) - window_len  # last start that yields a full window

        # Overlapping windows.
        for start in range(0, last_start + 1, step):
            window = ecg[start:start + window_len]
            if needs_resample:
                window = signal.resample(window, target_ecg_len)
            windowed_patient_overlap['ecg'][infant_no].append(window)

        # Non-overlapping windows get their own loop: they advance by a full
        # window, so there are fewer of them than overlapping windows.
        for start in range(0, last_start + 1, window_len):
            window = ecg[start:start + window_len]
            if needs_resample:
                window = signal.resample(window, target_ecg_len)
            windowed_patient['ecg'][infant_no].append(window)

    for infant_no, resp in enumerate(patient['resp']):
        resp_fs = patient['resp_fs'][infant_no]
        window_len = int(no_sec * resp_fs)
        for start in range(0, len(resp) - window_len + 1, window_len):
            window = resp[start:start + window_len]
            if window_len != target_resp_len:
                window = custom_resample(window, resp_fs)
            windowed_patient['resp'][infant_no].append(window)

    return windowed_patient_overlap, windowed_patient
|
|
83 |
|
|
|
84 |
def windowing_and_resampling_br(patient):
    """Cut ECG recordings into standardised, 50 %-overlapping 10-second windows.

    Each window is passed through ``StandardScaler.fit_transform`` on its own
    (the scaler is re-fitted for every window) and then, when the recording
    is not sampled at 500 Hz, resampled to 5000 samples with
    ``scipy.signal.resample``.

    Only complete windows are produced: a trailing stretch shorter than a
    full window is dropped instead of being scaled/stretched as if it were
    10 s long.

    Parameters
    ----------
    patient : dict
        Needs the keys ``'ecg'`` and ``'ecg_fs'``, each a list with one entry
        per recording (2-D signal array indexed by sample along axis 0, and
        its sampling frequency).

    Returns
    -------
    dict
        Keys ``'ecg'`` and ``'resp'`` mapping to a list of per-recording
        window lists (at least 10 slots). ``'resp'`` is never filled.
    """
    scaler = StandardScaler()
    no_sec = 10
    final_ecg_sample_rate = 500
    overlap_percent = 50
    target_len = final_ecg_sample_rate * no_sec

    # Keep the historical 10 slots, but grow if more recordings are supplied.
    n_slots = max(10, len(patient['ecg']))
    windowed_patient_overlap = {'ecg': [[] for _ in range(n_slots)],
                                'resp': [[] for _ in range(n_slots)]}

    for infant_no, ecg in enumerate(patient['ecg']):
        fs = patient['ecg_fs'][infant_no]
        window_len = int(no_sec * fs)
        # Overlap is a fraction of this recording's window, not of a fixed
        # 5000-sample window; otherwise a 250 Hz recording gets a step of 0.
        overlap = int(overlap_percent / 100 * window_len)
        step = window_len - overlap

        for start in range(0, len(ecg) - window_len + 1, step):
            window = scaler.fit_transform(ecg[start:start + window_len])
            if window_len != target_len:
                window = signal.resample(window, target_len)
            windowed_patient_overlap['ecg'][infant_no].append(window)

    return windowed_patient_overlap
|
|
108 |
|
|
|
109 |
def custom_resample(resp, fs):
    """Resample ``resp`` from ``fs`` Hz to 50 Hz by picking samples by index.

    Output sample ``k`` is the input sample at index ``int(fs / 50 * k)``,
    converted to float; no filtering or interpolation is applied.

    Parameters
    ----------
    resp : sequence of numpy values
        Input samples; every element must provide ``astype``.
    fs : int or float
        Sampling frequency of ``resp``.

    Returns
    -------
    numpy.ndarray
        ``int(len(resp) * 50 / fs)`` picked samples.
    """
    stride = fs / 50
    n_out = int(len(resp) * 50 / fs)
    picked = [resp[int(stride * k)].astype(float) for k in range(n_out)]
    return np.asarray(picked)