|
a |
|
b/preprocessOfApneaECG/preProcessing.py |
|
|
1 |
""" |
|
|
2 |
主要包括去燥, 计算特征信号等 |
|
|
3 |
""" |
|
|
4 |
from preprocessOfApneaECG.fileIO import get_database |
|
|
5 |
from preprocessOfApneaECG.denoising import denoise_ecg |
|
|
6 |
from preprocessOfApneaECG.list2mat import list2mat |
|
|
7 |
import os |
|
|
8 |
import numpy as np |
|
|
9 |
import matlab.engine |
|
|
10 |
import scipy.io as sio |
|
|
11 |
from scipy import interpolate |
|
|
12 |
from preprocessOfApneaECG.mit2Segments import ECG_RAW_FREQUENCY |
|
|
13 |
from scipy.signal import decimate |
|
|
14 |
|
|
|
15 |
eng = matlab.engine.start_matlab() |
|
|
16 |
|
|
|
17 |
# interpolation algorithm. |
|
|
18 |
# From https://github.com/rhenanbartels/hrv/blob/develop/hrv/classical.py |
|
|
19 |
|
|
|
20 |
|
|
|
21 |
def create_time_info(rri): |
|
|
22 |
rri_time = np.cumsum(rri) / 1000.0 # make it seconds |
|
|
23 |
return rri_time - rri_time[0] # force it to start at zero |
|
|
24 |
|
|
|
25 |
|
|
|
26 |
def create_interp_time(rri, fs): |
|
|
27 |
time_rri = create_time_info(rri) |
|
|
28 |
# print(time_rri[-1]) |
|
|
29 |
start, end = 0, 0 |
|
|
30 |
if time_rri[-1] < 60: |
|
|
31 |
end = 60 |
|
|
32 |
else: |
|
|
33 |
print("abnormal %s..." % time_rri[-1]) |
|
|
34 |
return np.arange(0, end, 1 / float(fs)) |
|
|
35 |
|
|
|
36 |
|
|
|
37 |
def interp_cubic_spline(rri, fs): |
|
|
38 |
time_rri = create_time_info(rri) |
|
|
39 |
time_rri_interp = create_interp_time(rri, fs) |
|
|
40 |
tck_rri = interpolate.splrep(time_rri, rri, s=0) |
|
|
41 |
rri_interp = interpolate.splev(time_rri_interp, tck_rri, der=0) |
|
|
42 |
return rri_interp |
|
|
43 |
|
|
|
44 |
|
|
|
45 |
def interp_cubic_spline_qrs(qrs_index, qrs_amp, fs): |
|
|
46 |
time_qrs = qrs_index / float(ECG_RAW_FREQUENCY) |
|
|
47 |
time_qrs = time_qrs - time_qrs[0] |
|
|
48 |
time_qrs_interp = np.arange(0, 60, 1 / float(fs)) |
|
|
49 |
tck = interpolate.splrep(time_qrs, qrs_amp, s=0) |
|
|
50 |
qrs_interp = interpolate.splev(time_qrs_interp, tck, der=0) |
|
|
51 |
return qrs_interp |
|
|
52 |
|
|
|
53 |
|
|
|
54 |
def smooth(a, WSZ): |
|
|
55 |
""" |
|
|
56 |
滑动平均. |
|
|
57 |
:param a: |
|
|
58 |
:param WSZ: |
|
|
59 |
:return: |
|
|
60 |
""" |
|
|
61 |
out0 = np.convolve(a, np.ones(WSZ, dtype=float), 'valid') / WSZ |
|
|
62 |
r = np.arange(1, WSZ - 1, 2) |
|
|
63 |
start = np.cumsum(a[:WSZ - 1])[::2] / r |
|
|
64 |
stop = (np.cumsum(a[:-WSZ:-1])[::2] / r)[::-1] |
|
|
65 |
return np.concatenate((start, out0, stop)) |
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
def mat2npy(dict_data): |
|
|
70 |
print("........") |
|
|
71 |
|
|
|
72 |
def rricheck(ecg_data, rr_intervals): |
|
|
73 |
""" |
|
|
74 |
# Check ECG data and RR intervals. |
|
|
75 |
:param numpy array ecg_data: ECG signal. |
|
|
76 |
:param numpy array rr_intervals: RR intervals. |
|
|
77 |
:return bool: |
|
|
78 |
""" |
|
|
79 |
noise_flag = rr_intervals > 180 |
|
|
80 |
noise_flag1 = rr_intervals < 30 |
|
|
81 |
if len(rr_intervals) < 40 \ |
|
|
82 |
or np.sum(noise_flag) > 0 \ |
|
|
83 |
or np.sum(noise_flag1) > 0 \ |
|
|
84 |
or len(ecg_data) != 6000: |
|
|
85 |
return False |
|
|
86 |
else: |
|
|
87 |
return True |
|
|
88 |
|
|
|
89 |
|
|
|
90 |
def compute_r_peak_amplitude(ecg_data, rwave): |
|
|
91 |
""" |
|
|
92 |
Compute R peaks amplitude based on R waves indices. |
|
|
93 |
:param numpy array ecg_data: ECG signal. |
|
|
94 |
:param numpy array rwave: R waves indices. |
|
|
95 |
:return numpy array: R peak amplitude. |
|
|
96 |
""" |
|
|
97 |
|
|
|
98 |
wave_amp = [] |
|
|
99 |
for peak_ind in rwave.tolist(): |
|
|
100 |
interval = 25 |
|
|
101 |
if peak_ind - interval < 0: |
|
|
102 |
start = 0 |
|
|
103 |
else: |
|
|
104 |
start = peak_ind - interval |
|
|
105 |
|
|
|
106 |
if peak_ind + interval > len(ecg_data): |
|
|
107 |
end = len(ecg_data) |
|
|
108 |
else: |
|
|
109 |
end = peak_ind + interval |
|
|
110 |
|
|
|
111 |
amp = np.max(ecg_data[start:end]) |
|
|
112 |
wave_amp.append(amp) |
|
|
113 |
return np.array(wave_amp) |
|
|
114 |
|
|
|
115 |
|
|
|
116 |
def pre_proc(dataset, database_name, is_debug=False): |
|
|
117 |
""" |
|
|
118 |
|
|
|
119 |
:param Mit2Segment list dataset: ECG segments. |
|
|
120 |
:return None: |
|
|
121 |
""" |
|
|
122 |
|
|
|
123 |
clear_id_set, noise_id_set = [], [] |
|
|
124 |
for segment in dataset: |
|
|
125 |
if is_debug: |
|
|
126 |
print("now process %s id=%s." % (segment.database_name, str(segment.global_id))) |
|
|
127 |
# denoising and write to txt file |
|
|
128 |
segment.denoised_ecg_data = denoise_ecg(segment.raw_ecg_data) |
|
|
129 |
segment.write_ecg_segment(rdf=1) |
|
|
130 |
# ecg data list to .mat |
|
|
131 |
list2mat(segment, is_debug=True) |
|
|
132 |
# compute RRI, RAMP and EDR. |
|
|
133 |
eng.computeFeatures(segment.base_file_path) |
|
|
134 |
if os.path.exists(segment.base_file_path + "/Rwave.mat"): |
|
|
135 |
RwaveMat = sio.loadmat(segment.base_file_path + "/Rwave.mat") |
|
|
136 |
Rwave = np.transpose(RwaveMat['Rwave']) |
|
|
137 |
Rwave = np.reshape(Rwave, len(Rwave)) |
|
|
138 |
# RR intervals |
|
|
139 |
RR_intervals = np.diff(Rwave) |
|
|
140 |
# store RR intervals |
|
|
141 |
np.save(segment.base_file_path + "/RRI.npy", RR_intervals) |
|
|
142 |
# RRI validity check |
|
|
143 |
rri_flag = rricheck(segment.denoised_ecg_data, RR_intervals) |
|
|
144 |
if rri_flag: |
|
|
145 |
clear_id_set.append(segment.global_id) |
|
|
146 |
else: |
|
|
147 |
noise_id_set.append(segment.global_id) |
|
|
148 |
continue |
|
|
149 |
# compute R peaks amplitude(RAMP) |
|
|
150 |
RAMP = compute_r_peak_amplitude(segment.denoised_ecg_data, Rwave) |
|
|
151 |
# smoothing filtering |
|
|
152 |
RRI = smooth(RR_intervals, 3) |
|
|
153 |
RAMP = smooth(RAMP, 3) |
|
|
154 |
# spline interpolation |
|
|
155 |
RRI = RRI / ECG_RAW_FREQUENCY * 1000.0 |
|
|
156 |
RRI = interp_cubic_spline(RRI, fs=4) |
|
|
157 |
RAMP = interp_cubic_spline_qrs(Rwave, RAMP, fs=4) |
|
|
158 |
# store RRI and RAMP |
|
|
159 |
np.save(segment.base_file_path + "/RRI.npy", RRI) |
|
|
160 |
np.save(segment.base_file_path + "/RAMP.npy", RAMP) |
|
|
161 |
# EDR |
|
|
162 |
EDRMat = sio.loadmat(segment.base_file_path + "/EDR.mat") |
|
|
163 |
EDR = np.transpose(EDRMat['EDR']) |
|
|
164 |
EDR = np.reshape(EDR, len(EDR)) |
|
|
165 |
# downsampling |
|
|
166 |
EDR = decimate(EDR, 25) |
|
|
167 |
np.save(segment.base_file_path + "/EDR.npy", EDR) |
|
|
168 |
# print(".............") |
|
|
169 |
else: |
|
|
170 |
noise_id_set.append(segment.global_id) |
|
|
171 |
print(len(noise_id_set)) |
|
|
172 |
print(len(clear_id_set)) |
|
|
173 |
np.save(database_name[0] + "_" + database_name[1] + "_clear_id.npy", np.array(clear_id_set)) |
|
|
174 |
np.save(database_name[0] + "_" + database_name[1] + "_noise_id.npy", np.array(noise_id_set)) |
|
|
175 |
|
|
|
176 |
|
|
|
177 |
if __name__ == '__main__': |
|
|
178 |
# train_set = get_database(["apnea-ecg", "train"], rdf=0,is_debug=True) |
|
|
179 |
# pre_proc(train_set, ["apnea-ecg", "train"], is_debug=True) |
|
|
180 |
test_set = get_database(["apnea-ecg", "test"], rdf=0, is_debug=True) |
|
|
181 |
pre_proc(test_set, ["apnea-ecg", "test"], is_debug=True) |
|
|
182 |
|
|
|
183 |
|
|
|
184 |
|
|
|
185 |
|