Switch to unified view

a b/Inference/preprocess_data.py
1
import wfdb as wf
2
import numpy as np
3
import glob
4
import os
5
import matplotlib.pyplot as plt
6
from tqdm import tqdm as tqdm
7
from scipy import signal
8
from sklearn.preprocessing import StandardScaler 
9
### Use standard scaler
10
11
def data_read(args):
    """Load the ECG, respiration and annotation data of one infant.

    Args:
        args: Object exposing ``patient_no`` (1-based position of the infant
            in the sorted file listing) and ``path_dir`` (directory that
            holds the ``infant*`` files).

    Returns:
        dict: Keys ``'ecg'``, ``'resp'``, ``'r_peaks'``, ``'resp_peaks'``,
        ``'brad_onset'``, ``'ecg_fs'`` and ``'resp_fs'``. Each value is a
        list holding one entry for the selected infant, or an empty list
        when ``patient_no`` does not address a file group in ``path_dir``.

    Raises:
        IndexError: If the selected group has fewer than seven files.
    """
    files_per_infant = 7
    patient_idx = args.patient_no - 1

    # NOTE(review): a plain lexicographic sort places 'infant10_*' before
    # 'infant1_*' ('0' < '_'), so patient_no may not match the number in the
    # file names -- confirm against the dataset layout.
    file_dirs = sorted(glob.glob(os.path.join(args.path_dir, 'infant*')))
    patient = {'ecg':[],'resp':[],'r_peaks':[],'resp_peaks':[],'brad_onset':[],'ecg_fs':[],'resp_fs':[]}

    first = files_per_infant * patient_idx
    if not 0 <= first < len(file_dirs):
        # No file group at this position: return the empty containers.
        return patient

    print('-------- Acquiring data from patient {} --------'.format(patient_idx + 1))

    # Positions inside the sorted group of seven files:
    #   +0 -> extension used for the bradycardia-onset annotations
    #   +1 -> ECG record, +3 -> extension of the QRS annotations
    #   +5 -> respiration record, +6 -> extension of the respiration annotations
    ecg_record_name = os.path.splitext(file_dirs[first + 1])[0]
    resp_record_name = os.path.splitext(file_dirs[first + 5])[0]
    atr_ext = os.path.splitext(file_dirs[first])[1][1:]
    qrsc_ext = os.path.splitext(file_dirs[first + 3])[1][1:]
    resp_ext = os.path.splitext(file_dirs[first + 6])[1][1:]

    # Read each record once; the Record object carries both the samples
    # (p_signal) and the sampling frequency (fs).
    ecg_record = wf.io.rdrecord(ecg_record_name)
    resp_record = wf.io.rdrecord(resp_record_name)

    r_peaks_loc = wf.rdann(ecg_record_name, qrsc_ext).sample
    resp_peak_loc = wf.rdann(resp_record_name, resp_ext).sample
    brad_onset = wf.rdann(ecg_record_name, atr_ext).sample

    patient['ecg'].append(ecg_record.p_signal)
    patient['resp'].append(resp_record.p_signal)
    # Store the ECG R-peak locations here (not the respiration peaks).
    patient['r_peaks'].append(r_peaks_loc)
    patient['resp_peaks'].append(resp_peak_loc)
    patient['ecg_fs'].append(ecg_record.fs)
    patient['resp_fs'].append(resp_record.fs)
    patient['brad_onset'].append(brad_onset)

    return patient
50
51
def windowing_and_resampling_hr(patient):
    """Cut ECG and respiration signals into 10-second windows.

    ECG signals are windowed twice: with a hop of nine seconds (consecutive
    windows share one second) and with a hop of ten seconds (no overlap).
    Respiration signals are windowed without overlap only. Windows whose
    length differs from the target length (500 Hz * 10 s for ECG,
    50 Hz * 10 s for respiration) are resampled: ECG with
    ``scipy.signal.resample``, respiration with ``custom_resample``.

    Only complete windows are produced; a trailing remainder shorter than
    ten seconds is dropped instead of being emitted as a short or empty
    window.

    Args:
        patient: dict with the lists ``'ecg'``, ``'resp'``, ``'ecg_fs'`` and
            ``'resp_fs'``; signals are sliced along their first axis.

    Returns:
        tuple: ``(windowed_patient_overlap, windowed_patient)``. Both are
        dicts with the keys ``'ecg'`` and ``'resp'`` mapping to one list of
        windows per record (at least 10 lists, unused ones stay empty).
        ``windowed_patient_overlap['resp']`` is never filled.
    """
    no_sec = 10
    final_ecg_sample_rate = 500
    final_resp_sample_rate = 50
    target_ecg_len = final_ecg_sample_rate * no_sec
    target_resp_len = final_resp_sample_rate * no_sec

    # Keep the historical 10 slots, but grow when more records are passed in.
    n_slots = max(10, len(patient['ecg']), len(patient['resp']))
    windowed_patient = {'ecg':[[] for _ in range(n_slots)],'resp':[[] for _ in range(n_slots)]}
    windowed_patient_overlap = {'ecg':[[] for _ in range(n_slots)],'resp':[[] for _ in range(n_slots)]}

    for infant_no, ecg in enumerate(patient['ecg']):
        # The sampling rate may arrive as a float; window sizes must be ints.
        fs = int(round(patient['ecg_fs'][infant_no]))
        window_len = no_sec * fs
        hop = window_len - fs  # one second shared between neighbours
        last_start = len(ecg) - window_len  # last start that fits a full window

        overlapped = [ecg[start:start + window_len]
                      for start in range(0, last_start + 1, hop)]
        # The non-overlapping windows get their own count; reusing the
        # overlapped count would index past the end of the signal.
        plain = [ecg[start:start + window_len]
                 for start in range(0, last_start + 1, window_len)]

        if window_len != target_ecg_len:
            overlapped = [signal.resample(window, target_ecg_len) for window in overlapped]
            plain = [signal.resample(window, target_ecg_len) for window in plain]

        windowed_patient_overlap['ecg'][infant_no] = overlapped
        windowed_patient['ecg'][infant_no] = plain

    for infant_no, resp in enumerate(patient['resp']):
        resp_fs = patient['resp_fs'][infant_no]
        window_len = int(round(no_sec * resp_fs))

        windows = [resp[start:start + window_len]
                   for start in range(0, len(resp) - window_len + 1, window_len)]
        if window_len != target_resp_len:
            windows = [custom_resample(window, resp_fs) for window in windows]

        windowed_patient['resp'][infant_no] = windows

    return windowed_patient_overlap,windowed_patient
83
84
def windowing_and_resampling_br(patient):
    """Cut ECG signals into standardised, 50 %-overlapping 10-second windows.

    Every window is passed through ``StandardScaler.fit_transform`` on its
    own and, when its length differs from 500 Hz * 10 s, resampled to that
    length with ``scipy.signal.resample``.

    The overlap is derived from the actual window length of each record, so
    records sampled at a rate other than 500 Hz get a 50 % overlap as well.
    Only complete windows are produced; a trailing remainder shorter than
    ten seconds is dropped.

    Args:
        patient: dict with the lists ``'ecg'`` and ``'ecg_fs'``; signals are
            sliced along their first axis.

    Returns:
        dict: Keys ``'ecg'`` and ``'resp'`` mapping to one list of windows
        per record (at least 10 lists, unused ones stay empty). ``'resp'``
        is never filled by this function.
    """
    no_sec = 10
    final_ecg_sample_rate = 500
    overlap_percent = 50
    target_len = final_ecg_sample_rate * no_sec

    scaler = StandardScaler()

    # Keep the historical 10 slots, but grow when more records are passed in.
    n_slots = max(10, len(patient['ecg']))
    windowed_patient_overlap = {'ecg':[[] for _ in range(n_slots)],'resp':[[] for _ in range(n_slots)]}

    for infant_no, ecg in enumerate(patient['ecg']):
        fs = patient['ecg_fs'][infant_no]
        # The sampling rate may arrive as a float; window sizes must be ints.
        window_len = int(round(no_sec * fs))
        # Overlap in samples, relative to this record's window length
        # (equals the former fixed 2500 samples when fs is 500 Hz).
        overlap = int(overlap_percent / 100 * window_len)
        hop = window_len - overlap

        # fit_transform refits the scaler on each window, so every window is
        # standardised with its own statistics.
        windows = [scaler.fit_transform(ecg[start:start + window_len])
                   for start in range(0, len(ecg) - window_len + 1, hop)]
        if window_len != target_len:
            windows = [signal.resample(window, target_len) for window in windows]

        windowed_patient_overlap['ecg'][infant_no] = windows

    return windowed_patient_overlap
108
109
def custom_resample(resp,fs,target_fs=50):
    """Resample a signal by picking samples at a fixed index ratio.

    Output sample ``i`` is input sample ``int(fs / target_fs * i)``; no
    filtering or interpolation is applied.

    Args:
        resp: Signal sampled at ``fs``; indexed along its first axis. NumPy
            arrays and plain sequences are both accepted.
        fs: Sampling rate of ``resp``.
        target_fs: Sampling rate of the result. Defaults to 50, the value
            that used to be hard-coded.

    Returns:
        numpy.ndarray: Float array with ``int(len(resp) * target_fs / fs)``
        entries along the first axis.
    """
    samples = np.asarray(resp)
    n_out = int(len(samples) * target_fs / fs)
    # Truncating the scaled positions mirrors int(fs / target_fs * i).
    picked = (fs / target_fs * np.arange(n_out)).astype(int)
    return samples[picked].astype(float)