|
a |
|
b/read_data.py |
|
|
1 |
import os |
|
|
2 |
import pickle |
|
|
3 |
import numpy as np |
|
|
4 |
import matplotlib.pyplot as plt |
|
|
5 |
import neurokit as nk |
|
|
6 |
import seaborn as sns |
|
|
7 |
import pandas as pd |
|
|
8 |
|
|
|
9 |
def load_data(path, subject):
    """Load the pickled recording of one subject.

    The file is read from ``<path>/<subject>/<subject>.pkl``.

    Args:
        path: Root directory of the dataset.
        subject: Subject identifier (e.g. ``'S3'``); used both as the
            sub-directory name and as the stem of the pickle file.

    Returns:
        The object stored in the pickle file.

    Raises:
        OSError: If the pickle file cannot be opened.
    """
    # Build the path instead of calling os.chdir(): changing the working
    # directory is a process-wide side effect and makes a second call with a
    # relative ``path`` fail.
    pickle_path = os.path.join(path, subject, subject + '.pkl')
    with open(pickle_path, 'rb') as file:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        # encoding='latin1' is kept as in the original call (presumably the
        # files were pickled under Python 2 — confirm).
        data = pickle.load(file, encoding='latin1')
    return data
|
|
16 |
|
|
|
17 |
class read_data_one_subject:
    """Read data from WESAD dataset

    Loads ``<path>/<subject>/<subject>.pkl`` once at construction time and
    exposes the label sequence and the wrist / chest signal dictionaries
    stored in it.
    """

    def __init__(self, path, subject):
        """Unpickle the recording of ``subject`` found under ``path``.

        Args:
            path: Root directory of the dataset.
            subject: Subject identifier (e.g. ``'S3'``); used both as the
                sub-directory name and as the stem of the pickle file.

        Raises:
            OSError: If the pickle file cannot be opened.
        """
        # Top-level keys of the unpickled dictionary.
        self.keys = ['label', 'subject', 'signal']
        # Keys of the 'signal' sub-dictionary, one per recording device.
        self.signal_keys = ['wrist', 'chest']
        # Sensor names listed for each device.
        self.chest_sensor_keys = ['ACC', 'ECG', 'EDA', 'EMG', 'Resp', 'Temp']
        self.wrist_sensor_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        # Build the path instead of calling os.chdir(): changing the working
        # directory is a process-wide side effect and breaks later relative
        # paths (including a second instance created with a relative ``path``).
        pickle_path = os.path.join(path, subject, subject + '.pkl')
        with open(pickle_path, 'rb') as file:
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            self.data = pickle.load(file, encoding='latin1')

    def get_labels(self):
        """Return the value stored under the ``'label'`` key."""
        return self.data[self.keys[0]]

    def get_wrist_data(self):
        """Return the ``'wrist'`` entry of the ``'signal'`` dictionary."""
        signal = self.data[self.keys[2]]
        return signal[self.signal_keys[0]]

    def get_chest_data(self):
        """Return the ``'chest'`` entry of the ``'signal'`` dictionary."""
        signal = self.data[self.keys[2]]
        return signal[self.signal_keys[1]]
|
|
48 |
|
|
|
49 |
def extract_mean_std_features(ecg_data, label=0, block=700):
    """Compute per-block summary statistics of a signal.

    The signal is cut into consecutive, non-overlapping blocks of ``block``
    samples; a trailing partial block is discarded. For every full block the
    mean, standard deviation, minimum and maximum are computed.

    Args:
        ecg_data: Sequence or array of samples (callers in this file pass a
            flattened 1-D array).
        label: Unused; kept so existing callers that pass it keep working.
        block: Number of samples per block. Must be a positive integer.

    Returns:
        A float64 array of shape ``(len(ecg_data) // block, 4)`` whose columns
        are, in order: mean, std, min, max.

    Raises:
        ValueError: If ``block`` is not positive.
    """
    if block <= 0:
        raise ValueError("block must be a positive integer")

    samples = np.asarray(ecg_data)
    n_blocks = len(samples) // block
    if n_blocks == 0:
        # Not even one full block: same empty (0, 4) result as before.
        return np.empty((0, 4), dtype=np.float64)

    # One row per block. The original loop advanced by a hard-coded 700
    # regardless of ``block``; reshaping uses ``block`` consistently.
    windows = samples[:n_blocks * block].reshape(n_blocks, -1)
    one_set = np.column_stack((
        windows.mean(axis=1),
        windows.std(axis=1),
        windows.min(axis=1),
        windows.max(axis=1),
    ))
    return one_set.astype(np.float64, copy=False)
|
|
74 |
|
|
|
75 |
def extract_one(chest_data_dict, idx, l_condition=0):
    """Build the labelled feature matrix for one condition.

    For each of the ECG, EDA, EMG and Temp entries of ``chest_data_dict`` the
    samples selected by ``idx`` are flattened and summarised with
    ``extract_mean_std_features``.

    Args:
        chest_data_dict: Mapping with at least the keys ``"ECG"``, ``"EDA"``,
            ``"EMG"`` and ``"Temp"``; each value must support indexing with
            ``idx`` and ``.flatten()``.
        idx: Index selecting the samples that belong to the condition.
        l_condition: Label value written into the last column.

    Returns:
        Array whose columns are the EDA, Temp, ECG and EMG feature groups
        (in that order) followed by one column filled with ``l_condition``.
    """
    # Features are computed in the same sensor order as before ...
    per_sensor = {}
    for sensor in ("ECG", "EDA", "EMG", "Temp"):
        selected = chest_data_dict[sensor][idx].flatten()
        per_sensor[sensor] = extract_mean_std_features(selected, label=l_condition)

    # ... and laid out side by side in the established column order.
    column_order = ("EDA", "Temp", "ECG", "EMG")
    feature_rows = np.hstack([per_sensor[name] for name in column_order])

    condition_column = np.full(len(feature_rows), l_condition)
    return np.column_stack((feature_rows, condition_column))
|
|
100 |
|
|
|
101 |
def recur_print(ecg):
    """Print the keys of ``ecg`` and of every dictionary nested inside it.

    Args:
        ecg: Any object. Non-dictionary values are ignored.
    """
    # The original test ``ecg is dict`` compared against the type object
    # itself and was never true for an actual dictionary; it was also a
    # ``while`` that could not terminate once the test was corrected.
    if not isinstance(ecg, dict):
        return
    print(ecg.keys())
    for value in ecg.values():
        recur_print(value)
|
|
106 |
|
|
|
107 |
def execute(data_set_path="/media/jac/New Volume/Datasets/WESAD", subs=(2, 3, 4, 5, 6)):
    """Read several subjects and return their stacked chest feature matrix.

    For every subject the chest signals are loaded, the samples labelled
    1, 2 and 3 are selected, and ``extract_one`` turns each selection into a
    labelled feature matrix. The per-subject matrices are then stacked
    vertically, each subject appearing exactly once.

    Label codes, as noted in the original comments of this file:
    0 = not defined / transient, 1 = baseline, 2 = stress, 3 = amusement,
    4 = meditation, 5/6/7 = should be ignored in this dataset.

    Args:
        data_set_path: Root directory of the dataset; defaults to the path
            that was previously hard-coded.
        subs: Subject numbers to read; subject ``n`` is looked up as ``'S<n>'``.

    Returns:
        The vertically stacked feature matrix of all requested subjects.

    Raises:
        ValueError: If ``subs`` is empty.
    """
    conditions = (1, 2, 3)
    all_data = {}

    for i in subs:
        subject = 'S' + str(i)
        print("Reading data", subject)
        reader = read_data_one_subject(data_set_path, subject)
        labels = np.asarray(reader.get_labels())

        chest_data_dict = reader.get_chest_data()
        chest_dict_length = {key: len(value) for key, value in chest_data_dict.items()}
        print(chest_dict_length)

        # Vectorised index lookup replaces a Python-level scan of every
        # sample; an absent condition yields an empty integer index.
        per_condition = [
            extract_one(chest_data_dict, np.flatnonzero(labels == condition),
                        l_condition=condition)
            for condition in conditions
        ]

        full_data = np.vstack(per_condition)
        print("One subject data", full_data.shape)
        all_data[subject] = full_data

    if not all_data:
        raise ValueError("subs must contain at least one subject")

    for subject_data in all_data.values():
        print(subject_data.shape)

    # Stack every subject once. The previous loop seeded the result with the
    # first subject and then stacked that same subject again, duplicating it.
    data = np.vstack(list(all_data.values()))

    print(data.shape)
    return data
|
|
165 |
|
|
|
166 |
# Script entry point: run execute() only when this file is executed directly.
if __name__ == '__main__':
    execute()
|
|
168 |
""" |
|
|
169 |
ecg, eda = chest_data_dict['ECG'], chest_data_dict['EDA'] |
|
|
170 |
x = [i for i in range(len(baseline))] |
|
|
171 |
for one in baseline: |
|
|
172 |
x = [i for i in range(99)] |
|
|
173 |
plt.plot(x, ecg[one:100]) |
|
|
174 |
break |
|
|
175 |
""" |
|
|
176 |
#x = [i for i in range(10000)] |
|
|
177 |
#plt.plot(x, chest_data_dict['ECG'][:10000]) |
|
|
178 |
#plt.show() |
|
|
179 |
|
|
|
180 |
# BASELINE |
|
|
181 |
|
|
|
182 |
# [ecg_features[k] for k in ecg_features.keys()]) |
|
|
183 |
|
|
|
184 |
#ecg = nk.ecg_process(ecg=ecg_data, rsp=chest_data_dict['Resp'][baseline].flatten(), sampling_rate=700) |
|
|
185 |
#print(os.getcwd()) |
|
|
186 |
|
|
|
187 |
""" |
|
|
188 |
#recur_print |
|
|
189 |
print(type(ecg)) |
|
|
190 |
print(ecg.keys()) |
|
|
191 |
for k in ecg.keys(): |
|
|
192 |
print(k) |
|
|
193 |
for i in ecg[k].keys(): |
|
|
194 |
print(i) |
|
|
195 |
|
|
|
196 |
|
|
|
197 |
resp = nk.eda_process(eda=chest_data_dict['EDA'][baseline].flatten(), sampling_rate=700) |
|
|
198 |
resp = nk.rsp_process(chest_data_dict['Resp'][baseline].flatten(), sampling_rate=700) |
|
|
199 |
for k in resp.keys(): |
|
|
200 |
print(k) |
|
|
201 |
for i in resp[k].keys(): |
|
|
202 |
print(i) |
|
|
203 |
|
|
|
204 |
# For baseline, compute mean, std, for each 700 samples. (1 second values) |
|
|
205 |
|
|
|
206 |
#file_path = os.getcwd() |
|
|
207 |
with open(file_path, "w") as file: |
|
|
208 |
#file.write(str(ecg['df'])) |
|
|
209 |
file.write(str(ecg['ECG']['HRV']['RR_Intervals'])) |
|
|
210 |
file.write("...") |
|
|
211 |
file.write(str(ecg['RSP'])) |
|
|
212 |
#file.write("RESP................") |
|
|
213 |
#file.write(str(resp['RSP'])) |
|
|
214 |
#file.write(str(resp['df'])) |
|
|
215 |
#print(type(ecg['ECG']['HRV']['RR_Intervals'])) |
|
|
216 |
|
|
|
217 |
#file.write(str(ecg['ECG']['Cardiac_Cycles'])) |
|
|
218 |
#print(type(ecg['ECG']['Cardiac_Cycles'])) |
|
|
219 |
|
|
|
220 |
#file.write(ecg['ECG']['Cardiac_Cycles'].to_csv()) |
|
|
221 |
|
|
|
222 |
# Plot the processed dataframe, normalizing all variables for viewing purpose |
|
|
223 |
""" |
|
|
224 |
""" |
|
|
225 |
bio = nk.bio_process(ecg=chest_data_dict["ECG"][baseline].flatten(), rsp=chest_data_dict['Resp'][baseline].flatten() |
|
|
226 |
, eda=chest_data_dict["EDA"][baseline].flatten(), sampling_rate=700) |
|
|
227 |
#nk.z_score(bio["df"]).plot() |
|
|
228 |
|
|
|
229 |
print(bio["ECG"].keys()) |
|
|
230 |
print(bio["EDA"].keys()) |
|
|
231 |
print(bio["RSP"].keys()) |
|
|
232 |
|
|
|
233 |
#ECG |
|
|
234 |
print(bio["ECG"]["HRV"]) |
|
|
235 |
print(bio["ECG"]["R_Peaks"]) |
|
|
236 |
|
|
|
237 |
#EDA |
|
|
238 |
print(bio["EDA"]["SCR_Peaks_Amplitudes"]) |
|
|
239 |
print(bio["EDA"]["SCR_Onsets"]) |
|
|
240 |
|
|
|
241 |
|
|
|
242 |
#RSP |
|
|
243 |
print(bio["RSP"]["Cycles_Onsets"]) |
|
|
244 |
print(bio["RSP"]["Cycles_Length"]) |
|
|
245 |
""" |
|
|
246 |
# Status message printed after the statements above.
# NOTE(review): indentation is not visible in this view, so it is unclear
# whether this runs only under the __main__ guard or on every import — confirm.
print("Read data file")
# Flow: read data for all subjects -> extract features (preprocessing) -> train the model
|
|
248 |
|