# read_data.py -- load WESAD subject pickles and extract windowed statistical features.
import os
import pickle
import numpy as np
import matplotlib.pyplot as plt
import neurokit as nk
import seaborn as sns
import pandas as pd

def load_data(path, subject):
    """Given path and subject, load the pickled data of that subject."""
    os.chdir(path)
    os.chdir(subject)
    with open(subject + '.pkl', 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    return data

class read_data_one_subject:
    """Read data from WESAD dataset"""
    def __init__(self, path, subject):
        self.keys = ['label', 'subject', 'signal']
        self.signal_keys = ['wrist', 'chest']
        self.chest_sensor_keys = ['ACC', 'ECG', 'EDA', 'EMG', 'Resp', 'Temp']
        self.wrist_sensor_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        os.chdir(path)
        os.chdir(subject)
        with open(subject + '.pkl', 'rb') as file:
            data = pickle.load(file, encoding='latin1')
        self.data = data

    def get_labels(self):
        """Return the per-sample condition labels."""
        return self.data[self.keys[0]]

    def get_wrist_data(self):
        """Return the wrist signal dictionary (ACC, BVP, EDA, TEMP)."""
        #label = self.data[self.keys[0]]
        #assert subject == self.data[self.keys[1]]
        signal = self.data[self.keys[2]]
        wrist_data = signal[self.signal_keys[0]]
        #wrist_ACC = wrist_data[self.wrist_sensor_keys[0]]
        #wrist_ECG = wrist_data[self.wrist_sensor_keys[1]]
        return wrist_data

    def get_chest_data(self):
        """Return the chest signal dictionary (ACC, ECG, EDA, EMG, Resp, Temp)."""
        signal = self.data[self.keys[2]]
        chest_data = signal[self.signal_keys[1]]
        return chest_data

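# Minimal usage sketch (illustrative only): "/path/to/WESAD" is a placeholder,
# not a path from this repo. WESAD stores each subject as <subject>/<subject>.pkl
# under the dataset root, which is what read_data_one_subject expects.
def _demo_read_one_subject(dataset_root="/path/to/WESAD", subject="S2"):
    reader = read_data_one_subject(dataset_root, subject)
    labels = reader.get_labels()      # per-sample condition labels
    chest = reader.get_chest_data()   # dict with keys ACC, ECG, EDA, EMG, Resp, Temp
    print(subject, len(labels), list(chest.keys()))
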
def extract_mean_std_features(ecg_data, label=0, block=700):
    """Compute mean/std/min/max over consecutive, non-overlapping blocks of `block` samples."""
    #print (len(ecg_data))
    i = 0
    mean_features = np.empty(int(len(ecg_data)/block), dtype=np.float64)
    std_features = np.empty(int(len(ecg_data)/block), dtype=np.float64)
    max_features = np.empty(int(len(ecg_data)/block), dtype=np.float64)
    min_features = np.empty(int(len(ecg_data)/block), dtype=np.float64)

    idx = 0
    while i < len(ecg_data):
        temp = ecg_data[i:i+block]
        #print(len(temp))
        if idx < int(len(ecg_data)/block):
            mean_features[idx] = np.mean(temp)
            std_features[idx] = np.std(temp)
            min_features[idx] = np.amin(temp)
            max_features[idx] = np.amax(temp)
        i += block
        idx += 1
    #print(len(mean_features), len(std_features))
    #print(mean_features, std_features)
    features = {'mean': mean_features, 'std': std_features, 'min': min_features, 'max': max_features}

    # Stack the per-block statistics into a (n_blocks, 4) array.
    one_set = np.column_stack((mean_features, std_features, min_features, max_features))
    return one_set

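# Illustrative check, not part of the original pipeline: with the WESAD chest
# sampling rate of 700 Hz, block=700 corresponds to one-second windows, so a
# 3-second synthetic signal yields a (3, 4) array of [mean, std, min, max].
# The function name below is hypothetical and only meant as a usage sketch.
def _demo_feature_shape():
    fake_signal = np.random.randn(3 * 700)          # 3 seconds of synthetic samples
    feats = extract_mean_std_features(fake_signal)  # default block=700
    print(feats.shape)                              # expected: (3, 4)
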
def extract_one(chest_data_dict, idx, l_condition=0):
    """Extract windowed statistics for ECG/EDA/EMG/Temp at the given sample indices.

    Returns 4 statistics per signal (16 feature columns) plus the condition
    label as the last column.
    """
    ecg_data = chest_data_dict["ECG"][idx].flatten()
    ecg_features = extract_mean_std_features(ecg_data, label=l_condition)
    #print(ecg_features.shape)

    eda_data = chest_data_dict["EDA"][idx].flatten()
    eda_features = extract_mean_std_features(eda_data, label=l_condition)
    #print(eda_features.shape)

    emg_data = chest_data_dict["EMG"][idx].flatten()
    emg_features = extract_mean_std_features(emg_data, label=l_condition)
    #print(emg_features.shape)

    temp_data = chest_data_dict["Temp"][idx].flatten()
    temp_features = extract_mean_std_features(temp_data, label=l_condition)
    #print(temp_features.shape)

    baseline_data = np.hstack((eda_features, temp_features, ecg_features, emg_features))
    #print(len(baseline_data))
    label_array = np.full(len(baseline_data), l_condition)
    #print(label_array.shape)
    #print(baseline_data.shape)
    baseline_data = np.column_stack((baseline_data, label_array))
    #print(baseline_data.shape)
    return baseline_data

def recur_print(ecg):
    """Recursively print the keys of a nested dictionary."""
    if isinstance(ecg, dict):
        print(ecg.keys())
        for k in ecg.keys():
            recur_print(ecg[k])

def execute():
    data_set_path = "/media/jac/New Volume/Datasets/WESAD"
    file_path = "ecg.txt"
    subject = 'S3'
    obj_data = {}
    labels = {}
    all_data = {}
    subs = [2, 3, 4, 5, 6]
    for i in subs:
        subject = 'S' + str(i)
        print("Reading data", subject)
        obj_data[subject] = read_data_one_subject(data_set_path, subject)
        labels[subject] = obj_data[subject].get_labels()

        wrist_data_dict = obj_data[subject].get_wrist_data()
        wrist_dict_length = {key: len(value) for key, value in wrist_data_dict.items()}

        chest_data_dict = obj_data[subject].get_chest_data()
        chest_dict_length = {key: len(value) for key, value in chest_data_dict.items()}
        print(chest_dict_length)
        chest_data = np.concatenate((chest_data_dict['ACC'], chest_data_dict['ECG'], chest_data_dict['EDA'],
                                     chest_data_dict['EMG'], chest_data_dict['Resp'], chest_data_dict['Temp']), axis=1)
        # Get labels

        # Chest channel dimensions: 'ACC': 3, 'ECG': 1, 'EDA': 1, 'EMG': 1, 'Resp': 1, 'Temp': 1  ===> Total dimensions: 8
        # 8 label values: 0 = not defined / transient, 1 = baseline, 2 = stress, 3 = amusement,
        # 4 = meditation, 5/6/7 = should be ignored in this dataset

        # Do for each subject: collect the sample indices of each condition.
        baseline = np.asarray([idx for idx, val in enumerate(labels[subject]) if val == 1])
        # print("Baseline:", chest_data_dict['ECG'][baseline].shape)
        # print(baseline.shape)

        stress = np.asarray([idx for idx, val in enumerate(labels[subject]) if val == 2])
        # print(stress.shape)

        amusement = np.asarray([idx for idx, val in enumerate(labels[subject]) if val == 3])
        # print(amusement.shape)

        baseline_data = extract_one(chest_data_dict, baseline, l_condition=1)
        stress_data = extract_one(chest_data_dict, stress, l_condition=2)
        amusement_data = extract_one(chest_data_dict, amusement, l_condition=3)

        full_data = np.vstack((baseline_data, stress_data, amusement_data))
        print("One subject data", full_data.shape)
        all_data[subject] = full_data

    # Stack all subjects into a single feature matrix.
    i = 0
    for k, v in all_data.items():
        if i == 0:
            data = all_data[k]
            i += 1
        else:
            data = np.vstack((data, all_data[k]))
        print(all_data[k].shape)

    print(data.shape)
    return data

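# Hedged sketch of the "Train the model" step referenced in the flow note at the
# bottom of this file. It is not part of the original pipeline: scikit-learn,
# RandomForestClassifier and the train/test split are illustrative assumptions.
# Usage (after the feature matrix has been built): train_model(execute())
def train_model(feature_matrix, test_size=0.2, seed=42):
    """Fit a simple classifier on the matrix returned by execute().

    The last column is assumed to be the condition label (1=baseline, 2=stress,
    3=amusement); all remaining columns are the windowed statistics.
    """
    from sklearn.ensemble import RandomForestClassifier  # assumed dependency
    from sklearn.model_selection import train_test_split

    X, y = feature_matrix[:, :-1], feature_matrix[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=seed, stratify=y)
    clf = RandomForestClassifier(n_estimators=100, random_state=seed)
    clf.fit(X_train, y_train)
    print("Held-out accuracy:", clf.score(X_test, y_test))
    return clf
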
if __name__ == '__main__':
    execute()
    """
    ecg, eda = chest_data_dict['ECG'], chest_data_dict['EDA']
    x = [i for i in range(len(baseline))]
    for one in baseline:
        x = [i for i in range(99)]
        plt.plot(x, ecg[one:100])
        break
    """
    #x = [i for i in range(10000)]
    #plt.plot(x, chest_data_dict['ECG'][:10000])
    #plt.show()

    # BASELINE

    #                                    [ecg_features[k] for k in ecg_features.keys()])

    #ecg = nk.ecg_process(ecg=ecg_data, rsp=chest_data_dict['Resp'][baseline].flatten(), sampling_rate=700)
    #print(os.getcwd())

    """
    #recur_print
    print(type(ecg))
    print(ecg.keys())
    for k in ecg.keys():
        print(k)
        for i in ecg[k].keys():
            print(i)

    resp = nk.eda_process(eda=chest_data_dict['EDA'][baseline].flatten(), sampling_rate=700)
    resp = nk.rsp_process(chest_data_dict['Resp'][baseline].flatten(), sampling_rate=700)
    for k in resp.keys():
        print(k)
        for i in resp[k].keys():
            print(i)

    # For baseline, compute mean, std, for each 700 samples. (1 second values)

    #file_path = os.getcwd()
    with open(file_path, "w") as file:
        #file.write(str(ecg['df']))
        file.write(str(ecg['ECG']['HRV']['RR_Intervals']))
        file.write("...")
        file.write(str(ecg['RSP']))
        #file.write("RESP................")
        #file.write(str(resp['RSP']))
        #file.write(str(resp['df']))
        #print(type(ecg['ECG']['HRV']['RR_Intervals']))

        #file.write(str(ecg['ECG']['Cardiac_Cycles']))
        #print(type(ecg['ECG']['Cardiac_Cycles']))

        #file.write(ecg['ECG']['Cardiac_Cycles'].to_csv())

    # Plot the processed dataframe, normalizing all variables for viewing purpose
    """
    """
    bio = nk.bio_process(ecg=chest_data_dict["ECG"][baseline].flatten(), rsp=chest_data_dict['Resp'][baseline].flatten()
                         , eda=chest_data_dict["EDA"][baseline].flatten(), sampling_rate=700)
    #nk.z_score(bio["df"]).plot()

    print(bio["ECG"].keys())
    print(bio["EDA"].keys())
    print(bio["RSP"].keys())

    #ECG
    print(bio["ECG"]["HRV"])
    print(bio["ECG"]["R_Peaks"])

    #EDA
    print(bio["EDA"]["SCR_Peaks_Amplitudes"])
    print(bio["EDA"]["SCR_Onsets"])


    #RSP
    print(bio["RSP"]["Cycles_Onsets"])
    print(bio["RSP"]["Cycles_Length"])
    """
    print("Read data file")
    # Flow: Read data for all subjects -> Extract features (Preprocessing) -> Train the model