"""Read raw WESAD recordings and turn them into a per-second feature matrix.

Flow: read data for all subjects -> extract windowed features
(preprocessing) -> train a model downstream.

The WESAD pickle for subject ``Sn`` lives at ``<root>/Sn/Sn.pkl`` and holds a
dict with keys ``'label'``, ``'subject'`` and ``'signal'``; ``'signal'`` maps
device name (``'wrist'`` / ``'chest'``) to a dict of sensor arrays.

NOTE(review): unused third-party imports from the original (matplotlib,
neurokit, seaborn, pandas) were removed -- they were only referenced inside
dead, commented-out exploration code.
"""
import os
import pickle

import numpy as np


def load_data(path, subject):
    """Load one subject's pickled WESAD recording.

    Parameters
    ----------
    path : str
        Root directory of the WESAD dataset.
    subject : str
        Subject id such as ``"S2"``; the file read is ``path/subject/subject.pkl``.

    Returns
    -------
    dict
        The unpickled recording (keys ``'label'``, ``'subject'``, ``'signal'``).
    """
    # encoding='latin1' is required because the WESAD pickles were written
    # by Python 2.  NOTE(security): pickle can execute arbitrary code --
    # only load trusted dataset files.
    # Unlike the original, build the path explicitly instead of mutating the
    # process-wide cwd with two os.chdir() calls.
    pkl_path = os.path.join(path, subject, subject + '.pkl')
    with open(pkl_path, 'rb') as file:
        return pickle.load(file, encoding='latin1')


class read_data_one_subject:
    """Accessor for a single subject's recording in the WESAD dataset."""

    def __init__(self, path, subject):
        # Top-level keys of the pickled dict and the sensor channels per device.
        self.keys = ['label', 'subject', 'signal']
        self.signal_keys = ['wrist', 'chest']
        self.chest_sensor_keys = ['ACC', 'ECG', 'EDA', 'EMG', 'Resp', 'Temp']
        self.wrist_sensor_keys = ['ACC', 'BVP', 'EDA', 'TEMP']
        # Reuse load_data() instead of duplicating the chdir/open/pickle
        # sequence; this also keeps the constructor free of cwd side effects.
        self.data = load_data(path, subject)

    def get_labels(self):
        """Return the per-sample condition labels (0-7; see execute())."""
        return self.data[self.keys[0]]

    def get_wrist_data(self):
        """Return the dict of wrist-worn sensor arrays ('ACC', 'BVP', 'EDA', 'TEMP')."""
        signal = self.data[self.keys[2]]
        return signal[self.signal_keys[0]]

    def get_chest_data(self):
        """Return the dict of chest-worn sensor arrays ('ACC', 'ECG', 'EDA', 'EMG', 'Resp', 'Temp')."""
        signal = self.data[self.keys[2]]
        return signal[self.signal_keys[1]]


def extract_mean_std_features(ecg_data, label=0, block=700):
    """Summarise a 1-D signal over non-overlapping windows of ``block`` samples.

    At the chest device's 700 Hz sampling rate the default block is one
    second of data.  Any trailing partial window is dropped.

    Parameters
    ----------
    ecg_data : 1-D array-like
        The raw signal (despite the name, any chest channel is accepted).
    label : int, optional
        Unused; kept only for backward compatibility with existing callers.
    block : int, optional
        Window length in samples (default 700).

    Returns
    -------
    np.ndarray of shape ``(len(ecg_data) // block, 4)``
        Columns are (mean, std, min, max) per window.
    """
    n_windows = len(ecg_data) // block
    mean_features = np.empty(n_windows, dtype=np.float64)
    std_features = np.empty(n_windows, dtype=np.float64)
    min_features = np.empty(n_windows, dtype=np.float64)
    max_features = np.empty(n_windows, dtype=np.float64)

    for idx in range(n_windows):
        # BUG FIX: the original advanced the cursor by a hard-coded 700
        # samples regardless of ``block``, producing skipped/uninitialized
        # windows for any block != 700.  Windows are now ``block`` wide.
        window = ecg_data[idx * block:(idx + 1) * block]
        mean_features[idx] = np.mean(window)
        std_features[idx] = np.std(window)
        min_features[idx] = np.amin(window)
        max_features[idx] = np.amax(window)

    return np.column_stack((mean_features, std_features, min_features, max_features))


def extract_one(chest_data_dict, idx, l_condition=0):
    """Build the labelled feature matrix for one condition of one subject.

    Parameters
    ----------
    chest_data_dict : dict
        Chest sensor arrays keyed by sensor name ('ECG', 'EDA', 'EMG', 'Temp', ...).
    idx : array of int
        Sample indices belonging to the condition (e.g. all baseline samples).
    l_condition : int, optional
        Label value appended as the final column of every row.

    Returns
    -------
    np.ndarray of shape ``(n_windows, 17)``
        4 window statistics x 4 sensors (EDA, Temp, ECG, EMG) + 1 label column.
    """
    ecg_features = extract_mean_std_features(chest_data_dict["ECG"][idx].flatten(), label=l_condition)
    eda_features = extract_mean_std_features(chest_data_dict["EDA"][idx].flatten(), label=l_condition)
    emg_features = extract_mean_std_features(chest_data_dict["EMG"][idx].flatten(), label=l_condition)
    temp_features = extract_mean_std_features(chest_data_dict["Temp"][idx].flatten(), label=l_condition)

    # Column order (EDA, Temp, ECG, EMG) is preserved from the original code.
    features = np.hstack((eda_features, temp_features, ecg_features, emg_features))
    label_array = np.full(len(features), l_condition)
    return np.column_stack((features, label_array))


def recur_print(ecg):
    """Recursively print the key hierarchy of a nested dict (debug helper).

    BUG FIX: the original tested ``while ecg is dict`` -- an identity
    comparison against the ``dict`` type itself, which is always False --
    so the function never printed anything.
    """
    if isinstance(ecg, dict):
        print(ecg.keys())
        for value in ecg.values():
            recur_print(value)


def execute(data_set_path="/media/jac/New Volume/Datasets/WESAD", subs=(2, 3, 4, 5, 6)):
    """Read every subject, extract per-second features and stack them.

    WESAD label meanings: 0 = undefined/transient, 1 = baseline, 2 = stress,
    3 = amusement, 4 = meditation, 5/6/7 = to be ignored.  Only baseline,
    stress and amusement segments are kept here.

    Parameters
    ----------
    data_set_path : str, optional
        Root directory of the WESAD dataset (was hard-coded in the original).
    subs : iterable of int, optional
        Subject numbers to read ('S2', 'S3', ...).

    Returns
    -------
    np.ndarray
        All subjects' windows stacked; 17 columns (16 features + label).
    """
    obj_data = {}
    labels = {}
    all_data = {}
    for i in subs:
        subject = 'S' + str(i)
        print("Reading data", subject)
        obj_data[subject] = read_data_one_subject(data_set_path, subject)
        labels[subject] = obj_data[subject].get_labels()

        # Chest channels: ACC (3 dims) + ECG + EDA + EMG + Resp + Temp = 8 dims.
        chest_data_dict = obj_data[subject].get_chest_data()
        chest_dict_length = {key: len(value) for key, value in chest_data_dict.items()}
        print(chest_dict_length)

        # Sample indices recorded under each condition of interest.
        baseline = np.asarray([idx for idx, val in enumerate(labels[subject]) if val == 1])
        stress = np.asarray([idx for idx, val in enumerate(labels[subject]) if val == 2])
        amusement = np.asarray([idx for idx, val in enumerate(labels[subject]) if val == 3])

        baseline_data = extract_one(chest_data_dict, baseline, l_condition=1)
        stress_data = extract_one(chest_data_dict, stress, l_condition=2)
        amusement_data = extract_one(chest_data_dict, amusement, l_condition=3)

        full_data = np.vstack((baseline_data, stress_data, amusement_data))
        print("One subject data", full_data.shape)
        all_data[subject] = full_data

    # BUG FIX: the original seeded ``data`` with the first subject and then
    # vstack-ed *every* subject again inside the same loop, duplicating the
    # first subject's rows in the returned matrix.
    data = np.vstack(list(all_data.values()))
    print(data.shape)
    return data


if __name__ == '__main__':
    execute()
    print("Read data file")