--- a +++ b/ecgtoBR/create_dataset.py @@ -0,0 +1,92 @@ +import numpy as np +import os +import pandas as pd +import wfdb as wf +import argparse +from glob import glob +from tqdm import tqdm +from scipy.signal import resample +from sklearn.preprocessing import StandardScaler, MinMaxScaler +from sklearn.model_selection import train_test_split +import wget + +import torch + +from utils import dist_transform,getWindow + +def data_preprocess(args): + + """ Preprocess data and create train - validate split + """ + + dat_path = os.path.join(args.data_path,'*.dat') + paths = glob(dat_path) + + paths= sorted([path[:-4] for path in paths if path[-5] != "n"] ) + + fs = args.sampling_freq + fs_upsample = args.upsample_freq + WINDOWS = args.window_length + + ecgSignals = [] + BRSignals = [] + BRAnn1 = [] + BRAnn2 = [] + + for path in tqdm(paths): + + ann = wf.rdann(path,'breath') + samples = np.array(ann.sample) + aux_note = np.array(ann.aux_note) + ann1 = samples[(aux_note == "ann1")] + ann2 = samples[(aux_note == "ann2")] + record = wf.io.rdrecord(path) + + ecgSignals.append(record.p_signal[:,record.sig_name.index('II,')]) + BRSignals.append(record.p_signal[:,record.sig_name.index('RESP,')]) + BRAnn1.append(ann1) + BRAnn2.append(ann2) + + ecgSignals = np.array(ecgSignals,ndmin = 2) + BRSignals = np.array(BRSignals, ndmin = 2) + + signals = np.stack([ecgSignals,BRSignals], axis= -1 ) + + inputECG = [] + groundTruth = [] + + for i in tqdm(range(len(signals))): + + generateSignals = getWindow(signals[i],BRAnn2[i],windows=WINDOWS) + + for sig, ann in generateSignals: + + ecg = sig[:,0] + br = sig[:,1] + + if len(ecg) == 1 or len(ann) == 0: + break + + resampled = resample(ecg, WINDOWS*fs_upsample) + scaler = StandardScaler() + resampled = scaler.fit_transform(resampled.reshape((-1,1))) + transform = dist_transform(br,ann) + + if resampled.shape == (fs_upsample*WINDOWS,1) and transform.shape == (WINDOWS*fs,1): + inputECG.append(resampled.reshape((1,-1))) + groundTruth.append(transform.reshape((1,-1))) + + X_train,X_test,y_train,y_test = train_test_split(np.array(inputECG),np.array(groundTruth),test_size = 0.2, random_state = 42) + + X_train_toTensor = torch.Tensor(X_train).type(torch.float) + X_test_toTensor = torch.Tensor(X_test).type(torch.float) + y_train_toTensor = torch.Tensor(y_train).type(torch.float) + y_test_toTensor = torch.Tensor(y_test).type(torch.float) + + if not(os.path.exists('data')): + os.mkdir('data') + + torch.save(X_train_toTensor, "data/ecgtoBR_train_data.pt") + torch.save(y_train_toTensor, "data/ecgtoBR_train_labels.pt") + torch.save(X_test_toTensor, "data/ecgtoBR_test_data.pt") + torch.save(y_test_toTensor, "data/ecgtoBR_test_labels.pt") \ No newline at end of file