--- a
+++ b/Inference/preprocess_data.py
@@ -0,0 +1,114 @@
+import wfdb as wf
+import numpy as np
+import glob
+import os
+import matplotlib.pyplot as plt
+from tqdm import tqdm as tqdm
+from scipy import signal
+from sklearn.preprocessing import StandardScaler 
+# NOTE: StandardScaler is applied to each ECG window in windowing_and_resampling_br.
+
+def data_read(args):
+    """Load the ECG and respiration recordings of a single patient.
+
+    The files matching ``infant*`` under ``args.path_dir`` are sorted and
+    consumed in groups of seven, one group per patient.  Within a group the
+    offsets are used as follows: 0 -> extension of the annotation stored as
+    ``brad_onset``, 1 -> ECG record, 3 -> extension of the R-peak
+    annotation, 5 -> respiration record, 6 -> extension of the respiration
+    peak annotation.
+    NOTE(review): this presumes every patient contributes exactly seven
+    files that sort in that order -- confirm against the dataset layout.
+
+    Args:
+        args: object exposing ``patient_no`` (1-based patient index) and
+            ``path_dir`` (directory that holds the ``infant*`` files).
+
+    Returns:
+        dict with the keys ``ecg``, ``resp``, ``r_peaks``, ``resp_peaks``,
+        ``brad_onset``, ``ecg_fs`` and ``resp_fs``.  Every value is a list
+        with one entry for the requested patient, or an empty list when
+        ``patient_no`` does not address any group of files.
+
+    Raises:
+        IndexError: if the group selected by ``patient_no`` holds fewer
+            than seven files.
+    """
+    import re  # function-scope import: only needed for the sort key below
+
+    files_per_patient = 7
+
+    def natural_key(path):
+        # Compare digit runs numerically so that "infant2" sorts before
+        # "infant10"; a plain string sort puts "infant10" ahead of
+        # "infant1_" and shifts the patient <-> file mapping.
+        parts = re.split(r'(\d+)', os.path.basename(path))
+        # re.split with a capturing group puts the digit runs at odd indices.
+        return [int(part) if idx % 2 else part
+                for idx, part in enumerate(parts)]
+
+    patient = {'ecg': [], 'resp': [], 'r_peaks': [], 'resp_peaks': [],
+               'brad_onset': [], 'ecg_fs': [], 'resp_fs': []}
+
+    file_dirs = sorted(glob.glob(os.path.join(args.path_dir, 'infant*')),
+                       key=natural_key)
+
+    patient_idx = args.patient_no - 1
+    first = files_per_patient * patient_idx
+    if patient_idx < 0 or first >= len(file_dirs):
+        # No such patient: hand back the empty structure, as before.
+        return patient
+
+    group = file_dirs[first:first + files_per_patient]
+    if len(group) < files_per_patient:
+        raise IndexError(
+            'expected {} files for patient {}, found {}'.format(
+                files_per_patient, patient_idx + 1, len(group)))
+
+    print('-------- Acquiring data from patient {} --------'.format(patient_idx + 1))
+
+    # Record names are passed to wfdb without their file extension.
+    ecg_file_name = os.path.splitext(group[1])[0]
+    resp_file_name = os.path.splitext(group[5])[0]
+    # Annotation files are addressed by their extension (without the dot).
+    atr_ext = os.path.splitext(group[0])[1][1:]
+    qrsc_ext = os.path.splitext(group[3])[1][1:]
+    resp_ext = os.path.splitext(group[6])[1][1:]
+
+    # Read each record once; the Record object carries both the signal
+    # and its sampling frequency.
+    ecg_record = wf.io.rdrecord(ecg_file_name)
+    resp_record = wf.io.rdrecord(resp_file_name)
+
+    r_peaks_loc = wf.rdann(ecg_file_name, qrsc_ext).sample
+    resp_peak_loc = wf.rdann(resp_file_name, resp_ext).sample
+    brad_onset = wf.rdann(ecg_file_name, atr_ext).sample
+
+    patient['ecg'].append(ecg_record.p_signal)
+    patient['resp'].append(resp_record.p_signal)
+    # R-peaks come from the ECG annotation, not from the respiration peaks.
+    patient['r_peaks'].append(r_peaks_loc)
+    patient['resp_peaks'].append(resp_peak_loc)
+    patient['ecg_fs'].append(ecg_record.fs)
+    patient['resp_fs'].append(resp_record.fs)
+    patient['brad_onset'].append(brad_onset)
+
+    return patient
+
+def windowing_and_resampling_hr(patient):
+    """Cut every recording into 10-second windows and resample them.
+
+    ECG recordings are windowed twice: once with consecutive windows
+    overlapping by one second (stride of nine seconds) and once without
+    overlap.  Respiration recordings are windowed without overlap only.
+    Windows whose length differs from the target length (500 Hz * 10 s for
+    ECG, 50 Hz * 10 s for respiration) are resampled to it.  Only complete
+    windows are kept; a trailing remainder shorter than ten seconds is
+    dropped instead of being emitted truncated or empty.
+
+    Args:
+        patient: dict with the lists ``ecg``, ``resp``, ``ecg_fs`` and
+            ``resp_fs``; entry ``k`` of each list belongs to recording ``k``.
+            Recordings are sliced along their first axis.
+
+    Returns:
+        tuple ``(windowed_patient_overlap, windowed_patient)``.  Both are
+        dicts with the keys ``ecg`` and ``resp`` holding one list of
+        windows per recording slot.  ``windowed_patient_overlap['resp']``
+        is never filled and stays a list of empty lists.
+    """
+    no_sec = 10
+    final_ecg_sample_rate = 500
+    final_resp_sample_rate = 50
+    ecg_target_len = final_ecg_sample_rate * no_sec
+    resp_target_len = final_resp_sample_rate * no_sec
+
+    # Callers have always received ten slots per signal; keep at least that
+    # many and grow when more recordings are supplied.
+    n_slots = max(10, len(patient['ecg']), len(patient['resp']))
+    windowed_patient = {'ecg': [[] for _ in range(n_slots)],
+                        'resp': [[] for _ in range(n_slots)]}
+    windowed_patient_overlap = {'ecg': [[] for _ in range(n_slots)],
+                                'resp': [[] for _ in range(n_slots)]}
+
+    for infant_no, recording in enumerate(patient['ecg']):
+        fs = patient['ecg_fs'][infant_no]
+        # Slice bounds must be integers even if fs is stored as a float.
+        window_len = int(round(no_sec * fs))
+        stride = window_len - int(round(fs))  # one second of overlap
+        last_start = len(recording) - window_len
+        needs_resampling = window_len != ecg_target_len
+
+        for start in range(0, last_start + 1, stride):
+            window = recording[start:start + window_len]
+            if needs_resampling:
+                window = signal.resample(window, ecg_target_len)
+            windowed_patient_overlap['ecg'][infant_no].append(window)
+
+        # The non-overlapping windows get their own range: there are fewer
+        # of them than overlapping ones.
+        for start in range(0, last_start + 1, window_len):
+            window = recording[start:start + window_len]
+            if needs_resampling:
+                window = signal.resample(window, ecg_target_len)
+            windowed_patient['ecg'][infant_no].append(window)
+
+    for infant_no, recording in enumerate(patient['resp']):
+        fs = patient['resp_fs'][infant_no]
+        window_len = int(round(no_sec * fs))
+        needs_resampling = window_len != resp_target_len
+
+        for start in range(0, len(recording) - window_len + 1, window_len):
+            window = recording[start:start + window_len]
+            if needs_resampling:
+                window = custom_resample(window, fs)
+            windowed_patient['resp'][infant_no].append(window)
+
+    return windowed_patient_overlap, windowed_patient
+
+def windowing_and_resampling_br(patient):
+    """Cut every ECG recording into standardized, 50 %-overlapping windows.
+
+    Each window spans ten seconds of signal.  A window is passed through
+    ``StandardScaler.fit_transform`` on its own (the scaler is re-fitted
+    for every window) and, when the recording is not sampled at 500 Hz,
+    resampled to 500 Hz * 10 s samples afterwards.  Only complete windows
+    are kept; a trailing remainder shorter than ten seconds is dropped.
+
+    Args:
+        patient: dict with the lists ``ecg`` and ``ecg_fs``; entry ``k`` of
+            each list belongs to recording ``k``.  Recordings must be 2-D,
+            since they are fed to ``StandardScaler``, and are sliced along
+            their first axis.
+
+    Returns:
+        dict with the keys ``ecg`` and ``resp``, each a list with one list
+        of windows per recording slot.  ``resp`` is never filled and stays
+        a list of empty lists.
+    """
+    scaler = StandardScaler()
+    no_sec = 10
+    final_ecg_sample_rate = 500
+    overlap_percent = 50
+    target_len = final_ecg_sample_rate * no_sec
+
+    # Callers have always received ten slots; keep at least that many and
+    # grow when more recordings are supplied.
+    n_slots = max(10, len(patient['ecg']))
+    windowed_patient_overlap = {'ecg': [[] for _ in range(n_slots)],
+                                'resp': [[] for _ in range(n_slots)]}
+
+    for infant_no, recording in enumerate(patient['ecg']):
+        fs = patient['ecg_fs'][infant_no]
+        # Slice bounds must be integers even if fs is stored as a float.
+        window_len = int(round(no_sec * fs))
+        # Derive the overlap from the actual window length; a fixed
+        # 2500-sample overlap only fits 500 Hz and makes the stride zero
+        # at 250 Hz.
+        overlap = window_len * overlap_percent // 100
+        stride = window_len - overlap
+        needs_resampling = window_len != target_len
+
+        for start in range(0, len(recording) - window_len + 1, stride):
+            window = scaler.fit_transform(recording[start:start + window_len])
+            if needs_resampling:
+                window = signal.resample(window, target_len)
+            windowed_patient_overlap['ecg'][infant_no].append(window)
+
+    return windowed_patient_overlap
+
+def custom_resample(resp, fs, target_fs=50):
+    """Resample ``resp`` from ``fs`` to ``target_fs`` by picking samples.
+
+    Output sample ``i`` is input sample ``int(fs / target_fs * i)``; no
+    filtering or interpolation is applied.
+
+    Args:
+        resp: array-like signal, indexed along its first axis.
+        fs: sampling rate of ``resp``.
+        target_fs: sampling rate of the result; defaults to 50, the rate
+            that used to be hard-coded here.
+
+    Returns:
+        ``numpy.ndarray`` of floats holding ``int(len(resp) * target_fs /
+        fs)`` samples along its first axis.
+    """
+    resp = np.asarray(resp)
+    n_out = int(len(resp) * target_fs / fs)
+    # Truncate the fractional source positions toward zero, like int().
+    picks = (fs / target_fs * np.arange(n_out)).astype(int)
+    return resp[picks].astype(float)