--- a +++ b/Download_Raw_EEG_Data/Extract-Raw-Data-Into-Matlab-Files.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' + +Please see this before you run this file!!!!!! + +NOTICE: + Be advised that this “Extract-Raw-Data-Into-Matlab-Files.py” Python File + should only be executed under the Python 2 Environment. + I highly recommend to execute the file under the Python 2.7 Environment + because I have passed the test. + However, if you are using Python 3 Environment to run this file, + I'm afraid there will be an error and the generated labels will be wrong. + + If you have any question, please be sure to let me know. + My email is shuyuej@ieee.org. + Thanks a lot. + +''' +import numpy as np +import io +import os +import pyedflib +import scipy.io as sio +from scipy.signal import butter, filtfilt, iirnotch, lfilter + +MOVEMENT_START = 1 * 160 # MI starts 1s after trial begin +MOVEMENT_END = 5 * 160 # MI lasts 4 seconds +NOISE_LEVEL = 0.01 + +PHYSIONET_ELECTRODES = { + 1: "FC5", 2: "FC3", 3: "FC1", 4: "FCz", 5: "FC2", 6: "FC4", + 7: "FC6", 8: "C5", 9: "C3", 10: "C1", 11: "Cz", 12: "C2", + 13: "C4", 14: "C6", 15: "CP5", 16: "CP3", 17: "CP1", 18: "CPz", + 19: "CP2", 20: "CP4", 21: "CP6", 22: "Fp1", 23: "Fpz", 24: "Fp2", + 25: "AF7", 26: "AF3", 27: "AFz", 28: "AF4", 29: "AF8", 30: "F7", + 31: "F5", 32: "F3", 33: "F1", 34: "Fz", 35: "F2", 36: "F4", + 37: "F6", 38: "F8", 39: "FT7", 40: "FT8", 41: "T7", 42: "T8", + 43: "T9", 44: "T10", 45: "TP7", 46: "TP8", 47: "P7", 48: "P5", + 49: "P3", 50: "P1", 51: "Pz", 52: "P2", 53: "P4", 54: "P6", + 55: "P8", 56: "PO7", 57: "PO3", 58: "POz", 59: "PO4", 60: "PO8", + 61: "O1", 62: "Oz", 63: "O2", 64: "Iz"} + + +def load_edf_signals(path): + try: + sig = pyedflib.EdfReader(path) + n = sig.signals_in_file + signal_labels = sig.getSignalLabels() + sigbuf = np.zeros((n, sig.getNSamples()[0])) + + for j in np.arange(n): + sigbuf[j, :] = sig.readSignal(j) + # (n,3) annotations: [t in s, duration, type T0/T1/T2] + annotations = sig.read_annotation() + except KeyboardInterrupt: + # prevent memory leak and access problems of unclosed buffers + sig._close() + raise + + sig._close() + del sig + return sigbuf.transpose(), annotations + + +def get_physionet_electrode_positions(): + refpos = get_electrode_positions() + return np.array([refpos[PHYSIONET_ELECTRODES[idx]] for idx in range(1, 65)]) + + +def projection_2d(loc): + """ + Azimuthal equidistant projection (AEP) of 3D carthesian coordinates. + Preserves distance to origin while projecting to 2D carthesian space. + loc: N x 3 array of 3D points + returns: N x 2 array of projected 2D points + """ + x, y, z = loc[:, 0], loc[:, 1], loc[:, 2] + theta = np.arctan2(y, x) # theta = azimuth + rho = np.pi / 2 - np.arctan2(z, np.hypot(x, y)) # rho = pi/2 - elevation + return np.stack((np.multiply(rho, np.cos(theta)), np.multiply(rho, np.sin(theta))), 1) + + +def get_electrode_positions(): + """ + Returns a dictionary (Name) -> (x,y,z) of electrode name in the extended + 10-20 system and its carthesian coordinates in unit sphere. + """ + positions = dict() + with io.open("electrode_positions.txt", "r") as pos_file: + for line in pos_file: + parts = line.split() + positions[parts[0]] = tuple([float(part) for part in parts[1:]]) + return positions + + +def load_physionet_data(subject_id, num_classes=4, long_edge=False): + """ + subject_id: ID (1-109) for the subject to be loaded from file + num_classes: number of classes (2, 3 or 4) for L/R, L/R/0, L/R/0/F + long_edge: if False include 1s before and after MI, if True include 3s + returns (X, y, pos, fs) + X: Trials with shape (N_subjects, N_trials, N_samples, N_channels) + y: labels with shape (N_subjects, N_trials, N_classes) + pos: 2D projected electrode positions + fs: sample rate + """ + + SAMPLE_RATE = 160 + EEG_CHANNELS = 64 + BASELINE_RUN = 1 + + MI_RUNS = [4, 8, 12] # l/r fist + if num_classes >= 4: + MI_RUNS += [6, 10, 14] # feet (& fists) + + # total number of samples per long run + RUN_LENGTH = 125 * SAMPLE_RATE + + # length of single trial in seconds + TRIAL_LENGTH = 4 if not long_edge else 6 + NUM_TRIALS = 21 * num_classes + + n_runs = len(MI_RUNS) + X = np.zeros((n_runs, RUN_LENGTH, EEG_CHANNELS)) + events = [] + + base_path = 'S%03dR%02d.edf' + + for i_run, current_run in enumerate(MI_RUNS): + # load from file + path = base_path % (subject_id, current_run) + signals, annotations = load_edf_signals(path) + X[i_run, :signals.shape[0], :] = signals + + # read annotations + current_event = [i_run, 0, 0, 0] # run, class (l/r), start, end + + for annotation in annotations: + t = int(annotation[0] * SAMPLE_RATE * 1e-7) + action = int(annotation[2][1]) + + if action == 0 and current_event[1] != 0: + # make 6 second runs by extending snippet + length = TRIAL_LENGTH * SAMPLE_RATE + pad = (length - (t - current_event[2])) / 2 + + current_event[2] -= pad + (t - current_event[2]) % 2 + current_event[3] = t + pad + + if (current_run - 6) % 4 != 0 or current_event[1] == 2: + if (current_run - 6) % 4 == 0: + current_event[1] = 3 + events.append(current_event) + + elif action > 0: + current_event = [i_run, action, t, 0] + + # split runs into trials + num_mi_trials = len(events) + trials = np.zeros((NUM_TRIALS, TRIAL_LENGTH * SAMPLE_RATE, EEG_CHANNELS)) + labels = np.zeros((NUM_TRIALS, num_classes)) + + for i, ev in enumerate(events): + trials[i, :, :] = X[ev[0], ev[2]:ev[3]] + labels[i, ev[1] - 1] = 1. + + if num_classes < 3: + return (trials[:num_mi_trials, ...], + labels[:num_mi_trials, ...], + projection_2d(get_physionet_electrode_positions()), + SAMPLE_RATE) + else: + # baseline run + path = base_path % (subject_id, BASELINE_RUN) + signals, annotations = load_edf_signals(path) + SAMPLES = TRIAL_LENGTH * SAMPLE_RATE + for i in range(num_mi_trials, NUM_TRIALS): + offset = np.random.randint(0, signals.shape[0] - SAMPLES) + trials[i, :, :] = signals[offset: offset+SAMPLES, :] + labels[i, -1] = 1. + return trials, labels, projection_2d(get_physionet_electrode_positions()), SAMPLE_RATE + + +def load_raw_data(electrodes, subject=None, num_classes=4, long_edge=False): + # load from file + trials = [] + labels = [] + + if subject == None: + subject_ids = range(11, 15) + else: + try: + subject_ids = [int(subject)] + except: + subject_ids = subject + + for subject_id in subject_ids: + try: + t, l, loc, fs = load_physionet_data(subject_id, num_classes, long_edge=long_edge) + if num_classes == 2 and t.shape[0] != 42: + # drop subjects with less trials + continue + trials.append(t[:, :, electrodes]) + labels.append(l) + except: + pass + return np.array(trials, dtype=np.float64).reshape((len(trials),) + trials[0].shape + (1,)), \ + np.array(labels, dtype=np.float64) + + +def butter_bandpass(lowcut, highcut, fs, order=5): + nyq = 0.5 * fs + low = lowcut / nyq + high = highcut / nyq + b, a = butter(order, [low, high], btype='band') + return b, a + + +def butter_bandpass_filter(data, lowcut, highcut, fs, order=5): + b, a = butter_bandpass(lowcut, highcut, fs, order=order) + y = filtfilt(b, a, data) + return y + + +print("Start to save the Files!") + + +# Sample rate and desired cutoff frequencies (in Hz). +fs = 160.0 +lowcut = 5.0 +highcut = 30.0 + +# Save Dataset of 4 clases +nclasses = 4 + +# Save 20 subjects' dataset +SAVE = '20-Subjects' +if not os.path.exists(SAVE): + os.mkdir(SAVE) + +subject = range(1, 21) + +# Save 64 electrodes +for i in range(0, 64): + electrodes = [i] + X = 'X_' + str(i) + Y = 'Y_' + str(i) + X, Y = load_raw_data(electrodes=electrodes, subject=subject, num_classes=nclasses) + X = np.squeeze(X) + + sio.savemat(SAVE + 'Dataset_%d.mat' % int(i+1), {'Dataset': X}) + sio.savemat(SAVE + 'Labels_%d.mat' % int(i+1), {'Labels': Y}) + + print("Finished saving %d electrodes" % int(i+1)) + +# SAVE = '100-Subjects/' +# +# if not os.path.exists(SAVE): +# os.mkdir(SAVE) +# +# subject = range(1, 102) +# +# for i in range(0, 64): +# electrodes = [i] +# X = 'X_' + str(i) +# Y = 'Y_' + str(i) +# X, Y = load_raw_data(electrodes=electrodes, subject=subject, num_classes=nclasses) +# X = np.squeeze(X) +# +# # # Notch Filter +# # b, a = iirnotch(w0=60.0, Q=30.0, fs=fs) +# # X = lfilter(b, a, X) +# # +# # # Butterworth Band-pass filter +# # X = butter_bandpass_filter(X, lowcut, highcut, fs, order=4) +# +# sio.savemat(SAVE + 'Dataset_%d.mat' % int(i+1), {'Dataset': X}) +# sio.savemat(SAVE + 'Labels_%d.mat' % int(i+1), {'Labels': Y}) +# +# print("Finished saving %d electrodes" % int(i+1))