Diff of /preprocess_HGD.py [000000] .. [56a69a]

Switch to side-by-side view

--- a
+++ b/preprocess_HGD.py
@@ -0,0 +1,129 @@
+""" 
+Copyright (C) 2022 King Saud University, Saudi Arabia 
+SPDX-License-Identifier: Apache-2.0 
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+this file except in compliance with the License. You may obtain a copy of the 
+License at
+
+http://www.apache.org/licenses/LICENSE-2.0  
+
+Unless required by applicable law or agreed to in writing, software distributed
+under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 
+CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License. 
+
+Author:  Hamdi Altaheri 
+"""
+
+#%%
+# We need the following to load and preprocess the High Gamma Dataset
+import numpy as np
+import logging
+from collections import OrderedDict
+from braindecode.datasets.bbci import BBCIDataset
+from braindecode.datautil.trial_segment import \
+    create_signal_target_from_raw_mne
+from braindecode.mne_ext.signalproc import mne_apply, resample_cnt
+from braindecode.datautil.signalproc import exponential_running_standardize
+from braindecode.datautil.signalproc import highpass_cnt
+
+#%%
+def load_HGD_data(data_path, subject, training, low_cut_hz =0, debug = False):
+    """ Loading training/testing data for the High Gamma Dataset (HGD)
+    for a specific subject.
+    
+    Please note that  HGD is for "executed movements" NOT "motor imagery"  
+    
+    This code is taken from https://github.com/robintibor/high-gamma-dataset 
+    You can download the HGD using the following link: 
+        https://gin.g-node.org/robintibor/high-gamma-dataset/src/master/data
+    The Braindecode library is required to load and processs the HGD dataset.
+   
+        Parameters
+        ----------
+        data_path: string
+            dataset path
+        subject: int
+            number of subject in [1, .. ,14]
+        training: bool
+            if True, load training data
+            if False, load testing data
+        debug: bool
+            if True, 
+            if False, 
+    """
+
+    log = logging.getLogger(__name__)
+    log.setLevel('DEBUG')
+
+    if training:  filename = (data_path + 'train/{}.mat'.format(subject))
+    else:         filename = (data_path + 'test/{}.mat'.format(subject))
+
+    load_sensor_names = None
+    if debug:
+        load_sensor_names = ['C3', 'C4', 'C2']
+    # we loaded all sensors to always get same cleaning results independent of sensor selection
+    # There is an inbuilt heuristic that tries to use only EEG channels and that definitely
+    # works for datasets in our paper
+    loader = BBCIDataset(filename, load_sensor_names=load_sensor_names)
+    
+    log.info("Loading data...")
+    cnt = loader.load()
+
+    # Cleaning: First find all trials that have absolute microvolt values
+    # larger than +- 800 inside them and remember them for removal later
+    log.info("Cutting trials...")
+
+    marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2],),
+                              ('Rest', [3]), ('Feet', [4])])
+    clean_ival = [0, 4000]
+
+    set_for_cleaning = create_signal_target_from_raw_mne(cnt, marker_def,
+                                                  clean_ival)
+
+    clean_trial_mask = np.max(np.abs(set_for_cleaning.X), axis=(1, 2)) < 800
+
+    log.info("Clean trials: {:3d}  of {:3d} ({:5.1f}%)".format(
+        np.sum(clean_trial_mask),
+        len(set_for_cleaning.X),
+        np.mean(clean_trial_mask) * 100))
+
+    # now pick only sensors with C in their name
+    # as they cover motor cortex
+    C_sensors = ['FC5', 'FC1', 'FC2', 'FC6', 'C3', 'C4', 'CP5',
+                 'CP1', 'CP2', 'CP6', 'FC3', 'FCz', 'FC4', 'C5', 'C1', 'C2',
+                 'C6',
+                 'CP3', 'CPz', 'CP4', 'FFC5h', 'FFC3h', 'FFC4h', 'FFC6h',
+                 'FCC5h',
+                 'FCC3h', 'FCC4h', 'FCC6h', 'CCP5h', 'CCP3h', 'CCP4h', 'CCP6h',
+                 'CPP5h',
+                 'CPP3h', 'CPP4h', 'CPP6h', 'FFC1h', 'FFC2h', 'FCC1h', 'FCC2h',
+                 'CCP1h',
+                 'CCP2h', 'CPP1h', 'CPP2h']
+    if debug:
+        C_sensors = load_sensor_names
+    cnt = cnt.pick_channels(C_sensors)
+
+    # Further preprocessings as descibed in paper
+    log.info("Resampling...")
+    cnt = resample_cnt(cnt, 250.0)
+    log.info("Highpassing...")
+    cnt = mne_apply(
+        lambda a: highpass_cnt(
+            a, low_cut_hz, cnt.info['sfreq'], filt_order=3, axis=1),
+        cnt)
+    log.info("Standardizing...")
+    cnt = mne_apply(
+        lambda a: exponential_running_standardize(a.T, factor_new=1e-3,
+                                                  init_block_size=1000,
+                                                  eps=1e-4).T,
+        cnt)
+
+    # Trial interval, start at -500 already, since improved decoding for networks
+    ival = [-500, 4000]
+
+    dataset = create_signal_target_from_raw_mne(cnt, marker_def, ival)
+    dataset.X = dataset.X[clean_trial_mask]
+    dataset.y = dataset.y[clean_trial_mask]
+    return dataset.X, dataset.y