Diff of /dataset.py [000000] .. [49dbd7]

Switch to side-by-side view

--- a
+++ b/dataset.py
@@ -0,0 +1,111 @@
+import os
+import numpy as np
+import pandas as pd
+import scipy
+import variables as v
+
+def load_dataset(data_type="ica_filtered", test_type="Arithmetic"):
+    '''
+    Loads data from the SAM 40 Dataset.
+
+    Every file in the directory belonging to ``data_type`` whose name
+    contains ``test_type`` is read with ``scipy.io.loadmat`` and the
+    recordings are stacked along a new first axis, in sorted file-name
+    order.
+
+    Args:
+        data_type (string): The data type to load; must be one of
+            ``v.DATA_TYPES``. Defaults to "ica_filtered".
+        test_type (string): The test type to load; must be one of
+            ``v.TEST_TYPES``. Defaults to "Arithmetic".
+
+    Returns:
+        ndarray: The specified dataset as float64 with shape
+            (n_files, ...), where the trailing dimensions are those of a
+            single recording. The integer 0 is returned instead when
+            "ica_filtered" data is requested for a test type other than
+            "Arithmetic" (kept so existing callers keep working).
+
+    Raises:
+        AssertionError: If ``data_type`` or ``test_type`` is unknown.
+        FileNotFoundError: If no file name in the data directory contains
+            ``test_type``.
+
+    '''
+    # Imported here because a bare ``import scipy`` is not guaranteed to
+    # expose the ``scipy.io`` submodule on every SciPy version.
+    from scipy.io import loadmat
+
+    assert test_type in v.TEST_TYPES, "unknown test_type: %r" % (test_type,)
+
+    assert data_type in v.DATA_TYPES, "unknown data_type: %r" % (data_type,)
+
+    if data_type == "ica_filtered" and test_type != "Arithmetic":
+        print("Data of type", data_type, "does not have test type", test_type)
+        return 0
+
+    if data_type == "raw":
+        data_dir = v.DIR_RAW
+        data_key = 'Data'
+    elif data_type == "wt_filtered":
+        data_dir = v.DIR_FILTERED
+        data_key = 'Clean_data'
+    else:
+        data_dir = v.DIR_ICA_FILTERED
+        data_key = 'Clean_data'
+
+    # os.listdir() returns names in arbitrary order; sorting makes the trial
+    # order identical on every platform and run.
+    # NOTE(review): confirm this order matches the label order produced by
+    # format_labels().
+    filenames = sorted(
+        name for name in os.listdir(data_dir) if test_type in name
+    )
+    if not filenames:
+        raise FileNotFoundError(
+            "no file containing %r found in %r" % (test_type, data_dir)
+        )
+
+    # Stack exactly the recordings that were found instead of filling a
+    # fixed-size buffer, which left uninitialised rows when files were
+    # missing and overflowed when there were extra ones.
+    trials = [
+        loadmat(os.path.join(data_dir, name))[data_key] for name in filenames
+    ]
+    return np.stack(trials).astype(np.float64, copy=False)
+
+
+def load_labels():
+    '''
+    Reads the label spreadsheet and converts every value to a boolean.
+
+    The sheet at ``v.LABELS_PATH`` is loaded, its columns are renamed
+    according to ``v.COLUMNS_TO_RENAME``, the first row is discarded, and
+    the remaining values are cast to int and compared against 5.
+
+    Returns:
+        DataFrame: Boolean table that is True wherever the value is
+            greater than 5.
+    '''
+    raw_table = pd.read_excel(v.LABELS_PATH)
+    renamed = raw_table.rename(columns=v.COLUMNS_TO_RENAME)
+    # Skip the first row, then threshold the integer values at 5.
+    return renamed[1:].astype("int") > 5
+
+
+def format_labels(labels, test_type="Arithmetic", epochs=1):
+    '''
+    Selects the label columns of one test type and repeats every label.
+
+    Args:
+        labels: Label table indexable by column name, e.g. the result of
+            load_labels() (presumably a DataFrame; confirm against callers).
+        test_type (string): Key into ``v.TEST_TYPE_COLUMNS``; must be one of
+            ``v.TEST_TYPES``. Defaults to "Arithmetic".
+        epochs (int): How many times each label is repeated. Defaults to 1.
+
+    Returns:
+        ndarray: The selected columns joined one after another, with each
+            element repeated ``epochs`` times in a row.
+
+    '''
+    assert (test_type in v.TEST_TYPES)
+
+    # One entry per column listed for this test type, in the listed order.
+    trial_columns = [
+        labels[column] for column in v.TEST_TYPE_COLUMNS[test_type]
+    ]
+    stacked = pd.concat(trial_columns)
+
+    # repeat() without an axis duplicates each element consecutively.
+    return stacked.to_numpy().repeat(epochs)
+
+
+def split_data(data, sfreq):
+    '''
+    Splits EEG data into epochs with length 1 sec.
+
+    Each trial is cut along its sample axis into consecutive,
+    non-overlapping windows of ``sfreq`` samples. Samples left over after
+    the last complete window are dropped.
+
+    Args:
+        data (ndarray): EEG data with shape
+            (n_trials, n_channels, n_samples).
+        sfreq (int): The sampling frequency, used as the number of samples
+            per epoch.
+
+    Returns:
+        ndarray: The epoched data as float64 with shape
+            (n_trials, n_samples // sfreq, n_channels, sfreq).
+
+    '''
+    n_trials, n_channels, n_samples = data.shape
+    n_epochs = n_samples // sfreq
+
+    # Allocated up front so the result is always a C-contiguous float64
+    # array, whatever the dtype or memory layout of ``data``.
+    epoched_data = np.empty((n_trials, n_epochs, n_channels, sfreq))
+
+    # Drop the incomplete tail, split the sample axis into
+    # (epoch, sample-in-epoch), then move the epoch axis in front of the
+    # channel axis. One vectorised assignment replaces the per-trial,
+    # per-epoch Python loops.
+    windows = data[:, :, :n_epochs * sfreq].reshape(
+        n_trials, n_channels, n_epochs, sfreq
+    )
+    epoched_data[...] = windows.transpose(0, 2, 1, 3)
+    return epoched_data