[49dbd7]: / dataset.py

Download this file

112 lines (82 with data), 3.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import numpy as np
import pandas as pd
import scipy
import variables as v
def load_dataset(data_type="ica_filtered", test_type="Arithmetic"):
    '''
    Loads data from the SAM 40 Dataset.

    Every file in the directory selected by ``data_type`` whose name contains
    ``test_type`` is read with ``scipy.io.loadmat`` and stored as one entry
    along the first axis of the result. Files are visited in sorted filename
    order so the result does not depend on the filesystem's listing order.

    Args:
        data_type (string): The data type to load. Must be in v.DATA_TYPES.
            Defaults to "ica_filtered".
        test_type (string): The test type to load. Must be in v.TEST_TYPES.
            Defaults to "Arithmetic".

    Returns:
        ndarray: Float array of shape (n_files, 32, 3200), where n_files is
        the number of matching files (at most 120). The integer 0 is returned
        instead when "ica_filtered" data is requested for a test type other
        than "Arithmetic".

    Raises:
        AssertionError: If test_type or data_type is not a known value.
        IndexError: If more than 120 matching files are found.
    '''
    # A bare `import scipy` does not guarantee that the `scipy.io` submodule
    # is loaded, so import it explicitly before calling loadmat.
    import scipy.io

    assert (test_type in v.TEST_TYPES)
    assert (data_type in v.DATA_TYPES)

    if data_type == "ica_filtered" and test_type != "Arithmetic":
        print("Data of type", data_type, "does not have test type", test_type)
        # Kept as a sentinel return (not an exception) for existing callers.
        return 0

    # Pick the source directory and the key of the variable inside each file.
    if data_type == "raw":
        data_dir, data_key = v.DIR_RAW, 'Data'
    elif data_type == "wt_filtered":
        data_dir, data_key = v.DIR_FILTERED, 'Clean_data'
    else:
        data_dir, data_key = v.DIR_ICA_FILTERED, 'Clean_data'

    dataset = np.empty((120, 32, 3200))
    counter = 0
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    # NOTE(review): confirm that sorted filename order matches the order of
    # the labels used downstream.
    for filename in sorted(os.listdir(data_dir)):
        if test_type not in filename:
            continue
        path = os.path.join(data_dir, filename)
        dataset[counter] = scipy.io.loadmat(path)[data_key]
        counter += 1

    # Only the first `counter` rows were written; the rest of the np.empty
    # buffer is uninitialised memory and must not be handed to callers.
    return dataset[:counter]
def load_labels():
    '''
    Reads the label spreadsheet and converts every label to a boolean.

    The sheet at v.LABELS_PATH is read, its columns are renamed according to
    v.COLUMNS_TO_RENAME, the first data row is discarded, and the remaining
    values are cast to int and compared against 5.

    Returns:
        DataFrame: Boolean table that is True where the label is greater
        than 5 and False otherwise.
    '''
    sheet = pd.read_excel(v.LABELS_PATH).rename(columns=v.COLUMNS_TO_RENAME)
    # Skip the first row, then binarise: values above 5 become True.
    scores = sheet[1:].astype("int")
    return scores > 5
def format_labels(labels, test_type="Arithmetic", epochs=1):
    '''
    Selects the labels of one test type and repeats each of them per epoch.

    The columns listed in v.TEST_TYPE_COLUMNS[test_type] are taken from
    ``labels`` and concatenated one after another, then every value is
    repeated ``epochs`` times in a row.

    Args:
        labels: Column-indexable pandas object holding the labels
            (presumably the output of load_labels; verify against caller).
        test_type (string): The test type to filter by. Must be in
            v.TEST_TYPES. Defaults to "Arithmetic".
        epochs (int): How many times each label is repeated. Defaults to 1.

    Returns:
        ndarray: The formatted labels.
    '''
    assert (test_type in v.TEST_TYPES)
    selected_columns = [labels[column] for column in v.TEST_TYPE_COLUMNS[test_type]]
    stacked = pd.concat(selected_columns).to_numpy()
    # ndarray.repeat keeps the order and duplicates each element consecutively.
    return stacked.repeat(epochs)
def split_data(data, sfreq):
    '''
    Splits EEG data into epochs with length 1 sec.

    Each trial is cut along its last axis into consecutive, non-overlapping
    windows of ``sfreq`` samples. Samples left over after the last complete
    window are dropped.

    Args:
        data (ndarray): EEG data of shape (n_trials, n_channels, n_samples).
        sfreq (int): The sampling frequency, i.e. the number of samples per
            epoch. Must be positive.

    Returns:
        ndarray: The epoched data as a float array of shape
        (n_trials, n_samples // sfreq, n_channels, sfreq).

    Raises:
        ValueError: If sfreq is not positive, or data is not 3-dimensional.
    '''
    if sfreq <= 0:
        raise ValueError("sfreq must be a positive integer")

    n_trials, n_channels, n_samples = data.shape
    n_epochs = n_samples // sfreq

    # Cut off the incomplete trailing window, then view the sample axis as
    # (epoch, sample-within-epoch) instead of looping over trials and epochs.
    windows = data[:, :, :n_epochs * sfreq].reshape(
        n_trials, n_channels, n_epochs, sfreq)

    # Swap the channel and epoch axes while copying into a float buffer so
    # the result has the same dtype and layout as a freshly allocated array.
    epoched_data = np.empty((n_trials, n_epochs, n_channels, sfreq))
    epoched_data[...] = windows.transpose(0, 2, 1, 3)
    return epoched_data