[ac138c]: / examples / datasets_io / plot_tuh_discrete_multitarget.py

Download this file

127 lines (101 with data), 4.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
"""
Multiple discrete targets with the TUH EEG Corpus
=================================================
Welcome to this tutorial where we demonstrate how to work with multiple discrete
targets for each recording in the TUH EEG Corpus. We'll guide you through the
process step by step.
"""
# Author: Lukas Gemein <l.gemein@gmail.com>
#
# License: BSD (3-clause)
import mne
from torch.utils.data import DataLoader
from braindecode.datasets import TUH
from braindecode.preprocessing import create_fixed_length_windows
###############################################################################
# Setting Logging Level
# ----------------------
#
# We'll set the logging level to 'ERROR' to avoid excessive messages when
# extracting windows:
# Only report errors; otherwise a message is emitted for every extracted window.
mne.set_log_level(verbose="ERROR")
###############################################################################
# If you want to try this code with the actual data, please delete the next
# section. We are required to mock some dataset functionality, since the data
# is not available at creation time of this example.
from braindecode.datasets.tuh import _TUHMock as TUH # noqa F811
###############################################################################
# Creating Temple University Hospital (TUH) EEG Corpus Dataset
# ------------------------------------------------------------
#
# We start by creating a TUH dataset. Instead of just a single `str`, we give
# it multiple strings as target names. Each of the strings has to exist as a
# column in the description DataFrame.
# Placeholder: replace with the path to your copy of the TUH data.
TUH_PATH = "please insert actual path to data here"
# Several description columns at once: both age and gender become the
# decoding target of every recording.
target_names = ("age", "gender")
tuh = TUH(
    path=TUH_PATH,
    target_name=target_names,
    recording_ids=None,
    add_physician_reports=False,
    preload=False,
)
# Show the description DataFrame that holds the target columns.
print(tuh.description)
###############################################################################
# Exploring Data
# --------------
#
# Iterating through the dataset gives `x` as an ndarray with shape
# `(n_channels x 1)` and `y` as a list containing `[age of the subject, gender
# of the subject]`.
# Let's look at the last example as it has more interesting age/gender labels
# (compare to the last row of the dataframe above).
# Index -1 picks the final example of the dataset; it unpacks into data and
# targets.
last_example = tuh[-1]
x, y = last_example
print(f"{x=}", f"{y=}", sep="\n")
###############################################################################
# Creating Windows
# ----------------
#
# We will skip preprocessing steps for now, since they are not the aim of this
# example. Instead, we will directly create fixed-length compute windows. We
# specify a mapping from genders 'M' and 'F' to integers, since this is
# required for decoding.
# Window length and stride are the same number of samples.
window_len_samples = 1000
# Gender is given as a letter; map these non-digit targets to integers.
gender_to_int = {"M": 0, "F": 1}
tuh_windows = create_fixed_length_windows(
    tuh,
    window_size_samples=window_len_samples,
    window_stride_samples=window_len_samples,
    start_offset_samples=0,
    stop_offset_samples=None,
    drop_last_window=False,
    mapping=gender_to_int,
)
# Keep the number of windows of each dataset in the description; it is
# required for loading later on.
windows_per_dataset = [len(ds) for ds in tuh_windows.datasets]
tuh_windows.set_description({"n_windows": windows_per_dataset})
###############################################################################
# Exploring Windows
# -----------------
#
# Iterating through the dataset gives `x` as an ndarray with shape
# `(n_channels x 1000)`, `y` as `[age, gender]`, and `ind`.
# Let's look at the last example again.
# The final window unpacks into data, targets and the window indices.
last_window = tuh_windows[-1]
x, y, ind = last_window
print(f"{x=}", f"{y=}", f"{ind=}", sep="\n")
###############################################################################
# DataLoader for Model Training
# -----------------------------
#
# We give the dataset to a pytorch DataLoader, such that it can be used for
# model training.
# Wrap the windows dataset so it is served in batches of four windows.
dl = DataLoader(tuh_windows, batch_size=4)
###############################################################################
# Exploring DataLoader
# --------------------
#
# When iterating through the DataLoader, we get `batch_X` as a tensor with shape
# `(4 x n_channels x 1000)`, `batch_y` as `[tensor([4 x age of subject]),
# tensor([4 x gender of subject])]`, and `batch_ind`. To view the last example,
# simply iterate through the DataLoader:
# Run through every batch without doing any work, so that after the loop the
# loop variables still hold the final batch, which is then printed.
for batch_X, batch_y, batch_ind in dl:
    pass  # nothing to do per batch; we only want the last one
print(f"{batch_X=}\n{batch_y=}\n{batch_ind=}")