"""
.. _tut-reject-data-spans:

===================================
Rejecting bad data spans and breaks
===================================

This tutorial covers:

- manual marking of bad spans of data,
- automated rejection of data spans based on signal amplitude, and
- automated detection of breaks during an experiment.

We begin as always by importing the necessary Python modules and loading some
:ref:`example data <sample-dataset>`; to save memory we'll use a pre-filtered
and downsampled version of the example data, and we'll also load an events
array to use when converting the continuous data to epochs:
"""
# Authors: The MNE-Python contributors.
# License: BSD-3-Clause
# Copyright the MNE-Python contributors.
# %%
import os
import numpy as np
import mne
sample_data_folder = mne.datasets.sample.data_path()
sample_data_raw_file = os.path.join(
sample_data_folder, "MEG", "sample", "sample_audvis_filt-0-40_raw.fif"
)
raw = mne.io.read_raw_fif(sample_data_raw_file, verbose=False)
events_file = os.path.join(
sample_data_folder, "MEG", "sample", "sample_audvis_filt-0-40_raw-eve.fif"
)
events = mne.read_events(events_file)
# %%
# Annotating bad spans of data
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# The tutorial :ref:`tut-events-vs-annotations` describes how
# :class:`~mne.Annotations` can be read from embedded events in the raw
# recording file, and :ref:`tut-annotate-raw` describes in detail how to
# interactively annotate a :class:`~mne.io.Raw` data object. Here, we focus on
# best practices for annotating *bad* data spans so that they will be excluded
# from your analysis pipeline.
#
#
# The ``reject_by_annotation`` parameter
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# In the interactive ``raw.plot()`` window, the annotation controls can be
# opened by pressing :kbd:`a`. Here, new annotation labels can be created or
# existing annotation labels can be selected for use.
fig = raw.plot()
fig.fake_keypress("a") # Simulates user pressing 'a' on the keyboard.
# %%
# Before you can mark any spans, you first need to add a description: push the
# "Add Description" button and enter a label.
# You can use any description you like, but annotations marking spans that
# should be excluded from the analysis pipeline should all begin with "BAD" or
# "bad" (e.g., "bad_cough", "bad-eyes-closed", "bad door slamming", etc). When
# this practice is followed, many processing steps in MNE-Python will
# automatically exclude the "bad"-labelled spans of data; this behavior is
# controlled by a parameter ``reject_by_annotation`` that can be found in many
# MNE-Python functions or class constructors, including:
#
# - creation of epoched data from continuous data (:class:`mne.Epochs`)
# - many methods of the independent components analysis class
#   (:class:`mne.preprocessing.ICA`)
# - functions for finding heartbeat and blink artifacts
#   (:func:`~mne.preprocessing.find_ecg_events`,
#   :func:`~mne.preprocessing.find_eog_events`)
# - covariance computations (:func:`mne.compute_raw_covariance`)
# - power spectral density computation (:meth:`mne.io.Raw.compute_psd`)
#
# For example, when creating epochs from continuous data, if
# ``reject_by_annotation=True`` the :class:`~mne.Epochs` constructor will drop
# any epoch that partially or fully overlaps with an annotated span that begins
# with "bad".
#
#
# Generating annotations programmatically
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# The :ref:`tut-artifact-overview` tutorial introduced the artifact detection
# functions :func:`~mne.preprocessing.find_eog_events` and
# :func:`~mne.preprocessing.find_ecg_events` (although that tutorial mostly
# relied on their higher-level wrappers
# :func:`~mne.preprocessing.create_eog_epochs` and
# :func:`~mne.preprocessing.create_ecg_epochs`). Here, for demonstration
# purposes, we make use of the lower-level artifact detection function to get
# an events array telling us where the blinks are, then automatically add
# "bad_blink" annotations around them (this is not necessary when using
# :func:`~mne.preprocessing.create_eog_epochs`, it is done here just to show
# how annotations are added non-interactively). We'll start the annotations
# 250 ms before the blink and end them 250 ms after it:
# sphinx_gallery_thumbnail_number = 3
eog_events = mne.preprocessing.find_eog_events(raw)
onsets = eog_events[:, 0] / raw.info["sfreq"] - 0.25
durations = [0.5] * len(eog_events)
descriptions = ["bad blink"] * len(eog_events)
blink_annot = mne.Annotations(
onsets, durations, descriptions, orig_time=raw.info["meas_date"]
)
raw.set_annotations(blink_annot)
# %%
# Now we can confirm that the annotations are centered on the EOG events. Since
# blinks are usually easiest to see in the EEG channels, we'll only plot EEG
# here:
eeg_picks = mne.pick_types(raw.info, meg=False, eeg=True)
raw.plot(events=eog_events, order=eeg_picks)
# %%
# See the section :ref:`tut-section-programmatic-annotations` for more details
# on creating annotations programmatically.
#
# Detecting and annotating breaks
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Another useful function, albeit not related to artifact detection *per se*,
# is `mne.preprocessing.annotate_break`: it generates annotations for
# segments of the data where no existing annotations (or, alternatively,
# events) can be found. It can therefore be used to automatically detect and
# mark breaks during which the recording continued, e.g. between experimental
# blocks.
#
# For the sake of this example, let's assume an experiment consisting of two
# blocks, the first one stretching from 30 to 90, and the second from 120 to
# 180 seconds. We'll mark these blocks by annotations, and then use
# `mne.preprocessing.annotate_break` to detect and annotate any breaks.
#
# .. note:: We need to take ``raw.first_time`` into account, otherwise the
#           onsets will be incorrect!
onsets = [raw.first_time + 30, raw.first_time + 120]
durations = [60, 60]
descriptions = ["block_1", "block_2"]
block_annots = mne.Annotations(
onset=onsets,
duration=durations,
description=descriptions,
orig_time=raw.info["meas_date"],
)
raw.set_annotations(raw.annotations + block_annots) # add to existing
raw.plot()
# %%
# Now we can detect the break periods. We can control how far the break
# annotations extend toward both ends of each break.
break_annots = mne.preprocessing.annotate_break(
raw=raw,
min_break_duration=20, # consider segments of at least 20 s duration
t_start_after_previous=5, # start annotation 5 s after end of previous one
t_stop_before_next=2, # stop annotation 2 s before beginning of next one
)
raw.set_annotations(raw.annotations + break_annots) # add to existing
raw.plot()
# %%
# You can see that 3 segments have been annotated as ``BAD_break``:
#
# - the first one starting with the beginning of the recording and ending 2
#   seconds before the beginning of block 1 (due to ``t_stop_before_next=2``),
# - the second one starting 5 seconds after block 1 has ended, and ending 2
#   seconds before the beginning of block 2 (``t_start_after_previous=5``,
#   ``t_stop_before_next=2``),
# - and the last one starting 5 seconds after block 2 has ended
#   (``t_start_after_previous=5``) and continuing until the end of the
#   recording.
#
# You can also see that only the ``block_1`` and ``block_2`` annotations
# were considered in the detection of the break periods – the EOG annotations
# were simply ignored. This is because, by default,
# `~mne.preprocessing.annotate_break` ignores all annotations starting with
# ``'bad'``. You can control this behavior via the ``ignore`` parameter.
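#
# For example, a minimal sketch (passing an empty tuple here would make
# `~mne.preprocessing.annotate_break` take *all* existing annotations into
# account, including the "bad" ones):
#
# .. code-block:: python
#
#     break_annots = mne.preprocessing.annotate_break(raw=raw, ignore=())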
#
# It is also possible to perform break period detection based on an array
# of events: simply pass the array via the ``events`` parameter. Existing
# annotations in the raw data will be ignored in this case:
# only keep some button press events (code 32) for this demonstration
events_subset = events[events[:, -1] == 32]
# drop the first and last few events
events_subset = events_subset[3:-3]
break_annots = mne.preprocessing.annotate_break(
raw=raw,
events=events_subset, # passing events will ignore existing annotations
min_break_duration=25, # pick a longer break duration this time
)
# replace existing annotations (otherwise it becomes difficult to see any
# effects in the plot!)
raw.set_annotations(break_annots)
raw.plot(events=events_subset)
# %%
# .. _`tut-reject-epochs-section`:
#
# Rejecting Epochs based on peak-to-peak channel amplitude
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Besides "bad" annotations, the :class:`mne.Epochs` class constructor has
# another means of rejecting epochs, based on signal amplitude thresholds for
# each channel type. In the :ref:`overview tutorial
# <tut-section-overview-epoching>` we saw an example of this: setting maximum
# acceptable peak-to-peak amplitudes for each channel type in an epoch, using
# the ``reject`` parameter. There is also a related parameter, ``flat``, that
# can be used to set *minimum* acceptable peak-to-peak amplitudes for each
# channel type in an epoch:
reject_criteria = dict(
    mag=3000e-15,  # 3000 fT
    grad=3000e-13,  # 3000 fT/cm
    eeg=100e-6,  # 100 µV
    eog=200e-6,  # 200 µV
)
flat_criteria = dict(mag=1e-15, grad=1e-13, eeg=1e-6)  # 1 fT, 1 fT/cm, 1 µV
# %%
# The values that are appropriate are dataset- and hardware-dependent, so some
# trial-and-error may be necessary to find the correct balance between data
# quality and loss of power due to too many dropped epochs. Here, we've set the
# rejection criteria to be fairly stringent, for illustration purposes.
#
# Two additional parameters, ``reject_tmin`` and ``reject_tmax``, are used to
# set the temporal window in which to calculate peak-to-peak amplitude for the
# purposes of epoch rejection. These default to the same ``tmin`` and ``tmax``
# of the entire epoch. As one example, if you wanted to only apply the
# rejection thresholds to the portion of the epoch that occurs *before* the
# event marker around which the epoch is created, you could set
# ``reject_tmax=0``. A summary of the causes of rejected epochs can be
# generated with the :meth:`~mne.Epochs.plot_drop_log` method:
raw.set_annotations(blink_annot) # restore the EOG annotations
epochs = mne.Epochs(
raw,
events,
tmin=-0.2,
tmax=0.5,
reject_tmax=0,
reject=reject_criteria,
flat=flat_criteria,
reject_by_annotation=False,
preload=True,
)
epochs.plot_drop_log()
# %%
# Notice that we've passed ``reject_by_annotation=False`` above, in order to
# isolate the effects of the rejection thresholds. If we re-run the epoching
# with ``reject_by_annotation=True`` (the default) we see that the rejections
# due to EEG and EOG channels have disappeared (suggesting that those channel
# fluctuations were probably blink-related, and were subsumed by rejections
# based on the "bad blink" label).
epochs = mne.Epochs(
raw,
events,
tmin=-0.2,
tmax=0.5,
reject_tmax=0,
reject=reject_criteria,
flat=flat_criteria,
preload=True,
)
epochs.plot_drop_log()
# %%
# More importantly, note that *many* more epochs are rejected (~12.2% instead
# of ~2.5%) when rejecting based on the blink labels, underscoring why it is
# usually desirable to repair artifacts rather than exclude them.
#
# The :meth:`~mne.Epochs.plot_drop_log` method is a visualization of an
# :class:`~mne.Epochs` attribute, namely ``epochs.drop_log``, which stores
# empty lists for retained epochs and lists of strings for dropped epochs, with
# the strings indicating the reason(s) why the epoch was dropped. For example:
print(epochs.drop_log)
# %%
# Finally, it should be noted that "dropped" epochs are not necessarily deleted
# from the :class:`~mne.Epochs` object right away. Above, we forced the
# dropping to happen when we created the :class:`~mne.Epochs` object by using
# the ``preload=True`` parameter. If we had not done that, the
# :class:`~mne.Epochs` object would have been `memory-mapped`_ (not loaded into
# RAM), in which case the criteria for dropping epochs are stored, and the
# actual dropping happens when the :class:`~mne.Epochs` data are finally loaded
# and used. There are several ways this can get triggered, such as:
#
# - explicitly loading the data into RAM with the :meth:`~mne.Epochs.load_data`
#   method
# - plotting the data (:meth:`~mne.Epochs.plot`,
#   :meth:`~mne.Epochs.plot_image`, etc)
# - using the :meth:`~mne.Epochs.average` method to create an
#   :class:`~mne.Evoked` object
#
# You can also trigger dropping with the :meth:`~mne.Epochs.drop_bad` method;
# if ``reject`` and/or ``flat`` criteria have already been provided to the
# epochs constructor, :meth:`~mne.Epochs.drop_bad` can be used without
# arguments to simply delete the epochs already marked for removal (if the
# epochs have already been dropped, nothing further will happen):
epochs.drop_bad()
# %%
# Alternatively, if rejection thresholds were not originally given to the
# :class:`~mne.Epochs` constructor, they can be passed to
# :meth:`~mne.Epochs.drop_bad` later instead; this can also be a way of
# imposing progressively more stringent rejection criteria:
stronger_reject_criteria = dict(
    mag=2000e-15,  # 2000 fT
    grad=2000e-13,  # 2000 fT/cm
    eeg=100e-6,  # 100 µV
    eog=100e-6,  # 100 µV
)
epochs.drop_bad(reject=stronger_reject_criteria)
print(epochs.drop_log)
# %%
# .. _`tut-reject-epochs-func-section`:
#
# Rejecting Epochs using callables (functions)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Sometimes it is useful to reject epochs based on criteria other than
# peak-to-peak amplitude. For example, we might want to reject epochs
# based on the maximum or minimum amplitude of a channel.
# In this case, the `mne.Epochs.drop_bad` method also accepts
# callables (functions) in the ``reject`` and ``flat`` parameters. This
# allows us to define functions to reject epochs based on our desired criteria.
#
# Let's begin by generating epoched data with large artifacts in one EEG
# channel, in order to demonstrate the versatility of this approach.
raw.crop(0, 5)  # keep only the first 5 seconds to keep this example short
raw.del_proj()  # remove the existing projectors
chans = raw.info["ch_names"][-5:-1]  # select a few EEG channels
raw.pick(chans)
data = raw.get_data()
new_data = data
new_data[0, 180:200] *= 1e3  # create a large transient spike in amplitude
new_data[0, 460:580] += 1e-3  # create a sustained increase in amplitude
edit_raw = mne.io.RawArray(new_data, raw.info)
# Create fixed length epochs of 1 second
events = mne.make_fixed_length_events(edit_raw, id=1, duration=1.0, start=0)
epochs = mne.Epochs(edit_raw, events, tmin=0, tmax=1, baseline=None)
epochs.plot(scalings=dict(eeg=50e-5))
# %%
# As you can see, we have two large artifacts in the first channel: one large
# spike in amplitude and one sustained increase in amplitude.
# Let's try to reject the epoch containing the spike in amplitude based on the
# maximum amplitude of the first channel. Please note that the callable in
# ``reject`` must return a ``(good, reason)`` tuple, where ``good`` is a bool
# and ``reason`` is a str (or a list or tuple where each entry is a str).
epochs = mne.Epochs(
edit_raw,
events,
tmin=0,
tmax=1,
baseline=None,
preload=True,
)
epochs.drop_bad(
reject=dict(eeg=lambda x: ((np.max(x, axis=1) > 1e-2).any(), "max amp"))
)
epochs.plot(scalings=dict(eeg=50e-5))
# %%
# Here, the epoch containing the spike in amplitude was rejected for having a
# maximum amplitude greater than 1e-2 V. Notice the use of the ``any()``
# function to check whether any of the channels exceeded the threshold. We
# could have also used the ``all()`` function to require that *all* channels
# exceed the threshold.
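# For instance, a sketch of the stricter ``all()`` variant (which would reject
# nothing here, since the artifacts are confined to a single channel):
#
# .. code-block:: python
#
#     epochs.drop_bad(
#         reject=dict(eeg=lambda x: ((np.max(x, axis=1) > 1e-2).all(), "max amp"))
#     )
#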
# Next, let's try to reject the epoch containing the increase in amplitude
# using the median.
epochs = mne.Epochs(
edit_raw,
events,
tmin=0,
tmax=1,
baseline=None,
preload=True,
)
epochs.drop_bad(
reject=dict(eeg=lambda x: ((np.median(x, axis=1) > 1e-4).any(), "median amp"))
)
epochs.plot(scalings=dict(eeg=50e-5))
# %%
# Finally, let's try to reject both epochs using a combination of the maximum
# and median. We'll define a custom function and use boolean operators to
# combine the two criteria.
def reject_criteria(x):
    max_condition = np.max(x, axis=1) > 1e-2
    median_condition = np.median(x, axis=1) > 1e-4
    return (max_condition.any() or median_condition.any()), ["max amp", "median amp"]
epochs = mne.Epochs(
edit_raw,
events,
tmin=0,
tmax=1,
baseline=None,
preload=True,
)
epochs.drop_bad(reject=dict(eeg=reject_criteria))
epochs.plot(events=True)
# %%
# Note that a complementary Python module, the `autoreject package`_, uses
# machine learning to find optimal rejection criteria, and is designed to
# integrate smoothly with MNE-Python workflows. This can be a considerable
# time-saver when working with heterogeneous datasets.
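#
# A minimal sketch of the basic autoreject workflow (assuming the package has
# been installed separately, e.g. via ``pip install autoreject``; not executed
# as part of this tutorial):
#
# .. code-block:: python
#
#     from autoreject import AutoReject
#
#     ar = AutoReject()
#     epochs_clean = ar.fit_transform(epochs)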
#
#
# .. LINKS
#
# .. _`memory-mapped`: https://en.wikipedia.org/wiki/Memory-mapped_file
# .. _`autoreject package`: http://autoreject.github.io/