--- a +++ b/EEGModels.py @@ -0,0 +1,402 @@ +""" + ARL_EEGModels - A collection of Convolutional Neural Network models for EEG + Signal Processing and Classification, using Keras and Tensorflow + + Requirements: + (1) tensorflow == 2.X (as of this writing, 2.0 - 2.3 have been verified + as working) + + To run the EEG/MEG ERP classification sample script, you will also need + + (4) mne >= 0.17.1 + (5) PyRiemann >= 0.2.5 + (6) scikit-learn >= 0.20.1 + (7) matplotlib >= 2.2.3 + + To use: + + (1) Place this file in the PYTHONPATH variable in your IDE (i.e.: Spyder) + (2) Import the model as + + from EEGModels import EEGNet + + model = EEGNet(nb_classes = ..., Chans = ..., Samples = ...) + + (3) Then compile and fit the model + + model.compile(loss = ..., optimizer = ..., metrics = ...) + fitted = model.fit(...) + predicted = model.predict(...) + + Portions of this project are works of the United States Government and are not + subject to domestic copyright protection under 17 USC Sec. 105. Those + portions are released world-wide under the terms of the Creative Commons Zero + 1.0 (CC0) license. + + Other portions of this project are subject to domestic copyright protection + under 17 USC Sec. 105. Those portions are licensed under the Apache 2.0 + license. The complete text of the license governing this material is in + the file labeled LICENSE.TXT that is a part of this project's official + distribution. +""" + +from tensorflow.keras.models import Model +from tensorflow.keras.layers import Dense, Activation, Permute, Dropout +from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D +from tensorflow.keras.layers import SeparableConv2D, DepthwiseConv2D +from tensorflow.keras.layers import BatchNormalization +from tensorflow.keras.layers import SpatialDropout2D +from tensorflow.keras.regularizers import l1_l2 +from tensorflow.keras.layers import Input, Flatten +from tensorflow.keras.constraints import max_norm +from tensorflow.keras import backend as K + + +def EEGNet(nb_classes, Chans = 64, Samples = 128, + dropoutRate = 0.5, kernLength = 64, F1 = 8, + D = 2, F2 = 16, norm_rate = 0.25, dropoutType = 'Dropout'): + """ Keras Implementation of EEGNet + http://iopscience.iop.org/article/10.1088/1741-2552/aace8c/meta + + Note that this implements the newest version of EEGNet and NOT the earlier + version (version v1 and v2 on arxiv). We strongly recommend using this + architecture as it performs much better and has nicer properties than + our earlier version. For example: + + 1. Depthwise Convolutions to learn spatial filters within a + temporal convolution. The use of the depth_multiplier option maps + exactly to the number of spatial filters learned within a temporal + filter. This matches the setup of algorithms like FBCSP which learn + spatial filters within each filter in a filter-bank. This also limits + the number of free parameters to fit when compared to a fully-connected + convolution. + + 2. Separable Convolutions to learn how to optimally combine spatial + filters across temporal bands. Separable Convolutions are Depthwise + Convolutions followed by (1x1) Pointwise Convolutions. + + + While the original paper used Dropout, we found that SpatialDropout2D + sometimes produced slightly better results for classification of ERP + signals. However, SpatialDropout2D significantly reduced performance + on the Oscillatory dataset (SMR, BCI-IV Dataset 2A). We recommend using + the default Dropout in most cases. + + Assumes the input signal is sampled at 128Hz. If you want to use this model + for any other sampling rate you will need to modify the lengths of temporal + kernels and average pooling size in blocks 1 and 2 as needed (double the + kernel lengths for double the sampling rate, etc). Note that we haven't + tested the model performance with this rule so this may not work well. + + The model with default parameters gives the EEGNet-8,2 model as discussed + in the paper. This model should do pretty well in general, although it is + advised to do some model searching to get optimal performance on your + particular dataset. + + We set F2 = F1 * D (number of input filters = number of output filters) for + the SeparableConv2D layer. We haven't extensively tested other values of this + parameter (say, F2 < F1 * D for compressed learning, and F2 > F1 * D for + overcomplete). We believe the main parameters to focus on are F1 and D. + + Inputs: + + nb_classes : int, number of classes to classify + Chans, Samples : number of channels and time points in the EEG data + dropoutRate : dropout fraction + kernLength : length of temporal convolution in first layer. We found + that setting this to be half the sampling rate worked + well in practice. For the SMR dataset in particular + since the data was high-passed at 4Hz we used a kernel + length of 32. + F1, F2 : number of temporal filters (F1) and number of pointwise + filters (F2) to learn. Default: F1 = 8, F2 = F1 * D. + D : number of spatial filters to learn within each temporal + convolution. Default: D = 2 + dropoutType : Either SpatialDropout2D or Dropout, passed as a string. + + """ + + if dropoutType == 'SpatialDropout2D': + dropoutType = SpatialDropout2D + elif dropoutType == 'Dropout': + dropoutType = Dropout + else: + raise ValueError('dropoutType must be one of SpatialDropout2D ' + 'or Dropout, passed as a string.') + + input1 = Input(shape = (Chans, Samples, 1)) + + ################################################################## + block1 = Conv2D(F1, (1, kernLength), padding = 'same', + input_shape = (Chans, Samples, 1), + use_bias = False)(input1) + block1 = BatchNormalization()(block1) + block1 = DepthwiseConv2D((Chans, 1), use_bias = False, + depth_multiplier = D, + depthwise_constraint = max_norm(1.))(block1) + block1 = BatchNormalization()(block1) + block1 = Activation('elu')(block1) + block1 = AveragePooling2D((1, 4))(block1) + block1 = dropoutType(dropoutRate)(block1) + + block2 = SeparableConv2D(F2, (1, 16), + use_bias = False, padding = 'same')(block1) + block2 = BatchNormalization()(block2) + block2 = Activation('elu')(block2) + block2 = AveragePooling2D((1, 8))(block2) + block2 = dropoutType(dropoutRate)(block2) + + flatten = Flatten(name = 'flatten')(block2) + + dense = Dense(nb_classes, name = 'dense', + kernel_constraint = max_norm(norm_rate))(flatten) + softmax = Activation('softmax', name = 'softmax')(dense) + + return Model(inputs=input1, outputs=softmax) + + + + +def EEGNet_SSVEP(nb_classes = 12, Chans = 8, Samples = 256, + dropoutRate = 0.5, kernLength = 256, F1 = 96, + D = 1, F2 = 96, dropoutType = 'Dropout'): + """ SSVEP Variant of EEGNet, as used in [1]. + + Inputs: + + nb_classes : int, number of classes to classify + Chans, Samples : number of channels and time points in the EEG data + dropoutRate : dropout fraction + kernLength : length of temporal convolution in first layer + F1, F2 : number of temporal filters (F1) and number of pointwise + filters (F2) to learn. + D : number of spatial filters to learn within each temporal + convolution. + dropoutType : Either SpatialDropout2D or Dropout, passed as a string. + + + [1]. Waytowich, N. et. al. (2018). Compact Convolutional Neural Networks + for Classification of Asynchronous Steady-State Visual Evoked Potentials. + Journal of Neural Engineering vol. 15(6). + http://iopscience.iop.org/article/10.1088/1741-2552/aae5d8 + + """ + + if dropoutType == 'SpatialDropout2D': + dropoutType = SpatialDropout2D + elif dropoutType == 'Dropout': + dropoutType = Dropout + else: + raise ValueError('dropoutType must be one of SpatialDropout2D ' + 'or Dropout, passed as a string.') + + input1 = Input(shape = (Chans, Samples, 1)) + + ################################################################## + block1 = Conv2D(F1, (1, kernLength), padding = 'same', + input_shape = (Chans, Samples, 1), + use_bias = False)(input1) + block1 = BatchNormalization()(block1) + block1 = DepthwiseConv2D((Chans, 1), use_bias = False, + depth_multiplier = D, + depthwise_constraint = max_norm(1.))(block1) + block1 = BatchNormalization()(block1) + block1 = Activation('elu')(block1) + block1 = AveragePooling2D((1, 4))(block1) + block1 = dropoutType(dropoutRate)(block1) + + block2 = SeparableConv2D(F2, (1, 16), + use_bias = False, padding = 'same')(block1) + block2 = BatchNormalization()(block2) + block2 = Activation('elu')(block2) + block2 = AveragePooling2D((1, 8))(block2) + block2 = dropoutType(dropoutRate)(block2) + + flatten = Flatten(name = 'flatten')(block2) + + dense = Dense(nb_classes, name = 'dense')(flatten) + softmax = Activation('softmax', name = 'softmax')(dense) + + return Model(inputs=input1, outputs=softmax) + + + +def EEGNet_old(nb_classes, Chans = 64, Samples = 128, regRate = 0.0001, + dropoutRate = 0.25, kernels = [(2, 32), (8, 4)], strides = (2, 4)): + """ Keras Implementation of EEGNet_v1 (https://arxiv.org/abs/1611.08024v2) + + This model is the original EEGNet model proposed on arxiv + https://arxiv.org/abs/1611.08024v2 + + with a few modifications: we use striding instead of max-pooling as this + helped slightly in classification performance while also providing a + computational speed-up. + + Note that we no longer recommend the use of this architecture, as the new + version of EEGNet performs much better overall and has nicer properties. + + Inputs: + + nb_classes : total number of final categories + Chans, Samples : number of EEG channels and samples, respectively + regRate : regularization rate for L1 and L2 regularizations + dropoutRate : dropout fraction + kernels : the 2nd and 3rd layer kernel dimensions (default is + the [2, 32] x [8, 4] configuration) + strides : the stride size (note that this replaces the max-pool + used in the original paper) + + """ + + # start the model + input_main = Input((Chans, Samples)) + layer1 = Conv2D(16, (Chans, 1), input_shape=(Chans, Samples, 1), + kernel_regularizer = l1_l2(l1=regRate, l2=regRate))(input_main) + layer1 = BatchNormalization()(layer1) + layer1 = Activation('elu')(layer1) + layer1 = Dropout(dropoutRate)(layer1) + + permute_dims = 2, 1, 3 + permute1 = Permute(permute_dims)(layer1) + + layer2 = Conv2D(4, kernels[0], padding = 'same', + kernel_regularizer=l1_l2(l1=0.0, l2=regRate), + strides = strides)(permute1) + layer2 = BatchNormalization()(layer2) + layer2 = Activation('elu')(layer2) + layer2 = Dropout(dropoutRate)(layer2) + + layer3 = Conv2D(4, kernels[1], padding = 'same', + kernel_regularizer=l1_l2(l1=0.0, l2=regRate), + strides = strides)(layer2) + layer3 = BatchNormalization()(layer3) + layer3 = Activation('elu')(layer3) + layer3 = Dropout(dropoutRate)(layer3) + + flatten = Flatten(name = 'flatten')(layer3) + + dense = Dense(nb_classes, name = 'dense')(flatten) + softmax = Activation('softmax', name = 'softmax')(dense) + + return Model(inputs=input_main, outputs=softmax) + + + +def DeepConvNet(nb_classes, Chans = 64, Samples = 256, + dropoutRate = 0.5): + """ Keras implementation of the Deep Convolutional Network as described in + Schirrmeister et. al. (2017), Human Brain Mapping. + + This implementation assumes the input is a 2-second EEG signal sampled at + 128Hz, as opposed to signals sampled at 250Hz as described in the original + paper. We also perform temporal convolutions of length (1, 5) as opposed + to (1, 10) due to this sampling rate difference. + + Note that we use the max_norm constraint on all convolutional layers, as + well as the classification layer. We also change the defaults for the + BatchNormalization layer. We used this based on a personal communication + with the original authors. + + ours original paper + pool_size 1, 2 1, 3 + strides 1, 2 1, 3 + conv filters 1, 5 1, 10 + + Note that this implementation has not been verified by the original + authors. + + """ + + # start the model + input_main = Input((Chans, Samples, 1)) + block1 = Conv2D(25, (1, 5), + input_shape=(Chans, Samples, 1), + kernel_constraint = max_norm(2., axis=(0,1,2)))(input_main) + block1 = Conv2D(25, (Chans, 1), + kernel_constraint = max_norm(2., axis=(0,1,2)))(block1) + block1 = BatchNormalization(epsilon=1e-05, momentum=0.9)(block1) + block1 = Activation('elu')(block1) + block1 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block1) + block1 = Dropout(dropoutRate)(block1) + + block2 = Conv2D(50, (1, 5), + kernel_constraint = max_norm(2., axis=(0,1,2)))(block1) + block2 = BatchNormalization(epsilon=1e-05, momentum=0.9)(block2) + block2 = Activation('elu')(block2) + block2 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block2) + block2 = Dropout(dropoutRate)(block2) + + block3 = Conv2D(100, (1, 5), + kernel_constraint = max_norm(2., axis=(0,1,2)))(block2) + block3 = BatchNormalization(epsilon=1e-05, momentum=0.9)(block3) + block3 = Activation('elu')(block3) + block3 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block3) + block3 = Dropout(dropoutRate)(block3) + + block4 = Conv2D(200, (1, 5), + kernel_constraint = max_norm(2., axis=(0,1,2)))(block3) + block4 = BatchNormalization(epsilon=1e-05, momentum=0.9)(block4) + block4 = Activation('elu')(block4) + block4 = MaxPooling2D(pool_size=(1, 2), strides=(1, 2))(block4) + block4 = Dropout(dropoutRate)(block4) + + flatten = Flatten()(block4) + + dense = Dense(nb_classes, kernel_constraint = max_norm(0.5))(flatten) + softmax = Activation('softmax')(dense) + + return Model(inputs=input_main, outputs=softmax) + + +# need these for ShallowConvNet +def square(x): + return K.square(x) + +def log(x): + return K.log(K.clip(x, min_value = 1e-7, max_value = 10000)) + + +def ShallowConvNet(nb_classes, Chans = 64, Samples = 128, dropoutRate = 0.5): + """ Keras implementation of the Shallow Convolutional Network as described + in Schirrmeister et. al. (2017), Human Brain Mapping. + + Assumes the input is a 2-second EEG signal sampled at 128Hz. Note that in + the original paper, they do temporal convolutions of length 25 for EEG + data sampled at 250Hz. We instead use length 13 since the sampling rate is + roughly half of the 250Hz which the paper used. The pool_size and stride + in later layers is also approximately half of what is used in the paper. + + Note that we use the max_norm constraint on all convolutional layers, as + well as the classification layer. We also change the defaults for the + BatchNormalization layer. We used this based on a personal communication + with the original authors. + + ours original paper + pool_size 1, 35 1, 75 + strides 1, 7 1, 15 + conv filters 1, 13 1, 25 + + Note that this implementation has not been verified by the original + authors. We do note that this implementation reproduces the results in the + original paper with minor deviations. + """ + + # start the model + input_main = Input((Chans, Samples, 1)) + block1 = Conv2D(40, (1, 13), + input_shape=(Chans, Samples, 1), + kernel_constraint = max_norm(2., axis=(0,1,2)))(input_main) + block1 = Conv2D(40, (Chans, 1), use_bias=False, + kernel_constraint = max_norm(2., axis=(0,1,2)))(block1) + block1 = BatchNormalization(epsilon=1e-05, momentum=0.9)(block1) + block1 = Activation(square)(block1) + block1 = AveragePooling2D(pool_size=(1, 35), strides=(1, 7))(block1) + block1 = Activation(log)(block1) + block1 = Dropout(dropoutRate)(block1) + flatten = Flatten()(block1) + dense = Dense(nb_classes, kernel_constraint = max_norm(0.5))(flatten) + softmax = Activation('softmax')(dense) + + return Model(inputs=input_main, outputs=softmax) + +