In [1]:
from scipy import io
from scipy.signal import butter, lfilter
import h5py
import random
import numpy as np
import os

In [2]:
datafolder = "new_dataset/"

In [3]:
# some filtering code copypasted from provided notebook 

def butter_bandpass(lowcut, highcut, sampling_rate, order=5):
    nyq_freq = sampling_rate*0.5
    low = lowcut/nyq_freq
    high = highcut/nyq_freq
    b, a = butter(order, [low, high], btype='band')
    return b, a

def butter_high_low_pass(lowcut, highcut, sampling_rate, order=5):
    nyq_freq = sampling_rate*0.5
    lower_bound = lowcut/nyq_freq
    higher_bound = highcut/nyq_freq
    b_high, a_high = butter(order, lower_bound, btype='high')
    b_low, a_low = butter(order, higher_bound, btype='low')
    return b_high, a_high, b_low, a_low

def butter_bandpass_filter(data, lowcut, highcut, sampling_rate, order=5, how_to_filt = 'separately'):
    if how_to_filt == 'separately':
        b_high, a_high, b_low, a_low = butter_high_low_pass(lowcut, highcut, sampling_rate, order=order)
        y = lfilter(b_high, a_high, data)
        y = lfilter(b_low, a_low, y)
    elif how_to_filt == 'simultaneously':
        b, a = butter_bandpass(lowcut, highcut, sampling_rate, order=order)
        y = lfilter(b, a, data)
    return y

In [4]:
def open_eeg_mat(filename, centered=True):
    all_data = io.loadmat(filename)
    eeg_data = all_data['data_cur']
    if centered:
        eeg_data = eeg_data - np.mean(eeg_data,1)[np.newaxis].T
        print('Data were centered: channels are zero-mean')
    states_labels = all_data['states_cur']
    states_codes = list(np.unique(states_labels)[:])
    sampling_rate = all_data['srate']
    chan_names = all_data['chan_names']
    return eeg_data, states_labels, sampling_rate, chan_names, eeg_data.shape[0], eeg_data.shape[1], states_codes

def butter_high_low_pass(lowcut, highcut, sampling_rate, order=5):
    nyq_freq = sampling_rate*0.5
    lower_bound = lowcut/nyq_freq
    higher_bound = highcut/nyq_freq
    b_high, a_high = butter(order, lower_bound, btype='high')
    b_low, a_low = butter(order, higher_bound, btype='low')
    return b_high, a_high, b_low, a_low

def butter_bandpass_filter(data, lowcut, highcut, sampling_rate, order=5, how_to_filt = 'simultaneously'):
    if how_to_filt == 'separately':
        b_high, a_high, b_low, a_low = butter_high_low_pass(lowcut, highcut, sampling_rate, order=order)
        y = lfilter(b_high, a_high, data)
        y = lfilter(b_low, a_low, y)
    elif how_to_filt == 'simultaneously':
        b, a = butter_bandpass(lowcut, highcut, sampling_rate, order=order)
        y = lfilter(b, a, data)
    return y

In [5]:
train_datas = {}
test_datas = {}

def to_onehot(label):
    labels_encoding = {1: np.array([1,0,0]), 2: np.array([0,1,0]), 6: np.array([0,0,1])}
    return labels_encoding[label]

for fname in os.listdir(datafolder):
    filename = datafolder + fname
    [eeg_data, states_labels, sampling_rate, chan_names, chan_numb, samp_numb, states_codes] = open_eeg_mat(filename, centered=False)
    sampling_rate = sampling_rate[0,0]
    eeg_data = butter_bandpass_filter(eeg_data, 0.5, 45, sampling_rate, order=5, how_to_filt = 'simultaneously')
    
    states_labels = states_labels[0]
    print(states_labels)
    states_labels = states_labels[2000:-2000]
    eeg_data = eeg_data[:,2000:-2000]
    
    experiment_name = "_".join(fname.split("_")[:-1])
    if fname.endswith("_2.mat"):
        test_datas[experiment_name] = {"eeg_data": eeg_data.T, "labels": states_labels}
    elif fname.endswith("_1.mat"):
        train_datas[experiment_name] = {"eeg_data": eeg_data.T, "labels": states_labels}

[1 1 1 ..., 2 2 2]
[1 1 1 ..., 6 6 6]
[6 6 6 ..., 6 6 6]
[1 1 1 ..., 6 6 6]
[1 1 1 ..., 2 2 2]
[1 1 1 ..., 2 2 2]
[1 1 1 ..., 6 6 6]
[1 1 1 ..., 6 6 6]
[1 1 1 ..., 6 6 6]
[6 6 6 ..., 6 6 6]
[1 1 1 ..., 2 2 2]
[1 1 1 ..., 2 2 2]
[1 1 1 ..., 2 2 2]
[1 1 1 ..., 6 6 6]
[1 1 1 ..., 6 6 6]
[6 6 6 ..., 6 6 6]
[1 1 1 ..., 6 6 6]
[1 1 1 ..., 2 2 2]
[1 1 1 ..., 2 2 2]
[6 6 6 ..., 2 2 2]


In [6]:
# separate scaling for each user, should not hurt 
from sklearn.preprocessing import StandardScaler

for key in train_datas.keys():
    sc = StandardScaler()
    train_datas[key]["eeg_data"] = sc.fit_transform(train_datas[key]["eeg_data"])
    test_datas[key]["eeg_data"] = sc.fit_transform(test_datas[key]["eeg_data"])

In [7]:
slice_len = 500

In [8]:
def generate_slice(test=False):
    if test:
        experiment_data = random.choice(list(test_datas.values()))
    else:
        experiment_data = random.choice(list(train_datas.values()))
    
    X = experiment_data["eeg_data"]
    y = experiment_data["labels"]
    
    while True:
        slice_start = np.random.choice(len(X) - slice_len)
        slice_end = slice_start + slice_len
        slice_x = X[slice_start:slice_end]
        #slice_x = normalize(slice_x)
        slice_y = y[slice_start:slice_end]
        
        if len(set(slice_y)) == 1:
            return slice_x, to_onehot(slice_y[-1])

In [9]:
generate_slice()[0].shape

(500, 24)

In [10]:
def data_generator(batch_size, test=False):
    while True:
        batch_x = []
        batch_y = []
        
        for i in range(0, batch_size):
            x, y = generate_slice(test=test)
            batch_x.append(x)
            batch_y.append(y)
            
        y = np.array(batch_y)
        x = np.array([i for i in batch_x])
        yield (x, y)

In [11]:
from keras.layers import Convolution1D, Dense, Dropout, Input, merge, GlobalMaxPooling1D, MaxPooling1D, Flatten, LSTM
from keras.models import Model, load_model
from keras.optimizers import RMSprop

Using TensorFlow backend.


In [12]:
def get_base_model(input_len, fsize):
    '''Base network to be shared (eq. to feature extraction).
    '''
    input_seq = Input(shape=(input_len, 24))
    nb_filters = 50
    convolved = Convolution1D(nb_filters, 5, border_mode="same", activation="tanh")(input_seq)
    pooled = GlobalMaxPooling1D()(convolved)
    compressed = Dense(50, activation="linear")(pooled)
    compressed = Dropout(0.3)(compressed)
    compressed = Dense(50, activation="relu")(compressed)
    compressed = Dropout(0.3)(compressed)
    model = Model(input=input_seq, output=compressed)            
    return model

In [13]:
input1125_seq = Input(shape=(slice_len, 24))

base_network1125 = get_base_model(slice_len, 10)

embedding_1125 = base_network1125(input1125_seq)
out = Dense(3, activation='softmax')(embedding_1125)
    
model = Model(input=input1125_seq, output=out)
    
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["categorical_accuracy"])

In [14]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

nb_epoch = 100000
earlyStopping = EarlyStopping(monitor='categorical_accuracy', patience=10, verbose=0, mode='auto')
checkpointer = ModelCheckpoint("convlstm_alldata.h5", monitor='categorical_accuracy', verbose=0,
                               save_best_only=True, mode='auto', period=1)

samples_per_epoch = 15000
nb_epoch = 1

model.fit_generator(data_generator(batch_size=25), samples_per_epoch, nb_epoch, 
                    callbacks=[earlyStopping, checkpointer], verbose=2, nb_val_samples=15000,
                    validation_data=data_generator(batch_size=25, test=True))

Epoch 1/1
47s - loss: 0.8520 - categorical_accuracy: 0.5665 - val_loss: 0.7560 - val_categorical_accuracy: 0.6100


<keras.callbacks.History at 0x7f1f07163668>