In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from cdtw import pydtw
import seaborn as sns
from tqdm import tqdm
import os
import json
import h5py

In [2]:
from keras import backend as K
from keras.regularizers import l2, activity_l2

from keras import backend as K
from keras.engine.topology import Layer
from keras.optimizers import RMSprop, SGD, Adam
from keras.layers.core import Dense, Dropout, Activation, Flatten, Lambda, Merge
from keras.layers.recurrent import LSTM, GRU
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Bidirectional, merge
    from keras.layers.convolutional import Convolution1D
from keras.layers.pooling import MaxPooling1D, AveragePooling1D, GlobalMaxPooling1D

Using TensorFlow backend.


In [None]:
def read_train(fname):
    """Load every subject's training arrays from an HDF5 file.

    Parameters
    ----------
    fname : str
        Path of the HDF5 file. Each top-level entry is treated as one
        subject that exposes a "data" and a "labels" dataset.

    Returns
    -------
    dict
        Maps subject name -> ``(data, labels)``, both read fully into
        memory with ``[:]`` before the file is closed.
    """
    loaded = {}
    with h5py.File(fname, "r") as h5_file:
        for name, group in h5_file.items():
            # Echo the subject name as a simple progress indicator.
            print(name)
            loaded[name] = (group["data"][:], group["labels"][:])
    return loaded

def read_test(fname):
    """Load every test chunk of every subject from an HDF5 file.

    Parameters
    ----------
    fname : str
        Path of the HDF5 file. Each top-level entry is treated as one
        subject whose children are individual chunks.

    Returns
    -------
    dict
        Nested mapping ``{subject: {chunk_id: chunk_data}}`` where every
        chunk has been read fully into memory with ``[:]`` before the
        file is closed.
    """
    chunks_by_subject = {}
    with h5py.File(fname, "r") as data_file:
        for subject, subject_data in data_file.items():
            # Iterate the group we already hold instead of looking it up
            # again through data_file[subject].
            chunks_by_subject[subject] = {
                chunk_id: chunk[:] for chunk_id, chunk in subject_data.items()
            }
    return chunks_by_subject

In [None]:
# Load both datasets from HDF5 files addressed by relative path, i.e. they
# must sit in the kernel's current working directory.
# `train` maps subject -> (data, labels); `test` maps subject -> {chunk_id: data}.
train = read_train("train.h5")
test = read_test("test.h5")

In [None]:
def batch(ts, y, n=1):
    """Yield every full sliding window of length ``n`` (stride 1).

    Parameters
    ----------
    ts : sequence
        Samples to window along the first axis (anything supporting
        ``len`` and slicing).
    y : sequence
        Per-sample labels, sliced with the same bounds as ``ts``.
        Assumed to be at least as long as ``ts`` -- not checked here.
    n : int
        Window length; must be >= 1.

    Yields
    ------
    tuple
        ``(ts[i:i + n], y[i:i + n])`` for every start ``i`` where a full
        window fits, *including* the last one starting at ``len(ts) - n``.
        Nothing is yielded when ``len(ts) < n``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1 (raised on first iteration).
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    total = len(ts)
    # "+ 1" keeps the final full window; range(0, total - n) dropped it and
    # produced nothing at all when len(ts) == n.
    for start in range(total - n + 1):
        stop = start + n
        yield ts[start:stop], y[start:stop]

In [None]:
def label_batch(batch):
    """Return the single label shared by a whole window, or -1 if mixed.

    ``batch`` is a ``(samples, labels)`` pair; only ``batch[1]`` is read.
    The candidates are tested in the order 1, 0, 2, and the first one that
    every label equals is returned. Any other situation (mixed labels, or
    a uniform label outside {0, 1, 2}) yields -1.
    """
    labels = batch[1]
    for candidate in (1, 0, 2):
        if all(value == candidate for value in labels):
            return candidate
    return -1

# Build, per subject, the list of (window, label) pairs used for training.
subject_datas = {}
for subject, data in tqdm(train.items()):
    # data is the (X, y) pair stored by read_train. X is transposed so that
    # windows are cut along its first axis -- presumably turning
    # (channels, time) into (time, channels); TODO confirm against the file.
    subject_ts = data[0].T
    # Only the first row of the label array is used.
    subject_y = data[1][0]
    batches = []
    for window, window_labels in batch(subject_ts, subject_y, n=1125):
        window_label = label_batch((window, window_labels))
        # Keep only windows with one uniform label and the full length.
        if window_label != -1 and len(window) == 1125:
            batches.append((window, window_label))
    subject_datas[subject] = batches

In [None]:
# Sanity check: shape of the first retained window of "subject_1".
subject_datas["subject_1"][0][0].shape

In [None]:
# Pool the windows and labels of all subjects into two parallel lists.
X = []
y = []
for subj, subj_data in tqdm(subject_datas.items()):
    for window, label in subj_data:
        X.append(window)
        y.append(label)

In [None]:
# Turn the pooled Python lists into numpy arrays (np.array copies the data).
# NOTE(review): X and y are rebound under the same names, so after this cell
# they are arrays rather than lists -- re-running earlier cells is required
# to get the lists back.
X = np.array(X)
y = np.array(y)

In [None]:
def shuffle_in_unison_scary(a, b):
    """Shuffle ``a`` and ``b`` in place using the same permutation.

    The global numpy RNG state is captured, ``a`` is shuffled, the state is
    restored and ``b`` is shuffled, so both shuffles consume identical
    random draws. Nothing is returned; both arguments are mutated.

    Parameters
    ----------
    a, b : mutable sequences of equal length (e.g. numpy arrays), shuffled
        along their first axis.

    Raises
    ------
    ValueError
        If the lengths differ. The replayed draws would then produce
        unrelated permutations and silently break the a[i] <-> b[i] pairing.
    """
    if len(a) != len(b):
        raise ValueError(
            "cannot shuffle in unison: len(a)=%d != len(b)=%d" % (len(a), len(b))
        )
    rng_state = np.random.get_state()
    np.random.shuffle(a)
    # Rewind the RNG so the second shuffle replays exactly the same draws.
    np.random.set_state(rng_state)
    np.random.shuffle(b)

In [None]:
# Shuffle windows and labels together, in place, so pairs stay aligned.
# NOTE(review): no RNG seed is set in the visible cells, so the resulting
# order (and therefore the later train/validation split) differs per run.
shuffle_in_unison_scary(X, y)

In [None]:
def toarr(label, n_classes=3):
    """One-hot encode an integer class label.

    Parameters
    ----------
    label : int
        Class index in ``[0, n_classes)``.
    n_classes : int, optional
        Length of the returned vector (default 3, the original fixed size).

    Returns
    -------
    numpy.ndarray
        Float vector of zeros with a single 1 at position ``label``.

    Raises
    ------
    IndexError
        If ``label`` is outside ``[0, n_classes)``. Negative labels are
        rejected explicitly: numpy would otherwise index from the end, so
        the -1 "mixed window" sentinel would be encoded as the last class.
    """
    if not 0 <= label < n_classes:
        raise IndexError("label %r is outside [0, %d)" % (label, n_classes))
    arr = np.zeros(n_classes)
    arr[label] = 1
    return arr
# Stack the one-hot vector of every label into one (n_samples, n_classes) matrix.
y_arr = np.vstack(list(map(toarr, y)))

In [None]:
# Hold out the last 30000 (already shuffled) samples for validation;
# everything before that index is used for training.
validation_start = len(X) - 30000
X_train, X_val = X[:validation_start], X[validation_start:]
y_train, y_val = y_arr[:validation_start], y_arr[validation_start:]

In [3]:
def get_base_model(input_shape=(1125, 24), filter_sizes=(5, 7, 14),
                   nb_filters=100, device='/gpu:2'):
    '''Base network to be shared (eq. to feature extraction).

    The input sequence is convolved in parallel with one Convolution1D per
    kernel width in ``filter_sizes``; each branch is reduced with global
    max pooling, the branches are concatenated and passed through
    Dense(150, tanh) -> Dropout(0.2) -> Dense(150, tanh).

    Parameters (all optional; the defaults reproduce the original network)
    ----------
    input_shape : tuple
        Shape of one input sequence, passed to ``Input(shape=...)``.
    filter_sizes : sequence of int
        Kernel widths, one convolution branch each. NOTE(review): the
        concat ``merge`` presumably needs at least two branches -- confirm
        before passing a single size.
    nb_filters : int
        Number of filters in every convolution branch.
    device : str
        TensorFlow device string under which the layers are created.

    Returns
    -------
    keras Model mapping the input sequence to the 150-unit embedding.
    '''
    with K.tf.device(device):
        input_seq = Input(shape=input_shape)

        # One "conv -> global max pool" branch per kernel width.
        branches = []
        for fsize in filter_sizes:
            convolved = Convolution1D(nb_filters, fsize, border_mode="same", activation="tanh")(input_seq)
            branches.append(GlobalMaxPooling1D()(convolved))

        multi_scale = merge(branches, mode='concat')
        compressed = Dense(150, activation="tanh")(multi_scale)
        compressed = Dropout(0.2)(compressed)
        compressed = Dense(150, activation="tanh")(compressed)
        return Model(input=input_seq, output=compressed)

In [4]:
# Assemble the classifier: shared base network followed by a 3-way softmax.
# Everything is created under the hard-coded TensorFlow device '/gpu:2'.
with K.tf.device('/gpu:2'):
    base_network = get_base_model()
    # Same input shape as the one declared inside get_base_model.
    input_seq = Input(shape=(1125, 24))

    # Apply the base network as a layer, then classify its output.
    embedding = base_network(input_seq)
    out = Dense(3, activation='softmax')(embedding)
    
    model = Model(input=input_seq, output=out)
    
    # Alternative optimizers that were tried and left disabled:
    #opt = SGD(lr=0.001, momentum=0.9, nesterov=True, clipvalue=0.0001)
    #opt = RMSprop(lr=0.001, clipvalue=10**6)
    opt = Adam(lr=0.01)
    # No `metrics` argument is passed, so only the loss is configured here.
    model.compile(loss="categorical_crossentropy", optimizer=opt)

In [13]:
# Display the output tensor (node index 0) of the third-from-last layer
# nested inside model.layers[-2].
# NOTE(review): model.layers[-2] is presumably the shared base network and
# the recorded output shows a (?, 150) Tanh tensor -- confirm the indices
# still point at the intended layer if the architecture changes.
model.layers[-2].layers[-3].get_output_at(0)

<tf.Tensor 'Tanh_3:0' shape=(?, 150) dtype=float32>

In [9]:
# Overwrite the `outputs` attribute of model.layers[-2] with a one-element
# list holding the output tensor of its third-from-last inner layer.
# NOTE(review): the execution counts show this cell ran as In [9], before
# the In [13] inspection cell above it, so a top-to-bottom run may not
# reproduce the recorded state. Whether rebinding `outputs` on an already
# built model changes what it computes is not visible here -- verify.
model.layers[-2].outputs = [model.layers[-2].layers[-3].get_output_at(0)]

[<tf.Tensor 'Tanh_4:0' shape=(?, 150) dtype=float32>]

In [10]:
from keras.callbacks import EarlyStopping

# Maximum number of epochs. This is the value fit() was actually given;
# the former `nb_epoch = 100000` assignment was never used.
nb_epoch = 100
# Stop once val_loss has failed to improve for 3 consecutive epochs.
earlyStopping = EarlyStopping(monitor='val_loss', patience=3, verbose=0, mode='auto')

with K.tf.device('/gpu:2'):
    # validation_split=0.2 holds back 20% of X_train for the val_loss that
    # early stopping monitors; X_val / y_val are not used during training.
    model.fit(X_train, y_train, batch_size=60, callbacks=[earlyStopping],
              nb_epoch=nb_epoch, verbose=1, validation_split=0.2, shuffle=True,
              class_weight=None, sample_weight=None)

NameError: name 'X_train' is not defined

In [None]:
# Display the eighth-from-last layer nested inside model.layers[-2].
# NOTE(review): which layer this is depends on the construction order in
# get_base_model -- confirm it is the one intended.
model.layers[-2].layers[-8]

In [None]:
# Save the model under the relative path "convnet-multiscale" (no file
# extension), i.e. in the kernel's current working directory.
model.save("convnet-multiscale")

In [None]:
# Predicted class index per validation sample: argmax over each row of scores.
# Assumes model.predict returns a 2-D (n_samples, n_classes) array.
preds = list(np.argmax(model.predict(X_val), axis=1))

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of held-out windows whose predicted class matches the true class;
# the true class index is recovered from the one-hot rows of y_val.
accuracy_score(np.argmax(y_val, axis=1), preds)

In [None]:
# GENERATES SUBMISSION DF
# One row per (subject, chunk, tick). The model scores a whole chunk once and
# those three class scores are repeated for every tick of the chunk.
# Dedicated names are used throughout so this cell no longer overwrites the
# notebook-level `preds` (validation predictions) or `data`.
submission_columns = ["subject_id", "chunk_id", "tick", "class_0_score",
                      "class_1_score", "class_2_score"]
submission_rows = []
for subj in test:
    # Names look like "<prefix>_<number>"; the trailing number is the id.
    subject_id = int(subj.split("_")[-1])
    for chunk in tqdm(test[subj]):
        # Transposed the same way as the training data before windowing.
        chunk_signal = test[subj][chunk].T
        # predict expects a batch, so wrap the chunk in a 1-element array.
        chunk_scores = model.predict(np.array([chunk_signal]))[0]
        chunk_row = {
            "subject_id": subject_id,
            "chunk_id": int(chunk.split("_")[-1]),
            "class_0_score": chunk_scores[0],
            "class_1_score": chunk_scores[1],
            "class_2_score": chunk_scores[2],
        }
        # 1125 ticks per chunk: the sequence length of the model's Input layer.
        submission_rows.extend(dict(chunk_row, tick=tick) for tick in range(0, 1125))
# `columns=` fixes the column order and also yields a well-formed empty frame
# when there are no rows.
df = pd.DataFrame(submission_rows, columns=submission_columns)

In [None]:
# Preview the first rows of the submission frame.
df.head()

In [None]:
# Write the submission to CSV in the working directory, without the index column.
# NOTE(review): the file name says "untrained" -- confirm that matches the
# state of the model that produced these scores.
df.to_csv('submit_multiscale_untrained.csv', index=False)