In [1]:
# Importing Libraries

In [2]:
import pandas as pd
import numpy as np
import sys

In [3]:
# Activities are the class labels
# It is a 6 class classification
ACTIVITIES = {
    0: 'WALKING',
    1: 'WALKING_UPSTAIRS',
    2: 'WALKING_DOWNSTAIRS',
    3: 'SITTING',
    4: 'STANDING',
    5: 'LAYING',
}

# Utility function to print the confusion matrix
def confusion_matrix(Y_true, Y_pred):
    Y_true = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_true, axis=1)])
    Y_pred = pd.Series([ACTIVITIES[y] for y in np.argmax(Y_pred, axis=1)])

    return pd.crosstab(Y_true, Y_pred, rownames=['True'], colnames=['Pred'])

### Data

In [5]:
# Data directory
DATADIR = 'UCI_HAR_Dataset'

In [6]:
# Raw data signals
# Signals are from Accelerometer and Gyroscope
# The signals are in x,y,z directions
# Sensor signals are filtered to have only body acceleration
# excluding the acceleration due to gravity
# Triaxial acceleration from the accelerometer is total acceleration
SIGNALS = [
    "body_acc_x",
    "body_acc_y",
    "body_acc_z",
    "body_gyro_x",
    "body_gyro_y",
    "body_gyro_z",
    "total_acc_x",
    "total_acc_y",
    "total_acc_z"
]

In [7]:
# Utility function to read the data from csv file
def _read_csv(filename):
    return pd.read_csv(filename, delim_whitespace=True, header=None)

# Utility function to load the load
def load_signals(subset):
    signals_data = []

    for signal in SIGNALS:
        filename = f'UCI_HAR_Dataset/{subset}/Inertial Signals/{signal}_{subset}.txt'
        signals_data.append(
            _read_csv(filename).as_matrix()
        ) 

    # Transpose is used to change the dimensionality of the output,
    # aggregating the signals by combination of sample/timestep.
    # Resultant shape is (7352 train/2947 test samples, 128 timesteps, 9 signals)
    return np.transpose(signals_data, (1, 2, 0))

In [8]:

def load_y(subset):
    """
    The objective that we are trying to predict is a integer, from 1 to 6,
    that represents a human activity. We return a binary representation of 
    every sample objective as a 6 bits vector using One Hot Encoding
    (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.get_dummies.html)
    """
    filename = f'UCI_HAR_Dataset/{subset}/y_{subset}.txt'
    y = _read_csv(filename)[0]

    return pd.get_dummies(y).as_matrix()

In [9]:
def load_data():
    """
    Obtain the dataset from multiple files.
    Returns: X_train, X_test, y_train, y_test
    """
    X_train, X_test = load_signals('train'), load_signals('test')
    y_train, y_test = load_y('train'), load_y('test')

    return X_train, X_test, y_train, y_test

In [10]:
# Importing tensorflow
np.random.seed(42)
import tensorflow as tf
tf.set_random_seed(42)

In [11]:
# Configuring a session
session_conf = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1
)

In [12]:
# Import Keras
from keras import backend as K
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [14]:
# Importing libraries
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers.core import Dense, Dropout

In [15]:
# Initializing parameters
epochs = 30
batch_size = 16
n_hidden = 32

In [16]:
# Utility function to count the number of classes
def _count_classes(y):
    return len(set([tuple(category) for category in y]))

In [17]:
# Loading the train and test data
X_train, X_test, Y_train, Y_test = load_data()

  if sys.path[0] == '':


In [18]:
timesteps = len(X_train[0])
input_dim = len(X_train[0][0])
n_classes = _count_classes(Y_train)

print(timesteps)
print(input_dim)
print(len(X_train))

128
9
7352


In [19]:
X_train.shape

(7352, 128, 9)

In [24]:
Y_train.shape

(7352, 6)

In [34]:
X_test.shape

(2947, 128, 9)

In [35]:
X_test

array([[[ 1.165315e-02, -2.939904e-02,  1.068262e-01, ...,
          1.041216e+00, -2.697959e-01,  2.377977e-02],
        [ 1.310909e-02, -3.972867e-02,  1.524549e-01, ...,
          1.041803e+00, -2.800250e-01,  7.629271e-02],
        [ 1.126885e-02, -5.240586e-02,  2.168462e-01, ...,
          1.039086e+00, -2.926631e-01,  1.474754e-01],
        ...,
        [ 1.291511e-03,  1.173502e-02,  3.665587e-03, ...,
          9.930164e-01, -2.599865e-01,  1.443951e-01],
        [ 1.469997e-03,  9.517414e-03,  4.041945e-03, ...,
          9.932414e-01, -2.620643e-01,  1.447033e-01],
        [ 2.573841e-03,  7.305069e-03,  4.888436e-03, ...,
          9.943906e-01, -2.641348e-01,  1.454939e-01]],

       [[ 9.279629e-03,  6.650520e-03, -2.631933e-02, ...,
          9.991921e-01, -2.649349e-01,  1.256164e-01],
        [ 4.929711e-03,  1.864973e-02, -2.688753e-02, ...,
          9.946787e-01, -2.532142e-01,  1.256249e-01],
        [ 3.953596e-03,  1.553950e-02, -3.663861e-02, ...,
          9.93

In [26]:
Y_test.shape

(2947, 6)

- Defining the Architecture of LSTM

In [27]:
# Initiliazing the sequential model

model = Sequential()
# Configuring the parameters
model.add(LSTM(n_hidden, input_shape=(timesteps, input_dim)))
# Adding a dropout layer
model.add(Dropout(0.5))
# Adding a dense output layer with sigmoid activation
model.add(Dense(n_classes, activation='sigmoid'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 32)                5376      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 6)                 198       
Total params: 5,574
Trainable params: 5,574
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Compiling the model
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [29]:
# Training the model
model.fit(X_train,
          Y_train,
          batch_size=batch_size,
          validation_data=(X_test, Y_test),
          epochs=epochs)

Train on 7352 samples, validate on 2947 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fd101658ef0>

In [30]:
score = model.evaluate(X_test, Y_test)



In [33]:
 model.predict(X_test)

array([[6.73119284e-05, 1.16691635e-05, 6.29200213e-06, 3.27186123e-03,
        3.09266567e-01, 6.14459668e-06],
       [6.55044132e-05, 2.05861852e-05, 1.43757034e-05, 6.77454285e-03,
        4.01470065e-01, 6.11893711e-06],
       [6.84985353e-05, 1.96996807e-05, 1.35612627e-05, 6.61419239e-03,
        4.27904457e-01, 6.24169252e-06],
       ...,
       [1.55011995e-03, 7.93270528e-01, 3.01599008e-04, 2.30963960e-05,
        5.54955914e-05, 1.02767759e-08],
       [4.90763341e-04, 3.85723859e-01, 1.03853172e-05, 6.35696642e-06,
        2.14066167e-05, 1.14835030e-08],
       [7.23787583e-04, 6.95120990e-01, 2.18840923e-05, 8.08145796e-06,
        6.29514252e-05, 5.91321019e-08]], dtype=float32)

In [31]:
# Confusion Matrix
print(confusion_matrix(Y_test, model.predict(X_test)))

Pred                LAYING  SITTING  STANDING  WALKING  WALKING_DOWNSTAIRS  \
True                                                                         
LAYING                 510        0        27        0                   0   
SITTING                  0      375       110        3                   0   
STANDING                 0       80       446        2                   0   
WALKING                  0        0         0      410                  27   
WALKING_DOWNSTAIRS       0        0         0        2                 407   
WALKING_UPSTAIRS         0        0         0        3                  10   

Pred                WALKING_UPSTAIRS  
True                                  
LAYING                             0  
SITTING                            3  
STANDING                           4  
WALKING                           59  
WALKING_DOWNSTAIRS                11  
WALKING_UPSTAIRS                 458  


In [32]:
score

[0.44746464555687265, 0.8842891075670173]

- With a simple 2 layer architecture we got 90.09% accuracy and a loss of 0.30
- We can further imporve the performace with Hyperparameter tuning