1252 lines (1251 with data), 276.8 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Imports the required libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"\"\"\" Imports the required libraries \"\"\"\n",
"\n",
"# import tensorflow as tf\n",
"import keras\n",
"from keras.models import Sequential\n",
"from keras.optimizers import Adam\n",
"from keras.layers import Dense, Activation, Conv2D, MaxPool2D, MaxPooling2D\n",
"from keras.layers import Flatten, Dropout, BatchNormalization, Reshape\n",
"from keras.utils.vis_utils import plot_model\n",
"\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scipy.stats as stats\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import preprocessing\n",
"from sklearn.model_selection import train_test_split, cross_val_score, KFold\n",
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
"from mlxtend.plotting import plot_confusion_matrix\n",
"from sklearn.metrics import confusion_matrix, classification_report\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Stores the path of the sensor files in the corresponding list"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Stores the path of the sensor files in the corresponding list \"\"\"\n",
"\n",
"base_path = \"./data/raw_data/\"\n",
"phone_accel_file_paths = []\n",
"phone_gyro_file_paths = []\n",
"watch_accel_file_paths = []\n",
"watch_gyro_file_paths = []\n",
"\n",
"# Keywords a file name must contain -> list that collects the matching paths.\n",
"# The entries are tried in this order and the first match wins.\n",
"sensor_file_lists = [\n",
"    ((\"phone\", \"accel\"), phone_accel_file_paths),\n",
"    ((\"phone\", \"gyro\"), phone_gyro_file_paths),\n",
"    ((\"watch\", \"accel\"), watch_accel_file_paths),\n",
"    ((\"watch\", \"gyro\"), watch_gyro_file_paths),\n",
"]\n",
"\n",
"for directory, subdirectories, files in os.walk(base_path):\n",
"    for filename in files:\n",
"        for keywords, file_paths in sensor_file_lists:\n",
"            if all(keyword in filename for keyword in keywords):\n",
"                # Use the directory the file was actually found in instead of\n",
"                # assuming it lives under \"<base_path><device>/<sensor>/\"\n",
"                file_paths.append(os.path.join(directory, filename))\n",
"                break\n",
"\n",
"# os.walk() lists directory entries in arbitrary order; sort so that every\n",
"# run processes the files in the same order\n",
"for keywords, file_paths in sensor_file_lists:\n",
"    file_paths.sort()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Stores the actual name of each activity in the dictionary"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Stores the actual name of each activity in the dictionary \"\"\"\n",
"\n",
"# Maps each single-letter activity code to its readable name.\n",
"# There is no entry for the letter \"N\".\n",
"activity_dict = {\n",
"    \"A\": \"Walking\",\n",
"    \"B\": \"Jogging\",\n",
"    \"C\": \"Stairs\",\n",
"    \"D\": \"Sitting\",\n",
"    \"E\": \"Standing\",\n",
"    \"F\": \"Typing\",\n",
"    \"G\": \"Brushing\",\n",
"    \"H\": \"Eat Soup\",\n",
"    \"I\": \"Eat Chips\",\n",
"    \"J\": \"Eat Pasta\",\n",
"    \"K\": \"Drinking\",\n",
"    \"L\": \"Eat Sandwich\",\n",
"    \"M\": \"Kicking\",\n",
"    \"O\": \"Playing\",\n",
"    \"P\": \"Dribbling\",\n",
"    \"Q\": \"Writing\",\n",
"    \"R\": \"Clapping\",\n",
"    \"S\": \"Folding\",\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Removes the columns \"SubjectID\" and \"Timestamp\" from the dataframe"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def clean_data(dataframe):\n",
"    \"\"\" Returns a copy of the dataframe without the \"SubjectID\" and \"Timestamp\" columns \"\"\"\n",
"\n",
"    unused_columns = [\"SubjectID\", \"Timestamp\"]\n",
"\n",
"    # drop() already returns a new frame; the extra copy() is kept so the\n",
"    # result never shares state with the input\n",
"    return dataframe.drop(columns = unused_columns).copy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Encodes the activity labels and standardizes the data using the StandardScaler() function"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def scale_data(data):\n",
"    \"\"\" Encodes the activity labels and standardizes the data using StandardScaler() function\n",
"\n",
"    data: dataframe with the columns 'X', 'Y', 'Z' and 'ActivityCode'.\n",
"    Returns a new dataframe with the standardized 'X', 'Y', 'Z' columns and the\n",
"    integer-encoded 'ActivityCode' column. The input dataframe is not modified.\n",
"    \"\"\"\n",
"\n",
"    feature_columns = ['X', 'Y', 'Z']\n",
"\n",
"    # Encode into a separate array instead of overwriting the caller's column\n",
"    encoded_labels = LabelEncoder().fit_transform(data['ActivityCode'])\n",
"\n",
"    # Each axis is scaled to zero mean and unit variance\n",
"    scaled_features = StandardScaler().fit_transform(data[feature_columns])\n",
"\n",
"    scaled_df = pd.DataFrame(data = scaled_features, columns = feature_columns)\n",
"    scaled_df['ActivityCode'] = encoded_labels\n",
"\n",
"    return scaled_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Decodes the activity labels and stores them in a dictionary"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def activity_dictionary(dataframe):\n",
"    \"\"\" Maps every encoded activity index to its original activity label \"\"\"\n",
"\n",
"    encoder = LabelEncoder()\n",
"    encoder.fit(dataframe[\"ActivityCode\"])\n",
"\n",
"    original_labels = encoder.classes_\n",
"    encoded_indices = encoder.transform(original_labels)\n",
"\n",
"    return dict(zip(encoded_indices, original_labels))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Preprocesses the data using the clean_data() and scale_data() functions"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def preprocess_data(dataframe):\n",
"    \"\"\" Preprocesses the data using clean_data() and scale_data() functions \"\"\"\n",
"\n",
"    # Drop the unused columns first, then encode the labels and scale the axes\n",
"    return scale_data(clean_data(dataframe))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Prints the cross validation report"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def crossval_report(accuracy_per_fold, loss_per_fold):\n",
"    \"\"\" Prints the cross validation report\n",
"\n",
"    accuracy_per_fold: accuracy of every fold (printed with a trailing \"%\")\n",
"    loss_per_fold: loss of every fold, in the same order\n",
"    \"\"\"\n",
"\n",
"    separator = '------------------------------------------------------------------------'\n",
"\n",
"    print(separator)\n",
"    print('Score per fold')\n",
"    # Report as many folds as were actually recorded instead of assuming 10\n",
"    for fold_no, (loss, accuracy) in enumerate(zip(loss_per_fold, accuracy_per_fold), start=1):\n",
"        print(separator)\n",
"        print(f'> Fold {fold_no} - Loss: {loss:.4f} - Accuracy: {accuracy:.4f}%')\n",
"    print(separator)\n",
"    print('Average scores for all folds:')\n",
"    print(f'> Accuracy: {np.mean(accuracy_per_fold):.4f} (+- {np.std(accuracy_per_fold):.4f})')\n",
"    print(f'> Loss: {np.mean(loss_per_fold):.4f}')\n",
"    print(separator)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Plots training & validation accuracy and loss values"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def plot_learningCurve(history, epochs):\n",
"    \"\"\" Plots training & validation accuracy values, then training & validation loss values \"\"\"\n",
"\n",
"    epoch_range = range(1, epochs+1)\n",
"\n",
"    # (training key, validation key, title, y-axis label, legend position)\n",
"    curves = [\n",
"        ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy', 'lower right'),\n",
"        ('loss', 'val_loss', 'Model loss', 'Loss', 'upper right'),\n",
"    ]\n",
"\n",
"    # One figure per metric, each with the training and the validation curve\n",
"    for train_key, val_key, title, y_label, legend_position in curves:\n",
"        plt.plot(epoch_range, history.history[train_key])\n",
"        plt.plot(epoch_range, history.history[val_key])\n",
"        plt.title(title)\n",
"        plt.ylabel(y_label)\n",
"        plt.xlabel('Epoch')\n",
"        plt.legend(['Train', 'Val'], loc=legend_position)\n",
"        plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Divides data into 2D frames\n",
"###### To write this code cell, we used part of this tutorial: https://www.youtube.com/watch?v=lUI6VMj43PE&t=2112s"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Divides data into 2D frames \"\"\"\n",
"\n",
"frequency = 20 # Based on Hertz\n",
"time_period = 10 # Based on Second\n",
"frame_size = frequency * time_period\n",
"step_size = frame_size # In order not to have an overlap\n",
"\n",
"def get_frames(df, frame_size, step_size):\n",
"    \"\"\" Cuts the dataframe into frames of frame_size consecutive rows\n",
"\n",
"    df: dataframe with the columns 'X', 'Y', 'Z' and 'ActivityCode'\n",
"    frame_size: number of rows per frame\n",
"    step_size: number of rows between the starts of two consecutive frames\n",
"\n",
"    Returns (frames, labels): frames has the shape (n_frames, frame_size, 3),\n",
"    where every row of a frame holds the X, Y and Z values of one time step;\n",
"    labels holds the most frequent activity code of every frame.\n",
"    \"\"\"\n",
"    n_features = 3\n",
"    features = df[['X', 'Y', 'Z']].values\n",
"    activity_codes = df['ActivityCode'].values\n",
"\n",
"    frames = []\n",
"    labels = []\n",
"    # \"+ 1\" keeps the last complete frame when the data ends exactly on a frame boundary\n",
"    for start in range(0, len(df) - frame_size + 1, step_size):\n",
"        end = start + frame_size\n",
"\n",
"        # Slicing the (n_rows, 3) array keeps one time step per row. Stacking the\n",
"        # three axes as rows and reshaping afterwards would interleave their values.\n",
"        frames.append(features[start:end])\n",
"\n",
"        # Most frequent code of the frame; np.unique() sorts the codes, so ties\n",
"        # resolve to the smallest code\n",
"        codes, counts = np.unique(activity_codes[start:end], return_counts=True)\n",
"        labels.append(codes[np.argmax(counts)])\n",
"\n",
"    # The reshape only matters for the empty case, where it fixes the shape to (0, frame_size, 3)\n",
"    frames = np.asarray(frames).reshape(-1, frame_size, n_features)\n",
"    labels = np.asarray(labels)\n",
"\n",
"    return frames, labels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Builds the model (the Convolutional Neural Network)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def get_model(input_shape=None, num_classes=18):\n",
"    \"\"\" Builds the model (the Convolutional Neural Network)\n",
"\n",
"    input_shape: shape of one input sample. When omitted, the shape of the first\n",
"        sample of the notebook-level `inputs` array is used.\n",
"    num_classes: number of units of the softmax output layer\n",
"\n",
"    Returns the compiled model.\n",
"    \"\"\"\n",
"\n",
"    if input_shape is None:\n",
"        # Fallback for callers that rely on the notebook-level `inputs` variable\n",
"        input_shape = inputs[0].shape\n",
"\n",
"    # Defines model\n",
"    model = Sequential()\n",
"    model.add(Conv2D(64, (2, 2), activation = 'relu', input_shape = input_shape))\n",
"    model.add(MaxPooling2D(pool_size=2))\n",
"    model.add(Flatten())\n",
"    model.add(Dense(128, activation = 'relu'))\n",
"    model.add(Dense(128, activation = 'relu'))\n",
"    # Dropout before the output layer\n",
"    model.add(Dropout(0.5))\n",
"    model.add(Dense(num_classes, activation='softmax'))\n",
"\n",
"    # Compiles model; the sparse loss expects integer class indices as targets\n",
"    model.compile(optimizer=Adam(learning_rate = 0.001), \n",
"                  loss = 'sparse_categorical_crossentropy', \n",
"                  metrics = ['accuracy'])\n",
"\n",
"    return model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Phone Accelerometer"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Phone Accelerometer \"\"\"\n",
"\n",
"# Runs a 10-fold cross validation per subject file and stores, per subject id,\n",
"# the held-out loss/accuracy of every fold and the out-of-fold accuracy,\n",
"# precision, recall and F1 score.\n",
"\n",
"num_folds = 10\n",
"kfold = KFold(n_splits=num_folds, shuffle=True)\n",
"\n",
"phone_accel_accuracy_per_fold = {}\n",
"phone_accel_loss_per_fold = {}\n",
"\n",
"phone_accel_accuracy = {}\n",
"phone_accel_precision = {}\n",
"phone_accel_recall = {}\n",
"phone_accel_f1 = {}\n",
"\n",
"phone_accel_matrix = {}\n",
"phone_accel_activity_accuracy = {}\n",
"phone_accel_classification_reports={}\n",
"\n",
"for file in phone_accel_file_paths:\n",
"    # The subject id is the second \"_\"-separated field of the file name. Split the\n",
"    # base name only: the directory part of the path (\"raw_data\") contains \"_\" too\n",
"    subjectid = os.path.basename(file).split(\"_\")[1]\n",
"\n",
"    data = pd.read_csv(file)\n",
"\n",
"    activity_labels = list(activity_dictionary(data).values())\n",
"    # Encoded class indices, in the same order as activity_labels\n",
"    class_indices = np.arange(len(activity_labels))\n",
"\n",
"    processed_data = preprocess_data(data)\n",
"\n",
"    X, y = get_frames(processed_data, frame_size, step_size)\n",
"\n",
"    # Makes the input data form 4-Dimensional (get_model() reads the sample shape from `inputs`)\n",
"    inputs = X.reshape(X.shape[0], X.shape[1], 3, 1)\n",
"    targets = y\n",
"\n",
"    acc_per_fold = []\n",
"    loss_per_fold = []\n",
"    # Held-out labels and predictions of every fold\n",
"    fold_y_true = []\n",
"    fold_y_pred = []\n",
"    print(\"#########################################################################################\")\n",
"    print(subjectid)\n",
"    fold_no = 1\n",
"    for train, test in kfold.split(inputs, targets):\n",
"        model = get_model()\n",
"\n",
"        print('------------------------------------------------------------------------')\n",
"        print(f'Training for fold {fold_no} ...')\n",
"\n",
"        history = model.fit(inputs[train], targets[train], batch_size=128, epochs=70, validation_split=0.2, verbose=1)\n",
"\n",
"        # Scores the fold on its held-out frames, not on the frames the model was trained on\n",
"        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
"\n",
"        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
"        acc_per_fold.append(scores[1] * 100)\n",
"        loss_per_fold.append(scores[0])\n",
"\n",
"        fold_y_true.append(targets[test])\n",
"        # argmax over the softmax output; predict_classes() is not available in newer Keras versions\n",
"        fold_y_pred.append(np.argmax(model.predict(inputs[test], verbose=0), axis=1))\n",
"\n",
"        # Increases fold number\n",
"        fold_no = fold_no + 1\n",
"\n",
"    phone_accel_accuracy_per_fold[subjectid] = acc_per_fold\n",
"    phone_accel_loss_per_fold[subjectid] = loss_per_fold\n",
"\n",
"    # Every frame is predicted exactly once, by the model of the fold that held it out\n",
"    y_true = np.concatenate(fold_y_true)\n",
"    y_pred = np.concatenate(fold_y_pred)\n",
"\n",
"    # Accuracy: (tp + tn) / (p + n)\n",
"    phone_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
"    # labels=class_indices returns one score per entry of activity_labels, so\n",
"    # the zip() below stays aligned even when a class is never predicted\n",
"\n",
"    # Precision tp / (tp + fp)\n",
"    precision = precision_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    phone_accel_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
"    # Recall: tp / (tp + fn)\n",
"    recall = recall_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    phone_accel_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
"    # F1: 2 tp / (2 tp + fp + fn)\n",
"    f1 = f1_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    phone_accel_f1[subjectid] = dict(zip(activity_labels, f1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### The cross validation report"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------------------\n",
"Score per fold\n",
"------------------------------------------------------------------------\n",
"> Fold 1 - Loss: 0.2271 - Accuracy: 93.7500%\n",
"------------------------------------------------------------------------\n",
"> Fold 2 - Loss: 0.2516 - Accuracy: 95.8477%\n",
"------------------------------------------------------------------------\n",
"> Fold 3 - Loss: 0.1782 - Accuracy: 96.1938%\n",
"------------------------------------------------------------------------\n",
"> Fold 4 - Loss: 0.1891 - Accuracy: 94.8097%\n",
"------------------------------------------------------------------------\n",
"> Fold 5 - Loss: 0.2165 - Accuracy: 95.5017%\n",
"------------------------------------------------------------------------\n",
"> Fold 6 - Loss: 0.2196 - Accuracy: 95.1557%\n",
"------------------------------------------------------------------------\n",
"> Fold 7 - Loss: 0.2086 - Accuracy: 96.5398%\n",
"------------------------------------------------------------------------\n",
"> Fold 8 - Loss: 0.1336 - Accuracy: 96.5398%\n",
"------------------------------------------------------------------------\n",
"> Fold 9 - Loss: 0.1705 - Accuracy: 96.8858%\n",
"------------------------------------------------------------------------\n",
"> Fold 10 - Loss: 0.1841 - Accuracy: 95.1557%\n",
"------------------------------------------------------------------------\n",
"Average scores for all folds:\n",
"> Accuracy: 95.6380 (+- 0.9094)\n",
"> Loss: 0.1979\n",
"------------------------------------------------------------------------\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# `history` is the notebook-level variable left over from the most recent model.fit() call;\n",
"# 70 has to match the number of epochs that fit() call was run with\n",
"plot_learningCurve(history, 70)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### In order to prevent overfitting, we applied dropout (0.5) before the last layer of the network"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# `history` is the notebook-level variable left over from the most recent model.fit() call;\n",
"# 70 has to match the number of epochs that fit() call was run with\n",
"plot_learningCurve(history, 70)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Phone Gyroscope"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Phone Gyroscope \"\"\"\n",
"\n",
"# Runs a 10-fold cross validation per subject file and stores, per subject id,\n",
"# the held-out loss/accuracy of every fold and the out-of-fold accuracy,\n",
"# precision, recall and F1 score.\n",
"\n",
"num_folds = 10\n",
"kfold = KFold(n_splits=num_folds, shuffle=True)\n",
"\n",
"phone_gyro_accuracy_per_fold = {}\n",
"phone_gyro_loss_per_fold = {}\n",
"\n",
"phone_gyro_accuracy = {}\n",
"phone_gyro_precision = {}\n",
"phone_gyro_recall = {}\n",
"phone_gyro_f1 = {}\n",
"\n",
"phone_gyro_matrix = {}\n",
"phone_gyro_activity_accuracy = {}\n",
"phone_gyro_classification_reports={}\n",
"\n",
"for file in phone_gyro_file_paths:\n",
"    # The subject id is the second \"_\"-separated field of the file name. Split the\n",
"    # base name only: the directory part of the path (\"raw_data\") contains \"_\" too\n",
"    subjectid = os.path.basename(file).split(\"_\")[1]\n",
"\n",
"    data = pd.read_csv(file)\n",
"\n",
"    activity_labels = list(activity_dictionary(data).values())\n",
"    # Encoded class indices, in the same order as activity_labels\n",
"    class_indices = np.arange(len(activity_labels))\n",
"\n",
"    processed_data = preprocess_data(data)\n",
"\n",
"    X, y = get_frames(processed_data, frame_size, step_size)\n",
"\n",
"    # Makes the input data form 4-Dimensional (get_model() reads the sample shape from `inputs`)\n",
"    inputs = X.reshape(X.shape[0], X.shape[1], 3, 1)\n",
"    targets = y\n",
"\n",
"    acc_per_fold = []\n",
"    loss_per_fold = []\n",
"    # Held-out labels and predictions of every fold\n",
"    fold_y_true = []\n",
"    fold_y_pred = []\n",
"    print(\"#########################################################################################\")\n",
"    print(subjectid)\n",
"    fold_no = 1\n",
"    for train, test in kfold.split(inputs, targets):\n",
"        model = get_model()\n",
"\n",
"        print('------------------------------------------------------------------------')\n",
"        print(f'Training for fold {fold_no} ...')\n",
"\n",
"        history = model.fit(inputs[train], targets[train], batch_size=128, epochs=70, validation_split=0.2, verbose=1)\n",
"\n",
"        # Scores the fold on its held-out frames, not on the frames the model was trained on\n",
"        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
"\n",
"        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
"        acc_per_fold.append(scores[1] * 100)\n",
"        loss_per_fold.append(scores[0])\n",
"\n",
"        fold_y_true.append(targets[test])\n",
"        # argmax over the softmax output; predict_classes() is not available in newer Keras versions\n",
"        fold_y_pred.append(np.argmax(model.predict(inputs[test], verbose=0), axis=1))\n",
"\n",
"        # Increases fold number\n",
"        fold_no = fold_no + 1\n",
"\n",
"    phone_gyro_accuracy_per_fold[subjectid] = acc_per_fold\n",
"    phone_gyro_loss_per_fold[subjectid] = loss_per_fold\n",
"\n",
"    # Every frame is predicted exactly once, by the model of the fold that held it out\n",
"    y_true = np.concatenate(fold_y_true)\n",
"    y_pred = np.concatenate(fold_y_pred)\n",
"\n",
"    # Accuracy: (tp + tn) / (p + n)\n",
"    phone_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
"    # labels=class_indices returns one score per entry of activity_labels, so\n",
"    # the zip() below stays aligned even when a class is never predicted\n",
"\n",
"    # Precision tp / (tp + fp)\n",
"    precision = precision_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    phone_gyro_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
"    # Recall: tp / (tp + fn)\n",
"    recall = recall_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    phone_gyro_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
"    # F1: 2 tp / (2 tp + fp + fn)\n",
"    f1 = f1_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    phone_gyro_f1[subjectid] = dict(zip(activity_labels, f1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### The cross validation report"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------------------\n",
"Score per fold\n",
"------------------------------------------------------------------------\n",
"> Fold 1 - Loss: 0.7301 - Accuracy: 88.1944%\n",
"------------------------------------------------------------------------\n",
"> Fold 2 - Loss: 0.9662 - Accuracy: 87.5433%\n",
"------------------------------------------------------------------------\n",
"> Fold 3 - Loss: 0.9321 - Accuracy: 85.4671%\n",
"------------------------------------------------------------------------\n",
"> Fold 4 - Loss: 0.9679 - Accuracy: 87.5433%\n",
"------------------------------------------------------------------------\n",
"> Fold 5 - Loss: 0.9317 - Accuracy: 87.8893%\n",
"------------------------------------------------------------------------\n",
"> Fold 6 - Loss: 1.1612 - Accuracy: 86.8512%\n",
"------------------------------------------------------------------------\n",
"> Fold 7 - Loss: 0.8738 - Accuracy: 87.5433%\n",
"------------------------------------------------------------------------\n",
"> Fold 8 - Loss: 0.8802 - Accuracy: 87.1972%\n",
"------------------------------------------------------------------------\n",
"> Fold 9 - Loss: 0.9409 - Accuracy: 89.2734%\n",
"------------------------------------------------------------------------\n",
"> Fold 10 - Loss: 0.7402 - Accuracy: 86.1592%\n",
"------------------------------------------------------------------------\n",
"Average scores for all folds:\n",
"> Accuracy: 87.3662 (+- 1.0024)\n",
"> Loss: 0.9124\n",
"------------------------------------------------------------------------\n"
]
}
],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Watch Accelerometer"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Watch Accelerometer \"\"\"\n",
"\n",
"# Runs a 10-fold cross validation per subject file and stores, per subject id,\n",
"# the held-out loss/accuracy of every fold and the out-of-fold accuracy,\n",
"# precision, recall and F1 score.\n",
"\n",
"num_folds = 10\n",
"kfold = KFold(n_splits=num_folds, shuffle=True)\n",
"\n",
"watch_accel_accuracy_per_fold = {}\n",
"watch_accel_loss_per_fold = {}\n",
"\n",
"watch_accel_accuracy = {}\n",
"watch_accel_precision = {}\n",
"watch_accel_recall = {}\n",
"watch_accel_f1 = {}\n",
"\n",
"watch_accel_matrix = {}\n",
"watch_accel_activity_accuracy = {}\n",
"watch_accel_classification_reports={}\n",
"\n",
"for file in watch_accel_file_paths:\n",
"    # The subject id is the second \"_\"-separated field of the file name. Split the\n",
"    # base name only: the directory part of the path (\"raw_data\") contains \"_\" too\n",
"    subjectid = os.path.basename(file).split(\"_\")[1]\n",
"\n",
"    data = pd.read_csv(file)\n",
"\n",
"    activity_labels = list(activity_dictionary(data).values())\n",
"    # Encoded class indices, in the same order as activity_labels\n",
"    class_indices = np.arange(len(activity_labels))\n",
"\n",
"    processed_data = preprocess_data(data)\n",
"\n",
"    X, y = get_frames(processed_data, frame_size, step_size)\n",
"\n",
"    # Makes the input data form 4-Dimensional (get_model() reads the sample shape from `inputs`)\n",
"    inputs = X.reshape(X.shape[0], X.shape[1], 3, 1)\n",
"    targets = y\n",
"\n",
"    acc_per_fold = []\n",
"    loss_per_fold = []\n",
"    # Held-out labels and predictions of every fold\n",
"    fold_y_true = []\n",
"    fold_y_pred = []\n",
"    print(\"#########################################################################################\")\n",
"    print(subjectid)\n",
"    fold_no = 1\n",
"    for train, test in kfold.split(inputs, targets):\n",
"        model = get_model()\n",
"\n",
"        print('------------------------------------------------------------------------')\n",
"        print(f'Training for fold {fold_no} ...')\n",
"\n",
"        # 70 epochs, the same as for the other three sensors\n",
"        history = model.fit(inputs[train], targets[train], batch_size=128, epochs=70, validation_split=0.2, verbose=1)\n",
"\n",
"        # Scores the fold on its held-out frames, not on the frames the model was trained on\n",
"        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
"\n",
"        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
"        acc_per_fold.append(scores[1] * 100)\n",
"        loss_per_fold.append(scores[0])\n",
"\n",
"        fold_y_true.append(targets[test])\n",
"        # argmax over the softmax output; predict_classes() is not available in newer Keras versions\n",
"        fold_y_pred.append(np.argmax(model.predict(inputs[test], verbose=0), axis=1))\n",
"\n",
"        # Increases fold number\n",
"        fold_no = fold_no + 1\n",
"\n",
"    watch_accel_accuracy_per_fold[subjectid] = acc_per_fold\n",
"    watch_accel_loss_per_fold[subjectid] = loss_per_fold\n",
"\n",
"    # Every frame is predicted exactly once, by the model of the fold that held it out\n",
"    y_true = np.concatenate(fold_y_true)\n",
"    y_pred = np.concatenate(fold_y_pred)\n",
"\n",
"    # Accuracy: (tp + tn) / (p + n)\n",
"    watch_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
"    # labels=class_indices returns one score per entry of activity_labels, so\n",
"    # the zip() below stays aligned even when a class is never predicted\n",
"\n",
"    # Precision tp / (tp + fp)\n",
"    precision = precision_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    watch_accel_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
"    # Recall: tp / (tp + fn)\n",
"    recall = recall_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    watch_accel_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
"    # F1: 2 tp / (2 tp + fp + fn)\n",
"    f1 = f1_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    watch_accel_f1[subjectid] = dict(zip(activity_labels, f1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### The cross validation report"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------------------\n",
"Score per fold\n",
"------------------------------------------------------------------------\n",
"> Fold 1 - Loss: 0.2169 - Accuracy: 95.8730%\n",
"------------------------------------------------------------------------\n",
"> Fold 2 - Loss: 0.2066 - Accuracy: 94.9367%\n",
"------------------------------------------------------------------------\n",
"> Fold 3 - Loss: 0.1725 - Accuracy: 95.2532%\n",
"------------------------------------------------------------------------\n",
"> Fold 4 - Loss: 0.2720 - Accuracy: 94.3038%\n",
"------------------------------------------------------------------------\n",
"> Fold 5 - Loss: 0.2252 - Accuracy: 94.6203%\n",
"------------------------------------------------------------------------\n",
"> Fold 6 - Loss: 0.2351 - Accuracy: 93.0380%\n",
"------------------------------------------------------------------------\n",
"> Fold 7 - Loss: 0.2449 - Accuracy: 95.8861%\n",
"------------------------------------------------------------------------\n",
"> Fold 8 - Loss: 0.2657 - Accuracy: 93.6709%\n",
"------------------------------------------------------------------------\n",
"> Fold 9 - Loss: 0.3390 - Accuracy: 94.3038%\n",
"------------------------------------------------------------------------\n",
"> Fold 10 - Loss: 0.2642 - Accuracy: 93.3544%\n",
"------------------------------------------------------------------------\n",
"Average scores for all folds:\n",
"> Accuracy: 94.5240 (+- 0.9374)\n",
"> Loss: 0.2442\n",
"------------------------------------------------------------------------\n"
]
}
],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Watch Gyroscope"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Watch Gyroscope \"\"\"\n",
"\n",
"# Runs a 10-fold cross validation per subject file and stores, per subject id,\n",
"# the held-out loss/accuracy of every fold and the out-of-fold accuracy,\n",
"# precision, recall and F1 score.\n",
"\n",
"num_folds = 10\n",
"kfold = KFold(n_splits=num_folds, shuffle=True)\n",
"\n",
"watch_gyro_accuracy_per_fold = {}\n",
"watch_gyro_loss_per_fold = {}\n",
"\n",
"watch_gyro_accuracy = {}\n",
"watch_gyro_precision = {}\n",
"watch_gyro_recall = {}\n",
"watch_gyro_f1 = {}\n",
"\n",
"watch_gyro_matrix = {}\n",
"watch_gyro_activity_accuracy = {}\n",
"watch_gyro_classification_reports={}\n",
"\n",
"for file in watch_gyro_file_paths:\n",
"    # The subject id is the second \"_\"-separated field of the file name. Split the\n",
"    # base name only: the directory part of the path (\"raw_data\") contains \"_\" too\n",
"    subjectid = os.path.basename(file).split(\"_\")[1]\n",
"\n",
"    data = pd.read_csv(file)\n",
"\n",
"    activity_labels = list(activity_dictionary(data).values())\n",
"    # Encoded class indices, in the same order as activity_labels\n",
"    class_indices = np.arange(len(activity_labels))\n",
"\n",
"    processed_data = preprocess_data(data)\n",
"\n",
"    X, y = get_frames(processed_data, frame_size, step_size)\n",
"\n",
"    # Makes the input data form 4-Dimensional (get_model() reads the sample shape from `inputs`)\n",
"    inputs = X.reshape(X.shape[0], X.shape[1], 3, 1)\n",
"    targets = y\n",
"\n",
"    acc_per_fold = []\n",
"    loss_per_fold = []\n",
"    # Held-out labels and predictions of every fold\n",
"    fold_y_true = []\n",
"    fold_y_pred = []\n",
"    print(\"#########################################################################################\")\n",
"    print(subjectid)\n",
"    fold_no = 1\n",
"    for train, test in kfold.split(inputs, targets):\n",
"        model = get_model()\n",
"\n",
"        print('------------------------------------------------------------------------')\n",
"        print(f'Training for fold {fold_no} ...')\n",
"\n",
"        history = model.fit(inputs[train], targets[train], batch_size=128, epochs=70, validation_split=0.2, verbose=1)\n",
"\n",
"        # Scores the fold on its held-out frames, not on the frames the model was trained on\n",
"        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
"\n",
"        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
"        acc_per_fold.append(scores[1] * 100)\n",
"        loss_per_fold.append(scores[0])\n",
"\n",
"        fold_y_true.append(targets[test])\n",
"        # argmax over the softmax output; predict_classes() is not available in newer Keras versions\n",
"        fold_y_pred.append(np.argmax(model.predict(inputs[test], verbose=0), axis=1))\n",
"\n",
"        # Increases fold number\n",
"        fold_no = fold_no + 1\n",
"\n",
"    watch_gyro_accuracy_per_fold[subjectid] = acc_per_fold\n",
"    watch_gyro_loss_per_fold[subjectid] = loss_per_fold\n",
"\n",
"    # Every frame is predicted exactly once, by the model of the fold that held it out\n",
"    y_true = np.concatenate(fold_y_true)\n",
"    y_pred = np.concatenate(fold_y_pred)\n",
"\n",
"    # Accuracy: (tp + tn) / (p + n)\n",
"    watch_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
"    # labels=class_indices returns one score per entry of activity_labels, so\n",
"    # the zip() below stays aligned even when a class is never predicted\n",
"\n",
"    # Precision tp / (tp + fp)\n",
"    precision = precision_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    watch_gyro_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
"    # Recall: tp / (tp + fn)\n",
"    recall = recall_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    watch_gyro_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
"    # F1: 2 tp / (2 tp + fp + fn)\n",
"    f1 = f1_score(y_true, y_pred, labels=class_indices, average=None, zero_division=1)\n",
"    watch_gyro_f1[subjectid] = dict(zip(activity_labels, f1))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### The cross validation report"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------------------------\n",
"Score per fold\n",
"------------------------------------------------------------------------\n",
"> Fold 1 - Loss: 0.4807 - Accuracy: 89.8413%\n",
"------------------------------------------------------------------------\n",
"> Fold 2 - Loss: 0.3752 - Accuracy: 91.4557%\n",
"------------------------------------------------------------------------\n",
"> Fold 3 - Loss: 0.5942 - Accuracy: 90.8228%\n",
"------------------------------------------------------------------------\n",
"> Fold 4 - Loss: 0.6152 - Accuracy: 90.1899%\n",
"------------------------------------------------------------------------\n",
"> Fold 5 - Loss: 0.5313 - Accuracy: 91.1392%\n",
"------------------------------------------------------------------------\n",
"> Fold 6 - Loss: 0.5823 - Accuracy: 89.2405%\n",
"------------------------------------------------------------------------\n",
"> Fold 7 - Loss: 0.4954 - Accuracy: 90.1899%\n",
"------------------------------------------------------------------------\n",
"> Fold 8 - Loss: 0.6890 - Accuracy: 88.9241%\n",
"------------------------------------------------------------------------\n",
"> Fold 9 - Loss: 0.6603 - Accuracy: 88.9241%\n",
"------------------------------------------------------------------------\n",
"> Fold 10 - Loss: 0.6202 - Accuracy: 90.1899%\n",
"------------------------------------------------------------------------\n",
"Average scores for all folds:\n",
"> Accuracy: 90.0917 (+- 0.8388)\n",
"> Loss: 0.5644\n",
"------------------------------------------------------------------------\n"
]
}
],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Accuracy obtained using different values of learning rate"
]
},
{
"cell_type": "code",
"execution_count": 424,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>Learning Rate</th>\n",
" <th>0.0001</th>\n",
" <th>0.0010</th>\n",
" <th>0.0100</th>\n",
" <th>10.0000</th>\n",
" <th>1.0000</th>\n",
" <th>10.0000</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Accuracy</th>\n",
" <td>79.93</td>\n",
" <td>95.96</td>\n",
" <td>85.9</td>\n",
" <td>30.1</td>\n",
" <td>10.53</td>\n",
" <td>5.88</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Learning Rate 0.0001 0.0010 0.0100 10.0000 1.0000 10.0000\n",
"Accuracy 79.93 95.96 85.9 30.1 10.53 5.88"
]
},
"execution_count": 424,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learning_rates = [1e-4, 1e-3, 1e-2, 1e1, 1.0, 10.0]\n",
"accuracies = [79.93, 95.96, 85.9, 30.10, 10.53, 5.88]\n",
"data = {'Learning Rate': learning_rates, 'Accuracy': accuracies}\n",
"df = pd.DataFrame(data=data)\n",
"df.set_index('Learning Rate', inplace=True)\n",
"df.transpose()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Plots training accuracy values w.r.t learning rate"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"def plot_learningCurve():\n",
" \"\"\" Plots training accuracy values w.r.t learning rate \"\"\"\n",
" \n",
" x = [-4, -3, -2, -1, 0, 1]\n",
" y = [79.93, 95.96, 85.9, 30.10, 10.53, 5.98]\n",
" \n",
" plt.plot(x, y, color='green', lw=2, linestyle='--', marker='D', markerfacecolor='red', markeredgecolor='red', markersize=5)\n",
" plt.title('Model accuracy w.r.t learning rate', fontweight='bold', fontsize=15)\n",
" plt.xlabel(\"Learning Rate (10^)\", fontsize=13)\n",
" plt.ylabel(\"Accuracy (%)\", fontsize=13)\n",
" plt.grid(color='b', linestyle='--', linewidth=0.5)\n",
"# plt.xticks([0.1, 0.01, 0.001, 0.0001])\n",
" plt.yticks(y)\n",
"# plt.xlim(1.5, 12.5)\n",
" plt.ylim(0, 100)\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plot_learningCurve()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"activity_dict= {0:\"Walking\",\n",
" 1:\"Jogging\",\n",
" 2:\"Stairs\",\n",
" 3:\"Sitting\",\n",
" 4:\"Standing\",\n",
" 5:\"Typing\",\n",
" 6:\"Brushing Teeth\",\n",
" 7:\"Eating Soup\",\n",
" 8:\"Eating Chips\",\n",
" 9:\"Eating Pasta\",\n",
" 10:\"Drinking from Cup\",\n",
" 11:\"Eating Sandwich\",\n",
" 12:\"Kicking\",\n",
" 13:\"Playing Catch\",\n",
" 14:\"Dribblinlg Basketball\",\n",
" 15:\"Writing\",\n",
" 16:\"Clapping\",\n",
" 17:\"Folding Clothes\"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Plots the confusion Matrix"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x1080 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"y_pred = model.predict_classes(X_test)\n",
"matrix = confusion_matrix(y_test, y_pred)\n",
"matrix_plot = plot_confusion_matrix(conf_mat=matrix, class_names=activity_dict.values(), show_normed=True, figsize=(15,15), cmap=plt.cm.Purples);\n",
"plt.title('Confusion Matrix')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}