[c1ec9e]: / code / CNN_Personal_TransformedData.ipynb

Download this file

967 lines (966 with data), 157.1 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Imports the required libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "\"\"\" Imports the required libraries \"\"\"\n",
    "\n",
    "# import tensorflow as tf\n",
    "import keras\n",
    "from keras.models import Sequential\n",
    "from keras.optimizers import Adam\n",
    "from keras.layers import Dense, Activation, Conv1D, MaxPooling1D, MaxPooling2D\n",
    "from keras.layers import Flatten, Dropout, BatchNormalization, Reshape\n",
    "from keras.utils.vis_utils import plot_model\n",
    "from keras.wrappers.scikit_learn import KerasClassifier\n",
    "\n",
    "import os\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "from mlxtend.plotting import plot_confusion_matrix\n",
    "\n",
    "import sklearn\n",
    "from sklearn.model_selection import train_test_split, cross_val_score, KFold\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Stores the path of the sensor files in the corresponding list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" Stores the path of the sensor files in the corresponding list \"\"\"\n",
    "\n",
    "base_path = \"./data/transformed_data/\"\n",
    "phone_accel_file_paths = []\n",
    "phone_gyro_file_paths = []\n",
    "watch_accel_file_paths = []\n",
    "watch_gyro_file_paths = []\n",
    "\n",
    "# (device, sensor) keywords of a file name -> list collecting the matching paths.\n",
    "# The first pair whose keywords both occur in the file name wins.\n",
    "sensor_file_lists = [\n",
    "    ((\"phone\", \"accel\"), phone_accel_file_paths),\n",
    "    ((\"phone\", \"gyro\"), phone_gyro_file_paths),\n",
    "    ((\"watch\", \"accel\"), watch_accel_file_paths),\n",
    "    ((\"watch\", \"gyro\"), watch_gyro_file_paths),\n",
    "]\n",
    "\n",
    "for directory, _subdirectories, files in os.walk(base_path):\n",
    "    # Sorted so that the files of a directory are always visited in the same order\n",
    "    for filename in sorted(files):\n",
    "        for keywords, file_paths in sensor_file_lists:\n",
    "            if all(keyword in filename for keyword in keywords):\n",
    "                # Joins with the directory the file was actually found in,\n",
    "                # rather than rebuilding the path from an assumed sub-directory\n",
    "                file_paths.append(os.path.join(directory, filename))\n",
    "                break"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Stores the actual name of each activity in the dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" Stores the actual name of each activity in the dictionary \"\"\"\n",
    "\n",
    "# Maps the one-letter activity code used in the \"ACTIVITY\" column to a display name.\n",
    "# The letter \"N\" is not among the codes.\n",
    "activity_dict= {\"A\":\"Walking\",\n",
    "                \"B\":\"Jogging\",\n",
    "                \"C\":\"Stairs\",\n",
    "                \"D\":\"Sitting\",\n",
    "                \"E\":\"Standing\",\n",
    "                \"F\":\"Typing\",\n",
    "                \"G\":\"Brushing\",\n",
    "                \"H\":\"Eat Soup\",\n",
    "                \"I\":\"Eat Chips\",\n",
    "                \"J\":\"Eat Pasta\",\n",
    "                \"K\":\"Drinking\",\n",
    "                \"L\":\"Eat Sandwich\",\n",
    "                \"M\":\"Kicking\",\n",
    "                \"O\":\"Playing\",\n",
    "                \"P\":\"Dribbling\",\n",
    "                \"Q\":\"Writing\",\n",
    "                \"R\":\"Clapping\",\n",
    "                \"S\":\"Folding\"}\n",
    "\n",
    "def get_key(val):\n",
    "    \"\"\" Returns the activity code whose name equals val (None when no name matches) \"\"\"\n",
    "\n",
    "    matching_codes = (code for code, name in activity_dict.items() if val == name)\n",
    "    return next(matching_codes, None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Plots the distribution of data among activities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import seaborn as sns\n",
    "def plot_distribution(data):\n",
    "    \"\"\" Draws a bar chart with the percentage of rows of data that belongs to each activity \"\"\"\n",
    "\n",
    "    share_label = \"Distr. of Data Among Activities %\"\n",
    "\n",
    "    # Display names of the activity codes that occur in the data\n",
    "    activity_names = [activity_dict[code] for code in np.unique(data['ACTIVITY'])]\n",
    "    shares = [len(data[data[\"ACTIVITY\"] == get_key(name)])/len(data)*100\n",
    "              for name in activity_names]\n",
    "    activity_df = pd.DataFrame({\"Activity\": activity_names, share_label: shares})\n",
    "\n",
    "    sns.set(style=\"whitegrid\")\n",
    "    fig, ax = plt.subplots(figsize=(10, 4))\n",
    "\n",
    "    # barplot draws on the current axes, i.e. the ones created just above\n",
    "    sns.barplot(x=\"Activity\", y=share_label, data=activity_df)\n",
    "    ax.set_xlabel(\"Activity\", fontsize=14)\n",
    "    ax.set_ylabel(share_label, fontsize=13)\n",
    "    plt.xticks(rotation=90);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Distribution of Data Among Activities"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 720x288 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# The deprecated `verbose` keyword is no longer passed; False is its default anyway\n",
    "data = pd.read_csv(\"./data/raw_data/all_data.csv\")\n",
    "plot_distribution(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### The data is slightly imbalanced\n",
    "#### In order to balance the data, the first 17 data rows of each activity are taken"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def balance_data(dataframe, rows_per_activity=17):\n",
    "    \"\"\"  Takes only the first rows_per_activity (17 by default) data rows for each activity\n",
    "\n",
    "    Only rows whose \"ACTIVITY\" code is one of A-M or O-S are kept.\n",
    "    The result is grouped by activity code in that order and re-indexed from 0.\n",
    "    \"\"\"\n",
    "\n",
    "    activity_codes = \"ABCDEFGHIJKLMOPQRS\"\n",
    "    rows_by_activity = [dataframe[dataframe['ACTIVITY'] == code].head(rows_per_activity)\n",
    "                        for code in activity_codes]\n",
    "\n",
    "    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x;\n",
    "    # it returns a new frame, so the input is never modified\n",
    "    balanced_data = pd.concat(rows_by_activity, ignore_index=True)\n",
    "\n",
    "    return balanced_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Takes only 43 important features of the dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clean_data(dataframe):\n",
    "    \"\"\" Returns the feature columns \"X0\" through \"ZSTANDDEV\" plus \"RESULTANT\"\n",
    "\n",
    "    The columns \"ACTIVITY\" and \"class\" are removed first; the input dataframe is left untouched.\n",
    "    \"\"\"\n",
    "\n",
    "    features = dataframe.drop(columns=['ACTIVITY', 'class'])\n",
    "    selected_parts = [features.loc[:, \"X0\":\"ZSTANDDEV\"], features['RESULTANT']]\n",
    "\n",
    "    return pd.concat(selected_parts, axis=1, join='inner')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Encodes the labels, splits the data into train and test sets, and standardizes the features using StandardScaler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def scale_data(data, labels):\n",
    "    \"\"\" Encodes the labels, splits the data 70/30 and standardizes the features\n",
    "\n",
    "    The StandardScaler is fitted on the training split only and then applied to both splits.\n",
    "    Returns X_train, X_test, y_train, y_test.\n",
    "    \"\"\"\n",
    "\n",
    "    encoded_labels = LabelEncoder().fit_transform(labels)\n",
    "\n",
    "    # random_state=0 keeps the split identical between runs\n",
    "    train_features, test_features, y_train, y_test = train_test_split(\n",
    "        data, encoded_labels, test_size=0.3, random_state=0)\n",
    "\n",
    "    scaler = StandardScaler().fit(train_features)\n",
    "\n",
    "    return scaler.transform(train_features), scaler.transform(test_features), y_train, y_test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Decodes the activity labels and stores them in a dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def activity_dictionary(dataframe):\n",
    "    \"\"\" Maps every encoded label index to the activity code it stands for\n",
    "\n",
    "    The codes are the distinct values of the \"ACTIVITY\" column, as ordered by LabelEncoder.\n",
    "    \"\"\"\n",
    "\n",
    "    encoder = LabelEncoder().fit(dataframe[\"ACTIVITY\"])\n",
    "    activity_codes = encoder.classes_\n",
    "\n",
    "    return dict(zip(encoder.transform(activity_codes), activity_codes))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Preprocesses the data using balance_data(), clean_data(), and scale_data() functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def preprocess_data(dataframe):\n",
    "    \"\"\" Runs balance_data(), clean_data() and scale_data() on the dataframe\n",
    "\n",
    "    Returns X_train, X_test, y_train, y_test as produced by scale_data().\n",
    "    \"\"\"\n",
    "\n",
    "    balanced = balance_data(dataframe)\n",
    "\n",
    "    # The labels are read from the balanced frame so that they stay aligned with the features\n",
    "    return scale_data(clean_data(balanced), balanced[\"ACTIVITY\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Prints the cross validation report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def crossval_report(accuracy_per_fold, loss_per_fold):\n",
    "    \"\"\" Prints the cross validation report\n",
    "\n",
    "    accuracy_per_fold -- accuracy of every fold (printed with a trailing %)\n",
    "    loss_per_fold     -- loss of every fold, in the same order\n",
    "    \"\"\"\n",
    "    \n",
    "    separator = '------------------------------------------------------------------------'\n",
    "\n",
    "    print(separator)\n",
    "    print('Score per fold')\n",
    "    # Walks over the folds that were actually passed in instead of assuming ten of them\n",
    "    for fold_no, (accuracy, loss) in enumerate(zip(accuracy_per_fold, loss_per_fold), start=1):\n",
    "        print(separator)\n",
    "        # Each value is printed next to its own label\n",
    "        print(f'> Fold {fold_no} - Loss: {loss} - Accuracy: {accuracy}%')\n",
    "    print(separator)\n",
    "    print('Average scores for all folds:')\n",
    "    print(f'> Accuracy: {np.mean(accuracy_per_fold)} (+- {np.std(accuracy_per_fold)})')\n",
    "    print(f'> Loss: {np.mean(loss_per_fold)}')\n",
    "    print(separator)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Plots training & validation accuracy and loss values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_learningCurve(history, epochs):\n",
    "    \"\"\" Plots training & validation accuracy values, then training & validation loss values\n",
    "\n",
    "    history -- object whose .history dict holds 'accuracy', 'val_accuracy', 'loss' and 'val_loss'\n",
    "    epochs  -- number of epochs; the x axis runs from 1 to epochs\n",
    "    \"\"\"\n",
    "\n",
    "    epoch_range = range(1, epochs+1)\n",
    "\n",
    "    # (training key, validation key, title, y label, legend position) of each figure\n",
    "    figures = [\n",
    "        ('accuracy', \"val_accuracy\", 'Model accuracy', 'Accuracy', 'lower right'),\n",
    "        ('loss', 'val_loss', 'Model loss', 'Loss', 'upper right'),\n",
    "    ]\n",
    "\n",
    "    for train_key, val_key, title, y_label, legend_position in figures:\n",
    "        plt.plot(epoch_range, history.history[train_key])\n",
    "        plt.plot(epoch_range, history.history[val_key])\n",
    "        plt.title(title)\n",
    "        plt.ylabel(y_label)\n",
    "        plt.xlabel('Epoch')\n",
    "        plt.legend(['Train', 'Val'], loc=legend_position)\n",
    "        plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Builds the model (the Convolutional Neural Network)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_model():\n",
    "    \"\"\" Builds and compiles the model (the Convolutional Neural Network)\n",
    "\n",
    "    The network takes inputs of shape (1, 43) and ends in an 18-way softmax layer.\n",
    "    \"\"\"\n",
    "\n",
    "    # Layers in the order the data flows through them\n",
    "    layers = [\n",
    "        Conv1D(filters=43, kernel_size=1, activation='relu', input_shape=(1,43)),\n",
    "        MaxPooling1D(pool_size=1),\n",
    "        Flatten(),\n",
    "        Dense(128, activation='relu'),\n",
    "        Dense(128, activation='relu'),\n",
    "        Dropout(0.5),\n",
    "        Dense(18, activation='softmax'),\n",
    "    ]\n",
    "    network = Sequential(layers)\n",
    "\n",
    "    # The sparse loss expects integer class labels rather than one-hot vectors\n",
    "    network.compile(optimizer=Adam(learning_rate = 0.001),\n",
    "                    loss='sparse_categorical_crossentropy',\n",
    "                    metrics=['accuracy'])\n",
    "\n",
    "    return network"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Phone Accelerometer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" Phone Accelerometer \"\"\"\n",
    "\n",
    "num_folds = 10\n",
    "kfold = KFold(n_splits=num_folds, shuffle=False)\n",
    "\n",
    "# Results per subject id\n",
    "phone_accel_accuracy_per_fold={}\n",
    "phone_accel_loss_per_fold={}\n",
    "\n",
    "phone_accel_accuracy={}\n",
    "phone_accel_precision={}\n",
    "phone_accel_recall={}\n",
    "phone_accel_f1={}\n",
    "\n",
    "# Created here but not filled in by this cell\n",
    "phone_accel_matrix = {}\n",
    "phone_accel_activity_accuracy = {}\n",
    "phone_accel_classification_reports={}\n",
    "\n",
    "for file in phone_accel_file_paths:\n",
    "    # The subject id is the third \"_\"-separated token of the file path\n",
    "    subjectid = file.split(\"_\")[2]\n",
    "    \n",
    "    data = pd.read_csv(file)\n",
    "    \n",
    "    activity_labels = list(activity_dictionary(data).values())\n",
    "    X_train, X_test, y_train, y_test = preprocess_data(data)\n",
    "    \n",
    "    # Makes the input data form 3-Dimensional\n",
    "    X_train = X_train.reshape(X_train.shape[0], 1, 43)\n",
    "    X_test = X_test.reshape(X_test.shape[0], 1, 43)\n",
    "    \n",
    "    # The two splits are merged again; the folds below decide what is held out\n",
    "    inputs = np.concatenate((X_train, X_test), axis=0)\n",
    "    targets = np.concatenate((y_train, y_test), axis=0)\n",
    "        \n",
    "    acc_per_fold = []\n",
    "    loss_per_fold = []    \n",
    "    print(\"#########################################################################################\")\n",
    "    print(subjectid)\n",
    "    for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):\n",
    "        print('------------------------------------------------------------------------')\n",
    "        print(f'Training for fold {fold_no} ...')\n",
    "\n",
    "        # A fresh model per fold, so that no weights carry over between folds\n",
    "        model = get_model()\n",
    "        history = model.fit(inputs[train], targets[train], batch_size=1, epochs=70, validation_split=0.2, verbose=1)\n",
    "\n",
    "        # Scores the fold on its held-out samples, not on the samples the model was fitted on\n",
    "        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
    "\n",
    "        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
    "        acc_per_fold.append(scores[1] * 100)\n",
    "        loss_per_fold.append(scores[0])\n",
    "        \n",
    "    phone_accel_accuracy_per_fold[subjectid] = acc_per_fold\n",
    "    phone_accel_loss_per_fold[subjectid] = loss_per_fold\n",
    "    \n",
    "    # NOTE: the metrics below describe only the model and the held-out samples of the last fold\n",
    "    y_true = targets[test]\n",
    "    # Class with the highest softmax output; used instead of predict_classes,\n",
    "    # which newer Keras releases no longer provide\n",
    "    y_pred = np.argmax(model.predict(inputs[test], verbose=0), axis=-1)\n",
    "\n",
    "    # One score per encoded label, so that the scores stay aligned with activity_labels\n",
    "    # even when an activity does not occur in the last fold.\n",
    "    # Assumes preprocess_data encodes the labels like activity_dictionary does -- TODO confirm\n",
    "    label_ids = np.arange(len(activity_labels))\n",
    "\n",
    "    # Accuracy: (tp + tn) / (p + n)\n",
    "    phone_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
    "\n",
    "    # Precision tp / (tp + fp)\n",
    "    precision = precision_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    phone_accel_precision[subjectid] = dict(zip(activity_labels, precision))\n",
    "\n",
    "    # Recall: tp / (tp + fn)\n",
    "    recall = recall_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    phone_accel_recall[subjectid] = dict(zip(activity_labels, recall))\n",
    "\n",
    "    # F1: 2 tp / (2 tp + fp + fn)\n",
    "    f1 = f1_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    phone_accel_f1[subjectid] = dict(zip(activity_labels, f1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### The cross validation report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------------------------------------\n",
      "Score per fold\n",
      "------------------------------------------------------------------------\n",
      "> Fold 1 - Loss: 94.18181777000427 - Accuracy: 0.3544494605064392%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 2 - Loss: 92.36363768577576 - Accuracy: 0.4488078521056609%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 3 - Loss: 92.72727370262146 - Accuracy: 0.5173578348891301%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 4 - Loss: 92.00000166893005 - Accuracy: 0.5027308412031694%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 5 - Loss: 92.00000166893005 - Accuracy: 0.5405109715461731%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 6 - Loss: 93.81818175315857 - Accuracy: 0.41282150837508114%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 7 - Loss: 93.478262424469 - Accuracy: 0.4353983595535375%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 8 - Loss: 95.28985619544983 - Accuracy: 0.38435162728031474%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 9 - Loss: 95.652174949646 - Accuracy: 0.22068002890201582%\n",
      "------------------------------------------------------------------------\n",
      "> Fold 10 - Loss: 95.652174949646 - Accuracy: 0.2679230518963026%\n",
      "------------------------------------------------------------------------\n",
      "Average scores for all folds:\n",
      "> Accuracy: 93.7163382768631 (+- 1.37674542221131)\n",
      "> Loss: 0.4085031536257825\n",
      "------------------------------------------------------------------------\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Accuracy and loss curves of the `history` object currently in the kernel;\n",
    "# 70 is the number of epochs passed to model.fit in the training cells\n",
    "plot_learningCurve(history, 70)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### In order to prevent overfitting, we applied dropout (0.5) before the last layer of the network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    },
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Accuracy and loss curves of the `history` object currently in the kernel;\n",
    "# 70 is the number of epochs passed to model.fit in the training cells\n",
    "plot_learningCurve(history, 70)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Phone Gyroscope"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" Phone Gyroscope \"\"\"\n",
    "\n",
    "num_folds = 10\n",
    "kfold = KFold(n_splits=num_folds, shuffle=False)\n",
    "\n",
    "# Results per subject id\n",
    "phone_gyro_accuracy_per_fold={}\n",
    "phone_gyro_loss_per_fold={}\n",
    "\n",
    "phone_gyro_accuracy={}\n",
    "phone_gyro_precision={}\n",
    "phone_gyro_recall={}\n",
    "phone_gyro_f1={}\n",
    "\n",
    "# Created here but not filled in by this cell\n",
    "phone_gyro_matrix = {}\n",
    "phone_gyro_activity_accuracy = {}\n",
    "phone_gyro_classification_reports={}\n",
    "\n",
    "for file in phone_gyro_file_paths:\n",
    "    # The subject id is the third \"_\"-separated token of the file path\n",
    "    subjectid = file.split(\"_\")[2]\n",
    "    \n",
    "    data = pd.read_csv(file)\n",
    "    \n",
    "    activity_labels = list(activity_dictionary(data).values())\n",
    "    X_train, X_test, y_train, y_test = preprocess_data(data)\n",
    "    \n",
    "    # Makes the input data form 3-Dimensional\n",
    "    X_train = X_train.reshape(X_train.shape[0], 1, 43)\n",
    "    X_test = X_test.reshape(X_test.shape[0], 1, 43)\n",
    "    \n",
    "    # The two splits are merged again; the folds below decide what is held out\n",
    "    inputs = np.concatenate((X_train, X_test), axis=0)\n",
    "    targets = np.concatenate((y_train, y_test), axis=0)\n",
    "        \n",
    "    acc_per_fold = []\n",
    "    loss_per_fold = []    \n",
    "    \n",
    "    for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):\n",
    "        print('------------------------------------------------------------------------')\n",
    "        print(f'Training for fold {fold_no} ...')\n",
    "\n",
    "        # A fresh model per fold, so that no weights carry over between folds\n",
    "        model = get_model()\n",
    "        history = model.fit(inputs[train], targets[train], batch_size=1, epochs=70, validation_split=0.2, verbose=1)\n",
    "\n",
    "        # Scores the fold on its held-out samples\n",
    "        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
    "\n",
    "        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
    "        acc_per_fold.append(scores[1] * 100)\n",
    "        loss_per_fold.append(scores[0])\n",
    "        \n",
    "    phone_gyro_accuracy_per_fold[subjectid] = acc_per_fold\n",
    "    phone_gyro_loss_per_fold[subjectid] = loss_per_fold\n",
    "    \n",
    "    # NOTE: the metrics below describe only the model and the held-out samples of the last fold\n",
    "    y_true = targets[test]\n",
    "    # Class with the highest softmax output; used instead of predict_classes,\n",
    "    # which newer Keras releases no longer provide\n",
    "    y_pred = np.argmax(model.predict(inputs[test], verbose=0), axis=-1)\n",
    "\n",
    "    # One score per encoded label, so that the scores stay aligned with activity_labels\n",
    "    # even when an activity does not occur in the last fold.\n",
    "    # Assumes preprocess_data encodes the labels like activity_dictionary does -- TODO confirm\n",
    "    label_ids = np.arange(len(activity_labels))\n",
    "\n",
    "    # Accuracy: (tp + tn) / (p + n)\n",
    "    phone_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
    "\n",
    "    # Precision tp / (tp + fp)\n",
    "    precision = precision_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    phone_gyro_precision[subjectid] = dict(zip(activity_labels, precision))\n",
    "\n",
    "    # Recall: tp / (tp + fn)\n",
    "    recall = recall_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    phone_gyro_recall[subjectid] = dict(zip(activity_labels, recall))\n",
    "\n",
    "    # F1: 2 tp / (2 tp + fp + fn)\n",
    "    f1 = f1_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    phone_gyro_f1[subjectid] = dict(zip(activity_labels, f1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Watch Accelerometer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" Watch Accelerometer \"\"\"\n",
    "\n",
    "num_folds = 10\n",
    "kfold = KFold(n_splits=num_folds, shuffle=False)\n",
    "\n",
    "# Results per subject id\n",
    "watch_accel_accuracy_per_fold={}\n",
    "watch_accel_loss_per_fold={}\n",
    "\n",
    "watch_accel_accuracy={}\n",
    "watch_accel_precision={}\n",
    "watch_accel_recall={}\n",
    "watch_accel_f1={}\n",
    "\n",
    "# Created here but not filled in by this cell\n",
    "watch_accel_matrix = {}\n",
    "watch_accel_activity_accuracy = {}\n",
    "watch_accel_classification_reports={}\n",
    "\n",
    "for file in watch_accel_file_paths:\n",
    "    # The subject id is the third \"_\"-separated token of the file path\n",
    "    subjectid = file.split(\"_\")[2]\n",
    "    \n",
    "    data = pd.read_csv(file)\n",
    "    \n",
    "    activity_labels = list(activity_dictionary(data).values())\n",
    "    X_train, X_test, y_train, y_test = preprocess_data(data)\n",
    "    \n",
    "    # Makes the input data form 3-Dimensional\n",
    "    X_train = X_train.reshape(X_train.shape[0], 1, 43)\n",
    "    X_test = X_test.reshape(X_test.shape[0], 1, 43)\n",
    "    \n",
    "    # The two splits are merged again; the folds below decide what is held out\n",
    "    inputs = np.concatenate((X_train, X_test), axis=0)\n",
    "    targets = np.concatenate((y_train, y_test), axis=0)\n",
    "        \n",
    "    acc_per_fold = []\n",
    "    loss_per_fold = []    \n",
    "    \n",
    "    for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):\n",
    "        print('------------------------------------------------------------------------')\n",
    "        print(f'Training for fold {fold_no} ...')\n",
    "\n",
    "        # A fresh model per fold, so that no weights carry over between folds\n",
    "        model = get_model()\n",
    "        history = model.fit(inputs[train], targets[train], batch_size=1, epochs=70, validation_split=0.2, verbose=1)\n",
    "\n",
    "        # Scores the fold on its held-out samples\n",
    "        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
    "\n",
    "        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
    "        acc_per_fold.append(scores[1] * 100)\n",
    "        loss_per_fold.append(scores[0])\n",
    "        \n",
    "    watch_accel_accuracy_per_fold[subjectid] = acc_per_fold\n",
    "    watch_accel_loss_per_fold[subjectid] = loss_per_fold\n",
    "    \n",
    "    # NOTE: the metrics below describe only the model and the held-out samples of the last fold\n",
    "    y_true = targets[test]\n",
    "    # Class with the highest softmax output; used instead of predict_classes,\n",
    "    # which newer Keras releases no longer provide\n",
    "    y_pred = np.argmax(model.predict(inputs[test], verbose=0), axis=-1)\n",
    "\n",
    "    # One score per encoded label, so that the scores stay aligned with activity_labels\n",
    "    # even when an activity does not occur in the last fold.\n",
    "    # Assumes preprocess_data encodes the labels like activity_dictionary does -- TODO confirm\n",
    "    label_ids = np.arange(len(activity_labels))\n",
    "\n",
    "    # Accuracy: (tp + tn) / (p + n)\n",
    "    watch_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
    "\n",
    "    # Precision tp / (tp + fp)\n",
    "    precision = precision_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    watch_accel_precision[subjectid] = dict(zip(activity_labels, precision))\n",
    "\n",
    "    # Recall: tp / (tp + fn)\n",
    "    recall = recall_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    watch_accel_recall[subjectid] = dict(zip(activity_labels, recall))\n",
    "\n",
    "    # F1: 2 tp / (2 tp + fp + fn)\n",
    "    f1 = f1_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    watch_accel_f1[subjectid] = dict(zip(activity_labels, f1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Watch Gyroscope"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" Watch Gyroscope \"\"\"\n",
    "\n",
    "num_folds = 10\n",
    "kfold = KFold(n_splits=num_folds, shuffle=False)\n",
    "\n",
    "# Results per subject id\n",
    "watch_gyro_accuracy_per_fold={}\n",
    "watch_gyro_loss_per_fold={}\n",
    "\n",
    "watch_gyro_accuracy={}\n",
    "watch_gyro_precision={}\n",
    "watch_gyro_recall={}\n",
    "watch_gyro_f1={}\n",
    "\n",
    "# Created here but not filled in by this cell\n",
    "watch_gyro_matrix = {}\n",
    "watch_gyro_activity_accuracy = {}\n",
    "watch_gyro_classification_reports={}\n",
    "\n",
    "for file in watch_gyro_file_paths:\n",
    "    # The subject id is the third \"_\"-separated token of the file path\n",
    "    subjectid = file.split(\"_\")[2]\n",
    "    \n",
    "    data = pd.read_csv(file)\n",
    "    \n",
    "    activity_labels = list(activity_dictionary(data).values())\n",
    "    X_train, X_test, y_train, y_test = preprocess_data(data)\n",
    "    \n",
    "    # Makes the input data form 3-Dimensional\n",
    "    X_train = X_train.reshape(X_train.shape[0], 1, 43)\n",
    "    X_test = X_test.reshape(X_test.shape[0], 1, 43)\n",
    "    \n",
    "    # The two splits are merged again; the folds below decide what is held out\n",
    "    inputs = np.concatenate((X_train, X_test), axis=0)\n",
    "    targets = np.concatenate((y_train, y_test), axis=0)\n",
    "        \n",
    "    acc_per_fold = []\n",
    "    loss_per_fold = []    \n",
    "    \n",
    "    for fold_no, (train, test) in enumerate(kfold.split(inputs, targets), start=1):\n",
    "        print('------------------------------------------------------------------------')\n",
    "        print(f'Training for fold {fold_no} ...')\n",
    "\n",
    "        # A fresh model per fold, so that no weights carry over between folds\n",
    "        model = get_model()\n",
    "        history = model.fit(inputs[train], targets[train], batch_size=1, epochs=70, validation_split=0.2, verbose=1)\n",
    "\n",
    "        # Scores the fold on its held-out samples, not on the samples the model was fitted on\n",
    "        scores = model.evaluate(inputs[test], targets[test], verbose=0)\n",
    "\n",
    "        print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')\n",
    "        acc_per_fold.append(scores[1] * 100)\n",
    "        loss_per_fold.append(scores[0])\n",
    "        \n",
    "    watch_gyro_accuracy_per_fold[subjectid] = acc_per_fold\n",
    "    watch_gyro_loss_per_fold[subjectid] = loss_per_fold\n",
    "    \n",
    "    # NOTE: the metrics below describe only the model and the held-out samples of the last fold\n",
    "    y_true = targets[test]\n",
    "    # Class with the highest softmax output; used instead of predict_classes,\n",
    "    # which newer Keras releases no longer provide\n",
    "    y_pred = np.argmax(model.predict(inputs[test], verbose=0), axis=-1)\n",
    "\n",
    "    # One score per encoded label, so that the scores stay aligned with activity_labels\n",
    "    # even when an activity does not occur in the last fold.\n",
    "    # Assumes preprocess_data encodes the labels like activity_dictionary does -- TODO confirm\n",
    "    label_ids = np.arange(len(activity_labels))\n",
    "\n",
    "    # Accuracy: (tp + tn) / (p + n)\n",
    "    watch_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
    "\n",
    "    # Precision tp / (tp + fp)\n",
    "    precision = precision_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    watch_gyro_precision[subjectid] = dict(zip(activity_labels, precision))\n",
    "\n",
    "    # Recall: tp / (tp + fn)\n",
    "    recall = recall_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    watch_gyro_recall[subjectid] = dict(zip(activity_labels, recall))\n",
    "\n",
    "    # F1: 2 tp / (2 tp + fp + fn)\n",
    "    f1 = f1_score(y_true, y_pred, labels=label_ids, average=None, zero_division=1)\n",
    "    watch_gyro_f1[subjectid] = dict(zip(activity_labels, f1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}