661 lines (660 with data), 22.6 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Imports the required libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"\"\"\" Imports the required libraries \"\"\"\n",
"\n",
"# import tensorflow as tf\n",
"import keras\n",
"from keras.models import Sequential\n",
"from keras.optimizers import Adam\n",
"from keras.layers import Dense, Activation, Conv2D, MaxPool2D, MaxPooling2D\n",
"from keras.layers import Flatten, Dropout, BatchNormalization, Reshape\n",
"from keras.utils.vis_utils import plot_model\n",
"\n",
"import os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import scipy.stats as stats\n",
"import matplotlib.pyplot as plt\n",
"from sklearn import preprocessing\n",
"from sklearn.model_selection import train_test_split, cross_val_score, KFold\n",
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
"from mlxtend.plotting import plot_confusion_matrix\n",
"from sklearn.metrics import confusion_matrix, classification_report\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Stores the path of the sensor files in the corresponding list"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Stores the path of the sensor files in the corresponding list \"\"\"\n",
"\n",
"base_path = \"./data/raw_data/\"\n",
"phone_accel_file_paths = []\n",
"phone_gyro_file_paths = []\n",
"watch_accel_file_paths = []\n",
"watch_gyro_file_paths = []\n",
"\n",
"for directories, subdirectories, files in os.walk(base_path):\n",
" for filename in files:\n",
" if \"phone\" in filename and \"accel\" in filename:\n",
" phone_accel_file_paths.append(f\"{base_path}phone/accel/{filename}\")\n",
" elif \"phone\" in filename and \"gyro\" in filename:\n",
" phone_gyro_file_paths.append(f\"{base_path}phone/gyro/{filename}\")\n",
" elif \"watch\" in filename and \"accel\" in filename:\n",
" watch_accel_file_paths.append(f\"{base_path}watch/accel/{filename}\")\n",
" elif \"watch\" in filename and \"gyro\" in filename:\n",
" watch_gyro_file_paths.append(f\"{base_path}watch/gyro/{filename}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Stores the actual name of each activity in the dictionary"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Stores the actual name of each activity in the dictionary \"\"\"\n",
"\n",
"activity_dict= {\"A\":\"Walking\",\n",
" \"B\":\"Jogging\",\n",
" \"C\":\"Stairs\",\n",
" \"D\":\"Sitting\",\n",
" \"E\":\"Standing\",\n",
" \"F\":\"Typing\",\n",
" \"G\":\"Brushing\",\n",
" \"H\":\"Eat Soup\",\n",
" \"I\":\"Eat Chips\",\n",
" \"J\":\"Eat Pasta\",\n",
" \"K\":\"Drinking\",\n",
" \"L\":\"Eat Sandwich\",\n",
" \"M\":\"Kicking\",\n",
" \"O\":\"Playing\",\n",
" \"P\":\"Dribblinlg\",\n",
" \"Q\":\"Writing\",\n",
" \"R\":\"Clapping\",\n",
" \"S\":\"Folding\"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Removes the columns \"SubjectID\" and \"Timestamp\" from the dataframe"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def clean_data(dataframe):\n",
" \"\"\" Removes the columns \"SubjectID\" and \"Timestamp\" from the dataframe \"\"\"\n",
"\n",
" cleaned_df = dataframe.drop([\"SubjectID\", \"Timestamp\"], axis = 1).copy()\n",
"\n",
" return cleaned_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Normalizes the data using StandardScaler() function"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def scale_data(data):\n",
" \"\"\" Normalizes the data using StandardScaler() function \"\"\"\n",
" \n",
" le = LabelEncoder()\n",
" data['ActivityCode'] = le.fit_transform(data['ActivityCode'])\n",
" \n",
" X = data[['X', 'Y', 'Z']]\n",
" y = data['ActivityCode']\n",
" \n",
" scaler = StandardScaler()\n",
" X = scaler.fit_transform(X)\n",
" \n",
" scaled_df = pd.DataFrame(data = X, columns = ['X', 'Y', 'Z'])\n",
" scaled_df['ActivityCode'] = y.values\n",
" \n",
" return scaled_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Decodes the activity labels and stores them in the dictionary"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def activity_dictionary(dataframe): \n",
" \"\"\" Decodes the activity labels and stores them in the dictionary \"\"\"\n",
"\n",
" activity_labels = dataframe[\"ActivityCode\"]\n",
" le = LabelEncoder()\n",
" activity_indices = le.fit_transform(activity_labels)\n",
" mapped_labels = dict(zip(le.transform(le.classes_), le.classes_))\n",
" \n",
" return mapped_labels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Preprocesses the data using balance(), clean(), and scale() functions"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def preprocess_data(dataframe): \n",
" \"\"\" Preprocesses the data using balance(), clean(), and scale() functions \"\"\"\n",
"\n",
" cleaned_df = clean_data(dataframe)\n",
" \n",
" return scale_data(cleaned_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Plots training & validation accuracy values"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def plot_learningCurve(history, epochs):\n",
" \"\"\" Plots training & validation accuracy values \"\"\"\n",
"\n",
" epoch_range = range(1, epochs+1)\n",
" plt.plot(epoch_range, history.history['accuracy'])\n",
" plt.plot(epoch_range, history.history[\"val_accuracy\"])\n",
" plt.title('Model accuracy')\n",
" plt.ylabel('Accuracy')\n",
" plt.xlabel('Epoch')\n",
" plt.legend(['Train', 'Val'], loc='lower right')\n",
" plt.show()\n",
" \n",
" \"\"\" Plots training & validation loss values \"\"\"\n",
" \n",
" plt.plot(epoch_range, history.history['loss'])\n",
" plt.plot(epoch_range, history.history['val_loss'])\n",
" plt.title('Model loss')\n",
" plt.ylabel('Loss')\n",
" plt.xlabel('Epoch')\n",
" plt.legend(['Train', 'Val'], loc='upper right')\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Divides data into 2D frames\n",
"###### To write this code cell, we used part of this tutorial: https://www.youtube.com/watch?v=lUI6VMj43PE&t=2112s"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Divides data into 2D frames \"\"\"\n",
"\n",
"frequency = 20 # Based on Hertz\n",
"time_period = 10 # Based on Second\n",
"frame_size = frequency * time_period\n",
"step_size = frame_size # In order not to have an overlap\n",
"\n",
"def get_frames(df, frame_size, step_size):\n",
" n_features = 3\n",
" frames = []\n",
" labels = []\n",
" for i in range(0, len(df) - frame_size, step_size):\n",
" x = df['X'].values[i: i + frame_size]\n",
" y = df['Y'].values[i: i + frame_size]\n",
" z = df['Z'].values[i: i + frame_size]\n",
" \n",
" label = stats.mode(df['ActivityCode'][i: i + frame_size])[0][0]\n",
" frames.append([x, y, z])\n",
" labels.append(label)\n",
"\n",
" frames = np.asarray(frames).reshape(-1, frame_size, n_features)\n",
" labels = np.asarray(labels)\n",
"\n",
" return frames, labels"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Builds the model (the Convolutional Neural Network)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def get_model():\n",
" \"\"\" Builds the model (the Convolutional Neural Network) \"\"\"\n",
" \n",
" # Defines model\n",
" model = Sequential()\n",
" model.add(Conv2D(64, (2, 2), activation = 'relu', input_shape = X_train[0].shape))\n",
"# model.add(Dropout(0.1))\n",
"# model.add(Conv2D(64, (4, 2), activation='relu'))\n",
"# model.add(Dropout(0.2))\n",
" model.add(MaxPooling2D(pool_size=2))\n",
" model.add(Flatten())\n",
" model.add(Dense(64, activation = 'relu'))\n",
"# model.add(Dense(64, activation = 'relu'))\n",
" model.add(Dropout(0.5))\n",
" model.add(Dense(18, activation='softmax'))\n",
"\n",
" # Compiles model\n",
" model.compile(optimizer=Adam(learning_rate = 0.001), \n",
" loss = 'sparse_categorical_crossentropy', \n",
" metrics = ['accuracy'])\n",
" \n",
" return model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Phone Accelerometer"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"\"\"\" Phone Accelerometer \"\"\"\n",
"\n",
"phone_accel_accuracy = {}\n",
"phone_accel_precision = {}\n",
"phone_accel_recall = {}\n",
"phone_accel_f1 = {}\n",
"\n",
"phone_accel_matrix = {}\n",
"phone_accel_activity_accuracy = {}\n",
"phone_accel_classification_reports={}\n",
"\n",
"data = pd.concat(map(pd.read_csv, phone_accel_file_paths))\n",
"\n",
"subjectIDs = data[\"SubjectID\"].unique()\n",
"\n",
"for subjectid in subjectIDs:\n",
" activity_labels = list(activity_dictionary(data).values())\n",
" \n",
" train_data = data[data[\"SubjectID\"] != subjectid]\n",
" test_data = data[data[\"SubjectID\"] == subjectid]\n",
"\n",
" processed_train_data = preprocess_data(train_data)\n",
" processed_test_data = preprocess_data(test_data)\n",
"\n",
" X_train, y_train = get_frames(processed_train_data, frame_size, step_size)\n",
" X_test, y_test = get_frames(processed_train_data, frame_size, step_size)\n",
" \n",
" X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)\n",
" X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)\n",
" \n",
" model = get_model()\n",
" \n",
" history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)\n",
"\n",
" scores = model.evaluate(X_test, y_test, verbose=0)\n",
" \n",
" y_true = y_test\n",
" y_pred = model.predict_classes(X_test, verbose=0)\n",
" \n",
" # Accuracy: (tp + tn) / (p + n)\n",
" phone_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
" # Precision tp / (tp + fp)\n",
" precision = precision_score(y_true, y_pred, average=None, zero_division=1)\n",
" phone_accel_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
" # Recall: tp / (tp + fn)\n",
" recall = recall_score(y_true, y_pred, average=None, zero_division=1)\n",
" phone_accel_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
" # F1: 2 tp / (2 tp + fp + fn)\n",
" f1 = f1_score(y_true, y_pred, average=None, zero_division=1)\n",
" phone_accel_f1[subjectid] = dict(zip(activity_labels, f1))\n",
" \n",
" phone_accel_classification_reports[subjectid] = classification_report(y_test, y_pred, zero_division=1)\n",
" \n",
" matrix = confusion_matrix(y_test, y_pred)\n",
" phone_accel_matrix[subjectid] = matrix\n",
" \n",
" accu_per_class = matrix.diagonal()/ matrix.sum(axis=1)\n",
" phone_accel_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Phone Gyroscope"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Phone Gyroscope \"\"\"\n",
"\n",
"phone_gyro_accuracy = {}\n",
"phone_gyro_precision = {}\n",
"phone_gyro_recall = {}\n",
"phone_gyro_f1 = {}\n",
"\n",
"phone_gyro_matrix = {}\n",
"phone_gyro_activity_accuracy = {}\n",
"phone_gyro_classification_reports={}\n",
"\n",
"data = pd.concat(map(pd.read_csv, phone_gyro_file_paths))\n",
"\n",
"subjectIDs = data[\"SubjectID\"].unique()\n",
"\n",
"for subjectid in subjectIDs:\n",
" activity_labels = list(activity_dictionary(data).values())\n",
" \n",
" train_data = data[data[\"SubjectID\"] != subjectid]\n",
" test_data = data[data[\"SubjectID\"] == subjectid]\n",
"\n",
" processed_train_data = preprocess_data(train_data)\n",
" processed_test_data = preprocess_data(test_data)\n",
"\n",
" X_train, y_train = get_frames(processed_train_data, frame_size, step_size)\n",
" X_test, y_test = get_frames(processed_train_data, frame_size, step_size)\n",
" \n",
" X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)\n",
" X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)\n",
" \n",
" model = get_model()\n",
" \n",
" history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)\n",
"\n",
" scores = model.evaluate(X_test, y_test, verbose=0)\n",
" \n",
" y_true = y_test\n",
" y_pred = model.predict_classes(X_test, verbose=0)\n",
" \n",
" # Accuracy: (tp + tn) / (p + n)\n",
" phone_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
" # Precision tp / (tp + fp)\n",
" precision = precision_score(y_true, y_pred, average=None, zero_division=1)\n",
" phone_gyro_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
" # Recall: tp / (tp + fn)\n",
" recall = recall_score(y_true, y_pred, average=None, zero_division=1)\n",
" phone_gyro_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
" # F1: 2 tp / (2 tp + fp + fn)\n",
" f1 = f1_score(y_true, y_pred, average=None, zero_division=1)\n",
" phone_gyro_f1[subjectid] = dict(zip(activity_labels, f1))\n",
" \n",
" phone_gyro_classification_reports[subjectid] = classification_report(y_test, y_pred, zero_division=1)\n",
" \n",
" matrix = confusion_matrix(y_test, y_pred)\n",
" phone_gyro_matrix[subjectid] = matrix\n",
" \n",
" accu_per_class = matrix.diagonal()/ matrix.sum(axis=1)\n",
" phone_gyro_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Watch Accelerometer"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Watch Accelerometer \"\"\"\n",
"\n",
"watch_accel_accuracy = {}\n",
"watch_accel_precision = {}\n",
"watch_accel_recall = {}\n",
"watch_accel_f1 = {}\n",
"\n",
"watch_accel_matrix = {}\n",
"watch_accel_activity_accuracy = {}\n",
"watch_accel_classification_reports={}\n",
"\n",
"data = pd.concat(map(pd.read_csv, watch_accel_file_paths))\n",
"\n",
"subjectIDs = data[\"SubjectID\"].unique()\n",
"\n",
"for subjectid in subjectIDs:\n",
" activity_labels = list(activity_dictionary(data).values())\n",
" \n",
" train_data = data[data[\"SubjectID\"] != subjectid]\n",
" test_data = data[data[\"SubjectID\"] == subjectid]\n",
"\n",
" processed_train_data = preprocess_data(train_data)\n",
" processed_test_data = preprocess_data(test_data)\n",
"\n",
" X_train, y_train = get_frames(processed_train_data, frame_size, step_size)\n",
" X_test, y_test = get_frames(processed_train_data, frame_size, step_size)\n",
" \n",
" X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)\n",
" X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)\n",
" \n",
" model = get_model()\n",
" \n",
" history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)\n",
"\n",
" scores = model.evaluate(X_test, y_test, verbose=0)\n",
" \n",
" y_true = y_test\n",
" y_pred = model.predict_classes(X_test, verbose=0)\n",
" \n",
" # Accuracy: (tp + tn) / (p + n)\n",
" watch_accel_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
" # Precision tp / (tp + fp)\n",
" precision = precision_score(y_true, y_pred, average=None, zero_division=1)\n",
" watch_accel_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
" # Recall: tp / (tp + fn)\n",
" recall = recall_score(y_true, y_pred, average=None, zero_division=1)\n",
" watch_accel_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
" # F1: 2 tp / (2 tp + fp + fn)\n",
" f1 = f1_score(y_true, y_pred, average=None, zero_division=1)\n",
" watch_accel_f1[subjectid] = dict(zip(activity_labels, f1))\n",
" \n",
" watch_accel_classification_reports[subjectid] = classification_report(y_test, y_pred, zero_division=1)\n",
" \n",
" matrix = confusion_matrix(y_test, y_pred)\n",
" watch_accel_matrix[subjectid] = matrix\n",
" \n",
" accu_per_class = matrix.diagonal()/ matrix.sum(axis=1)\n",
" watch_accel_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Watch Gyroscope"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"\"\"\" Watch Gyroscope \"\"\"\n",
"\n",
"watch_gyro_accuracy = {}\n",
"watch_gyro_precision = {}\n",
"watch_gyro_recall = {}\n",
"watch_gyro_f1 = {}\n",
"\n",
"watch_gyro_matrix = {}\n",
"watch_gyro_activity_accuracy = {}\n",
"watch_gyro_classification_reports={}\n",
"\n",
"data = pd.concat(map(pd.read_csv, watch_gyro_file_paths))\n",
"\n",
"subjectIDs = data[\"SubjectID\"].unique()\n",
"\n",
"for subjectid in subjectIDs:\n",
" activity_labels = list(activity_dictionary(data).values())\n",
" \n",
" train_data = data[data[\"SubjectID\"] != subjectid]\n",
" test_data = data[data[\"SubjectID\"] == subjectid]\n",
"\n",
" processed_train_data = preprocess_data(train_data)\n",
" processed_test_data = preprocess_data(test_data)\n",
"\n",
" X_train, y_train = get_frames(processed_train_data, frame_size, step_size)\n",
" X_test, y_test = get_frames(processed_train_data, frame_size, step_size)\n",
" \n",
" X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 3, 1)\n",
" X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 3, 1)\n",
" \n",
" model = get_model()\n",
" \n",
" history = model.fit(X_train, y_train, batch_size=1, epochs=70, verbose=1)\n",
"\n",
" scores = model.evaluate(X_test, y_test, verbose=0)\n",
" \n",
" y_true = y_test\n",
" y_pred = model.predict_classes(X_test, verbose=0)\n",
" \n",
" # Accuracy: (tp + tn) / (p + n)\n",
" watch_gyro_accuracy[subjectid] = accuracy_score(y_true, y_pred)\n",
"\n",
" # Precision tp / (tp + fp)\n",
" precision = precision_score(y_true, y_pred, average=None, zero_division=1)\n",
" watch_gyro_precision[subjectid] = dict(zip(activity_labels, precision))\n",
"\n",
" # Recall: tp / (tp + fn)\n",
" recall = recall_score(y_true, y_pred, average=None, zero_division=1)\n",
" watch_gyro_recall[subjectid] = dict(zip(activity_labels, recall))\n",
"\n",
" # F1: 2 tp / (2 tp + fp + fn)\n",
" f1 = f1_score(y_true, y_pred, average=None, zero_division=1)\n",
" watch_gyro_f1[subjectid] = dict(zip(activity_labels, f1))\n",
" \n",
" watch_gyro_classification_reports[subjectid] = classification_report(y_test, y_pred, zero_division=1)\n",
" \n",
" matrix = confusion_matrix(y_test, y_pred)\n",
" watch_gyro_matrix[subjectid] = matrix\n",
" \n",
" accu_per_class = matrix.diagonal()/ matrix.sum(axis=1)\n",
" watch_gyro_activity_accuracy[subjectid] = dict(zip(activity_labels, accu_per_class))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}