--- a +++ b/HAR_PREDICTION_MODELS.ipynb @@ -0,0 +1,1840 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Obtain the train and test data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "File b'UCI_HAR_dataset/csv_files/train.csv' does not exist", + "output_type": "error", + "traceback": [ + "\u001b[0;31m-----------------------------------------------------------------\u001b[0m", + "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-2-f985c27c5009>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrain\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'UCI_HAR_dataset/csv_files/train.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mtest\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'UCI_HAR_dataset/csv_files/test.csv'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Downloads/ENTER/envs/py36/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, doublequote, delim_whitespace, low_memory, memory_map, float_precision)\u001b[0m\n\u001b[1;32m 676\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[1;32m 677\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 678\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 679\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 680\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Downloads/ENTER/envs/py36/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 440\u001b[0;31m \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 441\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 442\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Downloads/ENTER/envs/py36/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m 785\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'has_index_names'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 786\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 787\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 788\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 789\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Downloads/ENTER/envs/py36/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, engine)\u001b[0m\n\u001b[1;32m 1012\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'c'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1013\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'c'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1014\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1015\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1016\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'python'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Downloads/ENTER/envs/py36/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, src, **kwds)\u001b[0m\n\u001b[1;32m 1706\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'usecols'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0musecols\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1707\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1708\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1709\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1710\u001b[0m \u001b[0mpassed_names\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnames\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__\u001b[0;34m()\u001b[0m\n", + "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mFileNotFoundError\u001b[0m: File b'UCI_HAR_dataset/csv_files/train.csv' does not exist" + ] + } + ], + "source": [ + "train = pd.read_csv('UCI_HAR_dataset/csv_files/train.csv')\n", + "test = pd.read_csv('UCI_HAR_dataset/csv_files/test.csv')\n", + "print(train.shape, test.shape)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>tBodyAccmeanX</th>\n", + " <th>tBodyAccmeanY</th>\n", + " <th>tBodyAccmeanZ</th>\n", + " <th>tBodyAccstdX</th>\n", + " <th>tBodyAccstdY</th>\n", + " <th>tBodyAccstdZ</th>\n", + " <th>tBodyAccmadX</th>\n", + " <th>tBodyAccmadY</th>\n", + " <th>tBodyAccmadZ</th>\n", + " <th>tBodyAccmaxX</th>\n", + " <th>...</th>\n", + " <th>angletBodyAccMeangravity</th>\n", + " <th>angletBodyAccJerkMeangravityMean</th>\n", + " <th>angletBodyGyroMeangravityMean</th>\n", + " <th>angletBodyGyroJerkMeangravityMean</th>\n", + " <th>angleXgravityMean</th>\n", + " <th>angleYgravityMean</th>\n", + " <th>angleZgravityMean</th>\n", + " <th>subject</th>\n", + " <th>Activity</th>\n", + " <th>ActivityName</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0.288585</td>\n", + " <td>-0.020294</td>\n", + " <td>-0.132905</td>\n", + " <td>-0.995279</td>\n", + " <td>-0.983111</td>\n", + " <td>-0.913526</td>\n", + " <td>-0.995112</td>\n", + " <td>-0.983185</td>\n", + " <td>-0.923527</td>\n", + " <td>-0.934724</td>\n", + " <td>...</td>\n", + " <td>-0.112754</td>\n", + " <td>0.030400</td>\n", + " <td>-0.464761</td>\n", + " <td>-0.018446</td>\n", + " <td>-0.841247</td>\n", + " <td>0.179941</td>\n", + " <td>-0.058627</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>STANDING</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0.278419</td>\n", + " <td>-0.016411</td>\n", + " <td>-0.123520</td>\n", + " <td>-0.998245</td>\n", + " <td>-0.975300</td>\n", + " <td>-0.960322</td>\n", + " <td>-0.998807</td>\n", + " <td>-0.974914</td>\n", + " <td>-0.957686</td>\n", + " <td>-0.943068</td>\n", + " <td>...</td>\n", + " <td>0.053477</td>\n", + " <td>-0.007435</td>\n", + " <td>-0.732626</td>\n", + " <td>0.703511</td>\n", + " <td>-0.844788</td>\n", + " <td>0.180289</td>\n", + " <td>-0.054317</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>STANDING</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0.279653</td>\n", + " <td>-0.019467</td>\n", + " <td>-0.113462</td>\n", + " <td>-0.995380</td>\n", + " <td>-0.967187</td>\n", + " <td>-0.978944</td>\n", + " <td>-0.996520</td>\n", + " <td>-0.963668</td>\n", + " <td>-0.977469</td>\n", + " <td>-0.938692</td>\n", + " <td>...</td>\n", + " <td>-0.118559</td>\n", + " <td>0.177899</td>\n", + " <td>0.100699</td>\n", + " <td>0.808529</td>\n", + " <td>-0.848933</td>\n", + " <td>0.180637</td>\n", + " <td>-0.049118</td>\n", + " <td>1</td>\n", + " <td>5</td>\n", + " <td>STANDING</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>3 rows × 564 columns</p>\n", + "</div>" + ], + "text/plain": [ + " tBodyAccmeanX tBodyAccmeanY tBodyAccmeanZ tBodyAccstdX tBodyAccstdY \\\n", + "0 0.288585 -0.020294 -0.132905 -0.995279 -0.983111 \n", + "1 0.278419 -0.016411 -0.123520 -0.998245 -0.975300 \n", + "2 0.279653 -0.019467 -0.113462 -0.995380 -0.967187 \n", + "\n", + " tBodyAccstdZ tBodyAccmadX tBodyAccmadY tBodyAccmadZ tBodyAccmaxX \\\n", + "0 -0.913526 -0.995112 -0.983185 -0.923527 -0.934724 \n", + "1 -0.960322 -0.998807 -0.974914 -0.957686 -0.943068 \n", + "2 -0.978944 -0.996520 -0.963668 -0.977469 -0.938692 \n", + "\n", + " ... angletBodyAccMeangravity angletBodyAccJerkMeangravityMean \\\n", + "0 ... -0.112754 0.030400 \n", + "1 ... 0.053477 -0.007435 \n", + "2 ... -0.118559 0.177899 \n", + "\n", + " angletBodyGyroMeangravityMean angletBodyGyroJerkMeangravityMean \\\n", + "0 -0.464761 -0.018446 \n", + "1 -0.732626 0.703511 \n", + "2 0.100699 0.808529 \n", + "\n", + " angleXgravityMean angleYgravityMean angleZgravityMean subject Activity \\\n", + "0 -0.841247 0.179941 -0.058627 1 5 \n", + "1 -0.844788 0.180289 -0.054317 1 5 \n", + "2 -0.848933 0.180637 -0.049118 1 5 \n", + "\n", + " ActivityName \n", + "0 STANDING \n", + "1 STANDING \n", + "2 STANDING \n", + "\n", + "[3 rows x 564 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# get X_train and y_train from csv files\n", + "X_train = train.drop(['subject', 'Activity', 'ActivityName'], axis=1)\n", + "y_train = train.ActivityName" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# get X_test and y_test from test csv file\n", + "X_test = test.drop(['subject', 'Activity', 'ActivityName'], axis=1)\n", + "y_test = test.ActivityName" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X_train and y_train : ((7352, 561),(7352,))\n", + "X_test and y_test : ((2947, 561),(2947,))\n" + ] + } + ], + "source": [ + "print('X_train and y_train : ({},{})'.format(X_train.shape, y_train.shape))\n", + "print('X_test and y_test : ({},{})'.format(X_test.shape, y_test.shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Let's model with our data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Labels that are useful in plotting confusion matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "labels=['LAYING', 'SITTING','STANDING','WALKING','WALKING_DOWNSTAIRS','WALKING_UPSTAIRS']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Function to plot the confusion matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import itertools\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.metrics import confusion_matrix\n", + "plt.rcParams[\"font.family\"] = 'DejaVu Sans'\n", + "\n", + "def plot_confusion_matrix(cm, classes,\n", + " normalize=False,\n", + " title='Confusion matrix',\n", + " cmap=plt.cm.Blues):\n", + " if normalize:\n", + " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", + "\n", + " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", + " plt.title(title)\n", + " plt.colorbar()\n", + " tick_marks = np.arange(len(classes))\n", + " plt.xticks(tick_marks, classes, rotation=90)\n", + " plt.yticks(tick_marks, classes)\n", + "\n", + " fmt = '.2f' if normalize else 'd'\n", + " thresh = cm.max() / 2.\n", + " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", + " plt.text(j, i, format(cm[i, j], fmt),\n", + " horizontalalignment=\"center\",\n", + " color=\"white\" if cm[i, j] > thresh else \"black\")\n", + "\n", + " plt.tight_layout()\n", + " plt.ylabel('True label')\n", + " plt.xlabel('Predicted label')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generic function to run any model specified" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "def perform_model(model, X_train, y_train, X_test, y_test, class_labels, cm_normalize=True, \\\n", + " print_cm=True, cm_cmap=plt.cm.Greens):\n", + " \n", + " \n", + " # to store results at various phases\n", + " results = dict()\n", + " \n", + " # time at which model starts training \n", + " train_start_time = datetime.now()\n", + " print('training the model..')\n", + " model.fit(X_train, y_train)\n", + " print('Done \\n \\n')\n", + " train_end_time = datetime.now()\n", + " results['training_time'] = train_end_time - train_start_time\n", + " print('training_time(HH:MM:SS.ms) - {}\\n\\n'.format(results['training_time']))\n", + " \n", + " \n", + " # predict test data\n", + " print('Predicting test data')\n", + " test_start_time = datetime.now()\n", + " y_pred = model.predict(X_test)\n", + " test_end_time = datetime.now()\n", + " print('Done \\n \\n')\n", + " results['testing_time'] = test_end_time - test_start_time\n", + " print('testing time(HH:MM:SS:ms) - {}\\n\\n'.format(results['testing_time']))\n", + " results['predicted'] = y_pred\n", + " \n", + "\n", + " # calculate overall accuracty of the model\n", + " accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)\n", + " # store accuracy in results\n", + " results['accuracy'] = accuracy\n", + " print('---------------------')\n", + " print('| Accuracy |')\n", + " print('---------------------')\n", + " print('\\n {}\\n\\n'.format(accuracy))\n", + " \n", + " \n", + " # confusion matrix\n", + " cm = metrics.confusion_matrix(y_test, y_pred)\n", + " results['confusion_matrix'] = cm\n", + " if print_cm: \n", + " print('--------------------')\n", + " print('| Confusion Matrix |')\n", + " print('--------------------')\n", + " print('\\n {}'.format(cm))\n", + " \n", + " # plot confusin matrix\n", + " plt.figure(figsize=(8,8))\n", + " plt.grid(b=False)\n", + " plot_confusion_matrix(cm, classes=class_labels, normalize=True, title='Normalized confusion matrix', cmap = cm_cmap)\n", + " plt.show()\n", + " \n", + " # get classification report\n", + " print('-------------------------')\n", + " print('| Classifiction Report |')\n", + " print('-------------------------')\n", + " classification_report = metrics.classification_report(y_test, y_pred)\n", + " # store report in results\n", + " results['classification_report'] = classification_report\n", + " print(classification_report)\n", + " \n", + " # add the trained model to the results\n", + " results['model'] = model\n", + " \n", + " return results\n", + " \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Method to print the gridsearch Attributes" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def print_grid_search_attributes(model):\n", + " # Estimator that gave highest score among all the estimators formed in GridSearch\n", + " print('--------------------------')\n", + " print('| Best Estimator |')\n", + " print('--------------------------')\n", + " print('\\n\\t{}\\n'.format(model.best_estimator_))\n", + "\n", + "\n", + " # parameters that gave best results while performing grid search\n", + " print('--------------------------')\n", + " print('| Best parameters |')\n", + " print('--------------------------')\n", + " print('\\tParameters of best estimator : \\n\\n\\t{}\\n'.format(model.best_params_))\n", + "\n", + "\n", + " # number of cross validation splits\n", + " print('---------------------------------')\n", + " print('| No of CrossValidation sets |')\n", + " print('--------------------------------')\n", + " print('\\n\\tTotal numbre of cross validation sets: {}\\n'.format(model.n_splits_))\n", + "\n", + "\n", + " # Average cross validated score of the best estimator, from the Grid Search \n", + " print('--------------------------')\n", + " print('| Best Score |')\n", + " print('--------------------------')\n", + " print('\\n\\tAverage Cross Validate scores of best estimator : \\n\\n\\t{}\\n'.format(model.best_score_))\n", + "\n", + " \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Logistic Regression with Grid Search" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn import linear_model\n", + "from sklearn import metrics\n", + "\n", + "from sklearn.model_selection import GridSearchCV" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training the model..\n", + "Fitting 3 folds for each of 12 candidates, totalling 36 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Done 36 out of 36 | elapsed: 1.2min finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done \n", + " \n", + "\n", + "training_time(HH:MM:SS.ms) - 0:01:25.843810\n", + "\n", + "\n", + "Predicting test data\n", + "Done \n", + " \n", + "\n", + "testing time(HH:MM:SS:ms) - 0:00:00.009192\n", + "\n", + "\n", + "---------------------\n", + "| Accuracy |\n", + "---------------------\n", + "\n", + " 0.9626739056667798\n", + "\n", + "\n", + "--------------------\n", + "| Confusion Matrix |\n", + "--------------------\n", + "\n", + " [[537 0 0 0 0 0]\n", + " [ 1 428 58 0 0 4]\n", + " [ 0 12 519 1 0 0]\n", + " [ 0 0 0 495 1 0]\n", + " [ 0 0 0 3 409 8]\n", + " [ 0 0 0 22 0 449]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------\n", + "| Classifiction Report |\n", + "-------------------------\n", + " precision recall f1-score support\n", + "\n", + " LAYING 1.00 1.00 1.00 537\n", + " SITTING 0.97 0.87 0.92 491\n", + " STANDING 0.90 0.98 0.94 532\n", + " WALKING 0.95 1.00 0.97 496\n", + "WALKING_DOWNSTAIRS 1.00 0.97 0.99 420\n", + " WALKING_UPSTAIRS 0.97 0.95 0.96 471\n", + "\n", + " avg / total 0.96 0.96 0.96 2947\n", + "\n" + ] + } + ], + "source": [ + "\n", + "# start Grid search\n", + "parameters = {'C':[0.01, 0.1, 1, 10, 20, 30], 'penalty':['l2','l1']}\n", + "log_reg = linear_model.LogisticRegression()\n", + "log_reg_grid = GridSearchCV(log_reg, param_grid=parameters, cv=3, verbose=1, n_jobs=-1)\n", + "log_reg_grid_results = perform_model(log_reg_grid, X_train, y_train, X_test, y_test, class_labels=labels)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(8,8))\n", + "plt.grid(b=False)\n", + "plot_confusion_matrix(log_reg_grid_results['confusion_matrix'], classes=labels, cmap=plt.cm.Greens, )\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------\n", + "| Best Estimator |\n", + "--------------------------\n", + "\n", + "\tLogisticRegression(C=30, class_weight=None, dual=False, fit_intercept=True,\n", + " intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,\n", + " penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n", + " verbose=0, warm_start=False)\n", + "\n", + "--------------------------\n", + "| Best parameters |\n", + "--------------------------\n", + "\tParameters of best estimator : \n", + "\n", + "\t{'C': 30, 'penalty': 'l2'}\n", + "\n", + "---------------------------------\n", + "| No of CrossValidation sets |\n", + "--------------------------------\n", + "\n", + "\tTotal numbre of cross validation sets: 3\n", + "\n", + "--------------------------\n", + "| Best Score |\n", + "--------------------------\n", + "\n", + "\tAverage Cross Validate scores of best estimator : \n", + "\n", + "\t0.9461371055495104\n", + "\n" + ] + } + ], + "source": [ + "# observe the attributes of the model \n", + "print_grid_search_attributes(log_reg_grid_results['model'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# 2. Linear SVC with GridSearch" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.svm import LinearSVC" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training the model..\n", + "Fitting 3 folds for each of 6 candidates, totalling 18 fits\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=-1)]: Done 18 out of 18 | elapsed: 24.9s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Done \n", + " \n", + "\n", + "training_time(HH:MM:SS.ms) - 0:00:32.951942\n", + "\n", + "\n", + "Predicting test data\n", + "Done \n", + " \n", + "\n", + "testing time(HH:MM:SS:ms) - 0:00:00.012182\n", + "\n", + "\n", + "---------------------\n", + "| Accuracy |\n", + "---------------------\n", + "\n", + " 0.9660671869697998\n", + "\n", + "\n", + "--------------------\n", + "| Confusion Matrix |\n", + "--------------------\n", + "\n", + " [[537 0 0 0 0 0]\n", + " [ 2 426 58 0 0 5]\n", + " [ 0 14 518 0 0 0]\n", + " [ 0 0 0 495 0 1]\n", + " [ 0 0 0 2 413 5]\n", + " [ 0 0 0 12 1 458]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------\n", + "| Classifiction Report |\n", + "-------------------------\n", + " precision recall f1-score support\n", + "\n", + " LAYING 1.00 1.00 1.00 537\n", + " SITTING 0.97 0.87 0.92 491\n", + " STANDING 0.90 0.97 0.94 532\n", + " WALKING 0.97 1.00 0.99 496\n", + "WALKING_DOWNSTAIRS 1.00 0.98 0.99 420\n", + " WALKING_UPSTAIRS 0.98 0.97 0.97 471\n", + "\n", + " avg / total 0.97 0.97 0.97 2947\n", + "\n" + ] + } + ], + "source": [ + "parameters = {'C':[0.125, 0.5, 1, 2, 8, 16]}\n", + "lr_svc = LinearSVC(tol=0.00005)\n", + "lr_svc_grid = GridSearchCV(lr_svc, param_grid=parameters, n_jobs=-1, verbose=1)\n", + "lr_svc_grid_results = perform_model(lr_svc_grid, X_train, y_train, X_test, y_test, class_labels=labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------\n", + "| Best Estimator |\n", + "--------------------------\n", + "\n", + "\tLinearSVC(C=8, class_weight=None, dual=True, fit_intercept=True,\n", + " intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n", + " multi_class='ovr', penalty='l2', random_state=None, tol=5e-05,\n", + " verbose=0)\n", + "\n", + "--------------------------\n", + "| Best parameters |\n", + "--------------------------\n", + "\tParameters of best estimator : \n", + "\n", + "\t{'C': 8}\n", + "\n", + "---------------------------------\n", + "| No of CrossValidation sets |\n", + "--------------------------------\n", + "\n", + "\tTotal numbre of cross validation sets: 3\n", + "\n", + "--------------------------\n", + "| Best Score |\n", + "--------------------------\n", + "\n", + "\tAverage Cross Validate scores of best estimator : \n", + "\n", + "\t0.9465451577801959\n", + "\n" + ] + } + ], + "source": [ + "print_grid_search_attributes(lr_svc_grid_results['model'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. Kernel SVM with GridSearch" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training the model..\n", + "Done \n", + " \n", + "\n", + "training_time(HH:MM:SS.ms) - 0:05:46.182889\n", + "\n", + "\n", + "Predicting test data\n", + "Done \n", + " \n", + "\n", + "testing time(HH:MM:SS:ms) - 0:00:05.221285\n", + "\n", + "\n", + "---------------------\n", + "| Accuracy |\n", + "---------------------\n", + "\n", + " 0.9626739056667798\n", + "\n", + "\n", + "--------------------\n", + "| Confusion Matrix |\n", + "--------------------\n", + "\n", + " [[537 0 0 0 0 0]\n", + " [ 0 441 48 0 0 2]\n", + " [ 0 12 520 0 0 0]\n", + " [ 0 0 0 489 2 5]\n", + " [ 0 0 0 4 397 19]\n", + " [ 0 0 0 17 1 453]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------\n", + "| Classifiction Report |\n", + "-------------------------\n", + " precision recall f1-score support\n", + "\n", + " LAYING 1.00 1.00 1.00 537\n", + " SITTING 0.97 0.90 0.93 491\n", + " STANDING 0.92 0.98 0.95 532\n", + " WALKING 0.96 0.99 0.97 496\n", + "WALKING_DOWNSTAIRS 0.99 0.95 0.97 420\n", + " WALKING_UPSTAIRS 0.95 0.96 0.95 471\n", + "\n", + " avg / total 0.96 0.96 0.96 2947\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "parameters = {'C':[2,8,16],\\\n", + " 'gamma': [ 0.0078125, 0.125, 2]}\n", + "rbf_svm = SVC(kernel='rbf')\n", + "rbf_svm_grid = GridSearchCV(rbf_svm,param_grid=parameters, n_jobs=-1)\n", + "rbf_svm_grid_results = perform_model(rbf_svm_grid, X_train, y_train, X_test, y_test, class_labels=labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--------------------------\n", + "| Best Estimator |\n", + "--------------------------\n", + "\n", + "\tSVC(C=16, cache_size=200, class_weight=None, coef0=0.0,\n", + " decision_function_shape='ovr', degree=3, gamma=0.0078125, kernel='rbf',\n", + " max_iter=-1, probability=False, random_state=None, shrinking=True,\n", + " tol=0.001, verbose=False)\n", + "\n", + "--------------------------\n", + "| Best parameters |\n", + "--------------------------\n", + "\tParameters of best estimator : \n", + "\n", + "\t{'C': 16, 'gamma': 0.0078125}\n", + "\n", + "---------------------------------\n", + "| No of CrossValidation sets |\n", + "--------------------------------\n", + "\n", + "\tTotal numbre of cross validation sets: 3\n", + "\n", + "--------------------------\n", + "| Best Score |\n", + "--------------------------\n", + "\n", + "\tAverage Cross Validate scores of best estimator : \n", + "\n", + "\t0.9440968443960827\n", + "\n" + ] + } + ], + "source": [ + "print_grid_search_attributes(rbf_svm_grid_results['model'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# 4. Decision Trees with GridSearchCV" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training the model..\n", + "Done \n", + " \n", + "\n", + "training_time(HH:MM:SS.ms) - 0:00:19.476858\n", + "\n", + "\n", + "Predicting test data\n", + "Done \n", + " \n", + "\n", + "testing time(HH:MM:SS:ms) - 0:00:00.012858\n", + "\n", + "\n", + "---------------------\n", + "| Accuracy |\n", + "---------------------\n", + "\n", + " 0.8642687478791992\n", + "\n", + "\n", + "--------------------\n", + "| Confusion Matrix |\n", + "--------------------\n", + "\n", + " [[537 0 0 0 0 0]\n", + " [ 0 386 105 0 0 0]\n", + " [ 0 93 439 0 0 0]\n", + " [ 0 0 0 472 16 8]\n", + " [ 0 0 0 15 344 61]\n", + " [ 0 0 0 73 29 369]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------\n", + "| Classifiction Report |\n", + "-------------------------\n", + " precision recall f1-score support\n", + "\n", + " LAYING 1.00 1.00 1.00 537\n", + " SITTING 0.81 0.79 0.80 491\n", + " STANDING 0.81 0.83 0.82 532\n", + " WALKING 0.84 0.95 0.89 496\n", + "WALKING_DOWNSTAIRS 0.88 0.82 0.85 420\n", + " WALKING_UPSTAIRS 0.84 0.78 0.81 471\n", + "\n", + " avg / total 0.86 0.86 0.86 2947\n", + "\n", + "--------------------------\n", + "| Best Estimator |\n", + "--------------------------\n", + "\n", + "\tDecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=7,\n", + " max_features=None, max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n", + " splitter='best')\n", + "\n", + "--------------------------\n", + "| Best parameters |\n", + "--------------------------\n", + "\tParameters of best estimator : \n", + "\n", + "\t{'max_depth': 7}\n", + "\n", + "---------------------------------\n", + "| No of CrossValidation sets |\n", + "--------------------------------\n", + "\n", + "\tTotal numbre of cross validation sets: 3\n", + "\n", + "--------------------------\n", + "| Best Score |\n", + "--------------------------\n", + "\n", + "\tAverage Cross Validate scores of best estimator : \n", + "\n", + "\t0.8369151251360174\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "parameters = {'max_depth':np.arange(3,10,2)}\n", + "dt = DecisionTreeClassifier()\n", + "dt_grid = GridSearchCV(dt,param_grid=parameters, n_jobs=-1)\n", + "dt_grid_results = perform_model(dt_grid, X_train, y_train, X_test, y_test, class_labels=labels)\n", + "print_grid_search_attributes(dt_grid_results['model'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. Random Forest Classifier with GridSearch" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training the model..\n", + "Done \n", + " \n", + "\n", + "training_time(HH:MM:SS.ms) - 0:06:22.775270\n", + "\n", + "\n", + "Predicting test data\n", + "Done \n", + " \n", + "\n", + "testing time(HH:MM:SS:ms) - 0:00:00.025937\n", + "\n", + "\n", + "---------------------\n", + "| Accuracy |\n", + "---------------------\n", + "\n", + " 0.9131319986426875\n", + "\n", + "\n", + "--------------------\n", + "| Confusion Matrix |\n", + "--------------------\n", + "\n", + " [[537 0 0 0 0 0]\n", + " [ 0 427 64 0 0 0]\n", + " [ 0 52 480 0 0 0]\n", + " [ 0 0 0 484 10 2]\n", + " [ 0 0 0 38 332 50]\n", + " [ 0 0 0 34 6 431]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------\n", + "| Classifiction Report |\n", + "-------------------------\n", + " precision recall f1-score support\n", + "\n", + " LAYING 1.00 1.00 1.00 537\n", + " SITTING 0.89 0.87 0.88 491\n", + " STANDING 0.88 0.90 0.89 532\n", + " WALKING 0.87 0.98 0.92 496\n", + "WALKING_DOWNSTAIRS 0.95 0.79 0.86 420\n", + " WALKING_UPSTAIRS 0.89 0.92 0.90 471\n", + "\n", + " avg / total 0.92 0.91 0.91 2947\n", + "\n", + "--------------------------\n", + "| Best Estimator |\n", + "--------------------------\n", + "\n", + "\tRandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", + " max_depth=7, max_features='auto', max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=70, n_jobs=1,\n", + " oob_score=False, random_state=None, verbose=0,\n", + " warm_start=False)\n", + "\n", + "--------------------------\n", + "| Best parameters |\n", + "--------------------------\n", + "\tParameters of best estimator : \n", + "\n", + "\t{'max_depth': 7, 'n_estimators': 70}\n", + "\n", + "---------------------------------\n", + "| No of CrossValidation sets |\n", + "--------------------------------\n", + "\n", + "\tTotal numbre of cross validation sets: 3\n", + "\n", + "--------------------------\n", + "| Best Score |\n", + "--------------------------\n", + "\n", + "\tAverage Cross Validate scores of best estimator : \n", + "\n", + "\t0.9141730141458106\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "params = {'n_estimators': np.arange(10,201,20), 'max_depth':np.arange(3,15,2)}\n", + "rfc = RandomForestClassifier()\n", + "rfc_grid = GridSearchCV(rfc, param_grid=params, n_jobs=-1)\n", + "rfc_grid_results = perform_model(rfc_grid, X_train, y_train, X_test, y_test, class_labels=labels)\n", + "print_grid_search_attributes(rfc_grid_results['model'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 6. Gradient Boosted Decision Trees With GridSearch" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training the model..\n", + "Done \n", + " \n", + "\n", + "training_time(HH:MM:SS.ms) - 0:28:03.653432\n", + "\n", + "\n", + "Predicting test data\n", + "Done \n", + " \n", + "\n", + "testing time(HH:MM:SS:ms) - 0:00:00.058843\n", + "\n", + "\n", + "---------------------\n", + "| Accuracy |\n", + "---------------------\n", + "\n", + " 0.9222938581608415\n", + "\n", + "\n", + "--------------------\n", + "| Confusion Matrix |\n", + "--------------------\n", + "\n", + " [[537 0 0 0 0 0]\n", + " [ 0 396 93 0 0 2]\n", + " [ 0 37 495 0 0 0]\n", + " [ 0 0 0 483 7 6]\n", + " [ 0 0 0 10 374 36]\n", + " [ 0 1 0 31 6 433]]\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "<Figure size 576x576 with 2 Axes>" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-------------------------\n", + "| Classifiction Report |\n", + "-------------------------\n", + " precision recall f1-score support\n", + "\n", + " LAYING 1.00 1.00 1.00 537\n", + " SITTING 0.91 0.81 0.86 491\n", + " STANDING 0.84 0.93 0.88 532\n", + " WALKING 0.92 0.97 0.95 496\n", + "WALKING_DOWNSTAIRS 0.97 0.89 0.93 420\n", + " WALKING_UPSTAIRS 0.91 0.92 0.91 471\n", + "\n", + " avg / total 0.92 0.92 0.92 2947\n", + "\n", + "--------------------------\n", + "| Best Estimator |\n", + "--------------------------\n", + "\n", + "\tGradientBoostingClassifier(criterion='friedman_mse', init=None,\n", + " learning_rate=0.1, loss='deviance', max_depth=5,\n", + " max_features=None, max_leaf_nodes=None,\n", + " min_impurity_decrease=0.0, min_impurity_split=None,\n", + " min_samples_leaf=1, min_samples_split=2,\n", + " min_weight_fraction_leaf=0.0, n_estimators=140,\n", + " presort='auto', random_state=None, subsample=1.0, verbose=0,\n", + " warm_start=False)\n", + "\n", + "--------------------------\n", + "| Best parameters |\n", + "--------------------------\n", + "\tParameters of best estimator : \n", + "\n", + "\t{'max_depth': 5, 'n_estimators': 140}\n", + "\n", + "---------------------------------\n", + "| No of CrossValidation sets |\n", + "--------------------------------\n", + "\n", + "\tTotal numbre of cross validation sets: 3\n", + "\n", + "--------------------------\n", + "| Best Score |\n", + "--------------------------\n", + "\n", + "\tAverage Cross Validate scores of best estimator : \n", + "\n", + "\t0.904379760609358\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.ensemble import GradientBoostingClassifier\n", + "param_grid = {'max_depth': np.arange(5,8,1), \\\n", + " 'n_estimators':np.arange(130,170,10)}\n", + "gbdt = GradientBoostingClassifier()\n", + "gbdt_grid = GridSearchCV(gbdt, param_grid=param_grid, n_jobs=-1)\n", + "gbdt_grid_results = perform_model(gbdt_grid, X_train, y_train, X_test, y_test, class_labels=labels)\n", + "print_grid_search_attributes(gbdt_grid_results['model'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# 7. Comparing all models" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " Accuracy Error\n", + " ---------- --------\n", + "Logistic Regression : 96.27% 3.733%\n", + "Linear SVC : 96.61% 3.393% \n", + "rbf SVM classifier : 96.27% 3.733% \n", + "DecisionTree : 86.43% 13.57% \n", + "Random Forest : 91.31% 8.687% \n", + "GradientBoosting DT : 91.31% 8.687% \n" + ] + } + ], + "source": [ + "print('\\n Accuracy Error')\n", + "print(' ---------- --------')\n", + "print('Logistic Regression : {:.04}% {:.04}%'.format(log_reg_grid_results['accuracy'] * 100,\\\n", + " 100-(log_reg_grid_results['accuracy'] * 100)))\n", + "\n", + "print('Linear SVC : {:.04}% {:.04}% '.format(lr_svc_grid_results['accuracy'] * 100,\\\n", + " 100-(lr_svc_grid_results['accuracy'] * 100)))\n", + "\n", + "print('rbf SVM classifier : {:.04}% {:.04}% '.format(rbf_svm_grid_results['accuracy'] * 100,\\\n", + " 100-(rbf_svm_grid_results['accuracy'] * 100)))\n", + "\n", + "print('DecisionTree : {:.04}% {:.04}% '.format(dt_grid_results['accuracy'] * 100,\\\n", + " 100-(dt_grid_results['accuracy'] * 100)))\n", + "\n", + "print('Random Forest : {:.04}% {:.04}% '.format(rfc_grid_results['accuracy'] * 100,\\\n", + " 100-(rfc_grid_results['accuracy'] * 100)))\n", + "print('GradientBoosting DT : {:.04}% {:.04}% '.format(rfc_grid_results['accuracy'] * 100,\\\n", + " 100-(rfc_grid_results['accuracy'] * 100)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> We can choose ___Logistic regression___ or ___Linear SVC___ or ___rbf SVM___." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conclusion :" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the real world, domain-knowledge, EDA and feature-engineering matter most." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}