[d8f241]: / Acceleration Features, Train = 10 users Test = 5 users, Random Analysis.ipynb

Download this file

638 lines (637 with data), 13.9 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train 1(14=0.01) test 14 (.99)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.18 s, sys: 577 ms, total: 1.76 s\n",
      "Wall time: 9.18 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 25s, sys: 13.7 s, total: 1min 38s\n",
      "Wall time: 2min 26s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "df = pd.read_csv(\"master_data.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['target', 'subject', 'chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z',\n",
       "       'chest_ECG', 'chest_EMG', 'chest_EDA', 'chest_Temp', 'chest_Resp'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 533 ms, sys: 124 ms, total: 657 ms\n",
      "Wall time: 698 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "df=df[['chest_ACC_x','chest_ACC_y','chest_ACC_z','target','subject']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 510 ms, sys: 43 µs, total: 510 ms\n",
      "Wall time: 508 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "df['subject'].unique()\n",
    "list_of_subjects=list(df['subject'].unique())\n",
    "list_of_subjects.sort()\n",
    "list_of_subjects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 107 µs, sys: 18 µs, total: 125 µs\n",
      "Wall time: 129 µs\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "features=df.columns.tolist()\n",
    "to_remove = [fea for fea in features if \"target\"  in fea or \"subject\"  in fea]\n",
    "feature = [x for x in features if x not in to_remove]\n",
    "feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2\n",
      "subject_2\n",
      "3\n",
      "subject_3\n",
      "4\n",
      "subject_4\n",
      "5\n",
      "subject_5\n",
      "6\n",
      "subject_6\n",
      "7\n",
      "subject_7\n",
      "8\n",
      "subject_8\n",
      "9\n",
      "subject_9\n",
      "10\n",
      "subject_10\n",
      "11\n",
      "subject_11\n",
      "13\n",
      "subject_13\n",
      "14\n",
      "subject_14\n",
      "15\n",
      "subject_15\n",
      "16\n",
      "subject_16\n",
      "17\n",
      "subject_17\n",
      "CPU times: user 16.3 s, sys: 2.31 s, total: 18.6 s\n",
      "Wall time: 18.7 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "for i in list_of_subjects:\n",
    "    print(i)\n",
    "    globals()['subject_%s' % i]=df[df['subject'] == i]\n",
    "    globals()['subject_%s_train' % i],globals()['subject_%s_test' % i]=train_test_split(globals()['subject_%s' % i], test_size=0.3)\n",
    "    print('subject_'+str(i))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4165000, 5)\n",
      "(2915500, 5)\n",
      "(1249500, 5)\n"
     ]
    }
   ],
   "source": [
    "print(subject_2.shape)\n",
    "print(subject_2_train.shape)\n",
    "print(subject_2_test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 614 ms, sys: 1.47 s, total: 2.08 s\n",
      "Wall time: 2.08 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "train=pd.concat([subject_2,subject_3,subject_4,subject_5,subject_6,subject_7,subject_8,subject_9,subject_10,subject_11])\n",
    "test=pd.concat([subject_13,subject_14,subject_15,subject_16,subject_17])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(40143599, 5)\n",
      "(18981901, 5)\n"
     ]
    }
   ],
   "source": [
    "print(train.shape)\n",
    "print(test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index          321148792\n",
      "chest_ACC_x    321148792\n",
      "chest_ACC_y    321148792\n",
      "chest_ACC_z    321148792\n",
      "target         321148792\n",
      "subject        321148792\n",
      "dtype: int64\n",
      "Index          151855208\n",
      "chest_ACC_x    151855208\n",
      "chest_ACC_y    151855208\n",
      "chest_ACC_z    151855208\n",
      "target         151855208\n",
      "subject        151855208\n",
      "dtype: int64\n"
     ]
    }
   ],
   "source": [
    "print(train.memory_usage(index=True, deep=False))\n",
    "print(test.memory_usage(index=True, deep=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 40143599 entries, 26710599 to 8382499\n",
      "Data columns (total 5 columns):\n",
      "chest_ACC_x    float64\n",
      "chest_ACC_y    float64\n",
      "chest_ACC_z    float64\n",
      "target         int64\n",
      "subject        int64\n",
      "dtypes: float64(3), int64(2)\n",
      "memory usage: 1.8 GB\n",
      "None\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 18981901 entries, 39094999 to 51311399\n",
      "Data columns (total 5 columns):\n",
      "chest_ACC_x    float64\n",
      "chest_ACC_y    float64\n",
      "chest_ACC_z    float64\n",
      "target         int64\n",
      "subject        int64\n",
      "dtypes: float64(3), int64(2)\n",
      "memory usage: 868.9 MB\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "print(train.info(memory_usage='deep'))\n",
    "print(test.info(memory_usage='deep'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "features=['chest_ACC_x','chest_ACC_y','chest_ACC_z']\n",
    "target=['target']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']\n",
      "CPU times: user 98 µs, sys: 17 µs, total: 115 µs\n",
      "Wall time: 120 µs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "features=feature\n",
    "print(features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['target']\n",
      "CPU times: user 99 µs, sys: 0 ns, total: 99 µs\n",
      "Wall time: 103 µs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "target=['target']\n",
    "print(target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2\n",
      "3\n",
      "4\n",
      "5\n",
      "6\n",
      "7\n",
      "8\n",
      "9\n",
      "10\n",
      "11\n",
      "13\n",
      "14\n",
      "15\n",
      "16\n",
      "17\n",
      "CPU times: user 870 µs, sys: 127 ms, total: 127 ms\n",
      "Wall time: 122 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "for i in list_of_subjects[0:]:\n",
    "    print(i)\n",
    "    del(globals()['subject_%s' % i])\n",
    "    del(globals()['subject_%s_train' % i])\n",
    "    del(globals()['subject_%s_test' % i])\n",
    "del df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ExtraTreesClassifier\t classification_report\t feature\t features\t i\t list_of_subjects\t np\t pd\t target\t \n",
      "test\t to_remove\t train\t train_test_split\t \n"
     ]
    }
   ],
   "source": [
    "who"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_of_subjects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/sf/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n",
      "  \n",
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "building tree 1 of 50\n",
      "building tree 2 of 50\n",
      "building tree 3 of 50building tree 4 of 50\n",
      "building tree 5 of 50\n",
      "building tree 6 of 50\n",
      "\n",
      "building tree 7 of 50\n",
      "building tree 8 of 50\n",
      "building tree 9 of 50\n",
      "building tree 10 of 50\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "et = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=2)\n",
    "et.fit(train[features],train[target])\n",
    "y_pred=et.predict(test[features])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.86      0.87      0.87   8295722\n",
      "           1       0.85      0.83      0.84   3698748\n",
      "           2       0.74      0.73      0.74   2092335\n",
      "           3       0.82      0.80      0.81   1170648\n",
      "           4       0.88      0.88      0.88   2480199\n",
      "\n",
      "    accuracy                           0.84  17737652\n",
      "   macro avg       0.83      0.82      0.83  17737652\n",
      "weighted avg       0.84      0.84      0.84  17737652\n",
      "\n",
      "CPU times: user 34.9 s, sys: 6.72 s, total: 41.6 s\n",
      "Wall time: 41.6 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "print(classification_report(test[target],y_pred ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "del train\n",
    "del test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11\n",
      "14\n",
      "8\n",
      "15\n",
      "9\n",
      "10\n",
      "16\n",
      "4\n",
      "13\n",
      "3\n",
      "17\n",
      "5\n",
      "7\n",
      "CPU times: user 19.1 ms, sys: 306 ms, total: 325 ms\n",
      "Wall time: 318 ms\n"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}