--- a +++ b/Acceleration Features, Train = 10 users Test = 5 users, Random Analysis.ipynb @@ -0,0 +1,637 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# train 1(14=0.01) test 14 (.99)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.18 s, sys: 577 ms, total: 1.76 s\n", + "Wall time: 9.18 s\n" + ] + } + ], + "source": [ + "%%time\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.ensemble import ExtraTreesClassifier\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1min 25s, sys: 13.7 s, total: 1min 38s\n", + "Wall time: 2min 26s\n" + ] + } + ], + "source": [ + "%%time\n", + "df = pd.read_csv(\"master_data.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['target', 'subject', 'chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z',\n", + " 'chest_ECG', 'chest_EMG', 'chest_EDA', 'chest_Temp', 'chest_Resp'],\n", + " dtype='object')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 533 ms, sys: 124 ms, total: 657 ms\n", + "Wall time: 698 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "df=df[['chest_ACC_x','chest_ACC_y','chest_ACC_z','target','subject']]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 510 ms, sys: 43 µs, total: 510 ms\n", + "Wall time: 508 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "df['subject'].unique()\n", + "list_of_subjects=list(df['subject'].unique())\n", + "list_of_subjects.sort()\n", + "list_of_subjects" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 107 µs, sys: 18 µs, total: 125 µs\n", + "Wall time: 129 µs\n" + ] + }, + { + "data": { + "text/plain": [ + "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "features=df.columns.tolist()\n", + "to_remove = [fea for fea in features if \"target\" in fea or \"subject\" in fea]\n", + "feature = [x for x in features if x not in to_remove]\n", + "feature" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "subject_2\n", + "3\n", + "subject_3\n", + "4\n", + "subject_4\n", + "5\n", + "subject_5\n", + "6\n", + "subject_6\n", + "7\n", + "subject_7\n", + "8\n", + "subject_8\n", + "9\n", + "subject_9\n", + "10\n", + "subject_10\n", + "11\n", + "subject_11\n", + "13\n", + "subject_13\n", + "14\n", + "subject_14\n", + "15\n", + "subject_15\n", + "16\n", + "subject_16\n", + "17\n", + "subject_17\n", + "CPU times: user 16.3 s, sys: 2.31 s, total: 18.6 s\n", + "Wall time: 18.7 s\n" + ] + } + ], + "source": [ + "%%time\n", + "for i in list_of_subjects:\n", + " print(i)\n", + " globals()['subject_%s' % i]=df[df['subject'] == i]\n", + " globals()['subject_%s_train' % i],globals()['subject_%s_test' % i]=train_test_split(globals()['subject_%s' % i], test_size=0.3)\n", + " print('subject_'+str(i))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4165000, 5)\n", + "(2915500, 5)\n", + "(1249500, 5)\n" + ] + } + ], + "source": [ + "print(subject_2.shape)\n", + "print(subject_2_train.shape)\n", + "print(subject_2_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 614 ms, sys: 1.47 s, total: 2.08 s\n", + "Wall time: 2.08 s\n" + ] + } + ], + "source": [ + "%%time\n", + "train=pd.concat([subject_2,subject_3,subject_4,subject_5,subject_6,subject_7,subject_8,subject_9,subject_10,subject_11])\n", + "test=pd.concat([subject_13,subject_14,subject_15,subject_16,subject_17])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(40143599, 5)\n", + "(18981901, 5)\n" + ] + } + ], + "source": [ + "print(train.shape)\n", + "print(test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index 321148792\n", + "chest_ACC_x 321148792\n", + "chest_ACC_y 321148792\n", + "chest_ACC_z 321148792\n", + "target 321148792\n", + "subject 321148792\n", + "dtype: int64\n", + "Index 151855208\n", + "chest_ACC_x 151855208\n", + "chest_ACC_y 151855208\n", + "chest_ACC_z 151855208\n", + "target 151855208\n", + "subject 151855208\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "print(train.memory_usage(index=True, deep=False))\n", + "print(test.memory_usage(index=True, deep=False))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "<class 'pandas.core.frame.DataFrame'>\n", + "Int64Index: 40143599 entries, 26710599 to 8382499\n", + "Data columns (total 5 columns):\n", + "chest_ACC_x float64\n", + "chest_ACC_y float64\n", + "chest_ACC_z float64\n", + "target int64\n", + "subject int64\n", + "dtypes: float64(3), int64(2)\n", + "memory usage: 1.8 GB\n", + "None\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "Int64Index: 18981901 entries, 39094999 to 51311399\n", + "Data columns (total 5 columns):\n", + "chest_ACC_x float64\n", + "chest_ACC_y float64\n", + "chest_ACC_z float64\n", + "target int64\n", + "subject int64\n", + "dtypes: float64(3), int64(2)\n", + "memory usage: 868.9 MB\n", + "None\n" + ] + } + ], + "source": [ + "print(train.info(memory_usage='deep'))\n", + "print(test.info(memory_usage='deep'))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "features=['chest_ACC_x','chest_ACC_y','chest_ACC_z']\n", + "target=['target']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['chest_ACC_x', 'chest_ACC_y', 'chest_ACC_z']\n", + "CPU times: user 98 µs, sys: 17 µs, total: 115 µs\n", + "Wall time: 120 µs\n" + ] + } + ], + "source": [ + "%%time\n", + "features=feature\n", + "print(features)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['target']\n", + "CPU times: user 99 µs, sys: 0 ns, total: 99 µs\n", + "Wall time: 103 µs\n" + ] + } + ], + "source": [ + "%%time\n", + "target=['target']\n", + "print(target)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "3\n", + "4\n", + "5\n", + "6\n", + "7\n", + "8\n", + "9\n", + "10\n", + "11\n", + "13\n", + "14\n", + "15\n", + "16\n", + "17\n", + "CPU times: user 870 µs, sys: 127 ms, total: 127 ms\n", + "Wall time: 122 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "for i in list_of_subjects[0:]:\n", + " print(i)\n", + " del(globals()['subject_%s' % i])\n", + " del(globals()['subject_%s_train' % i])\n", + " del(globals()['subject_%s_test' % i])\n", + "del df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ExtraTreesClassifier\t classification_report\t feature\t features\t i\t list_of_subjects\t np\t pd\t target\t \n", + "test\t to_remove\t train\t train_test_split\t \n" + ] + } + ], + "source": [ + "who" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list_of_subjects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/sf/.local/lib/python3.6/site-packages/ipykernel_launcher.py:2: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " \n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "building tree 1 of 50\n", + "building tree 2 of 50\n", + "building tree 3 of 50building tree 4 of 50\n", + "building tree 5 of 50\n", + "building tree 6 of 50\n", + "\n", + "building tree 7 of 50\n", + "building tree 8 of 50\n", + "building tree 9 of 50\n", + "building tree 10 of 50\n" + ] + } + ], + "source": [ + "%%time\n", + "et = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=2)\n", + "et.fit(train[features],train[target])\n", + "y_pred=et.predict(test[features])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.86 0.87 0.87 8295722\n", + " 1 0.85 0.83 0.84 3698748\n", + " 2 0.74 0.73 0.74 2092335\n", + " 3 0.82 0.80 0.81 1170648\n", + " 4 0.88 0.88 0.88 2480199\n", + "\n", + " accuracy 0.84 17737652\n", + " macro avg 0.83 0.82 0.83 17737652\n", + "weighted avg 0.84 0.84 0.84 17737652\n", + "\n", + "CPU times: user 34.9 s, sys: 6.72 s, total: 41.6 s\n", + "Wall time: 41.6 s\n" + ] + } + ], + "source": [ + "%%time\n", + "print(classification_report(test[target],y_pred ))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del train\n", + "del test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11\n", + "14\n", + "8\n", + "15\n", + "9\n", + "10\n", + "16\n", + "4\n", + "13\n", + "3\n", + "17\n", + "5\n", + "7\n", + "CPU times: user 19.1 ms, sys: 306 ms, total: 325 ms\n", + "Wall time: 318 ms\n" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}