1312 lines (1311 with data), 42.6 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 0 ns, sys: 37 µs, total: 37 µs\n",
"Wall time: 41 µs\n"
]
}
],
"source": [
"%%time\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.metrics import classification_report\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 1min 1s, sys: 4.38 s, total: 1min 5s\n",
"Wall time: 1min 18s\n"
]
}
],
"source": [
"%%time\n",
"df = pd.read_csv(\"master_data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(31470603, 10)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sorted list of the distinct subject IDs found in the 'subject' column.\n",
"# (The earlier stand-alone unique() call was dropped: its result was discarded.)\n",
"list_of_subjects = sorted(df['subject'].unique())\n",
"list_of_subjects"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['chest_ACC_x',\n",
" 'chest_ACC_y',\n",
" 'chest_ACC_z',\n",
" 'chest_ECG',\n",
" 'chest_EMG',\n",
" 'chest_EDA',\n",
" 'chest_Temp',\n",
" 'chest_Resp']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Model inputs: every column whose name contains neither \"target\" nor \"subject\".\n",
"features = df.columns.tolist()\n",
"to_remove = [col for col in features if any(tag in col for tag in (\"target\", \"subject\"))]\n",
"feature = [col for col in features if col not in to_remove]\n",
"feature"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 2, 4, 3])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['target'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"6\n",
"11\n",
"14\n",
"8\n",
"15\n",
"9\n",
"10\n",
"2\n",
"16\n",
"4\n",
"13\n",
"3\n",
"17\n",
"5\n",
"7\n",
"CPU times: user 2.25 s, sys: 663 ms, total: 2.92 s\n",
"Wall time: 3.07 s\n"
]
}
],
"source": [
"%%time\n",
"# Subject IDs in order of first appearance in the data.\n",
"test_subject = list(df['subject'].unique())\n",
"for i in test_subject:\n",
"    print(i)\n",
"    # Publish this subject's rows as a notebook-level variable named `subject_<id>`.\n",
"    globals()[f'subject_{i}'] = df.loc[df['subject'] == i]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Per-subject frames, looked up by name, in ascending subject-ID order.\n",
"subject_list = [\n",
"    globals()['subject_%d' % subject_id]\n",
"    for subject_id in (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Subject IDs 2-11 and 13-17 in ascending order (position-aligned with `subject_list`).\n",
"x = list(range(2, 12)) + list(range(13, 18))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# For every subject, publish one frame per target value as `df_<target>_<subject>`.\n",
"for i in range(len(x)):\n",
"    globals().update({\n",
"        'df_%s_%s' % (label, x[i]): subject_list[i][subject_list[i]['target'] == label]\n",
"        for label in (1, 2, 3, 4)\n",
"    })"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ExtraTreesClassifier\t LogisticRegression\t RandomForestClassifier\t classification_report\t df\t df_1_10\t df_1_11\t df_1_13\t df_1_14\t \n",
"df_1_15\t df_1_16\t df_1_17\t df_1_2\t df_1_3\t df_1_4\t df_1_5\t df_1_6\t df_1_7\t \n",
"df_1_8\t df_1_9\t df_2_10\t df_2_11\t df_2_13\t df_2_14\t df_2_15\t df_2_16\t df_2_17\t \n",
"df_2_2\t df_2_3\t df_2_4\t df_2_5\t df_2_6\t df_2_7\t df_2_8\t df_2_9\t df_3_10\t \n",
"df_3_11\t df_3_13\t df_3_14\t df_3_15\t df_3_16\t df_3_17\t df_3_2\t df_3_3\t df_3_4\t \n",
"df_3_5\t df_3_6\t df_3_7\t df_3_8\t df_3_9\t df_4_10\t df_4_11\t df_4_13\t df_4_14\t \n",
"df_4_15\t df_4_16\t df_4_17\t df_4_2\t df_4_3\t df_4_4\t df_4_5\t df_4_6\t df_4_7\t \n",
"df_4_8\t df_4_9\t feature\t features\t i\t list_of_subjects\t np\t pd\t subject_10\t \n",
"subject_11\t subject_13\t subject_14\t subject_15\t subject_16\t subject_17\t subject_2\t subject_3\t subject_4\t \n",
"subject_5\t subject_6\t subject_7\t subject_8\t subject_9\t subject_list\t test_subject\t to_remove\t train_test_split\t \n",
"x\t \n"
]
}
],
"source": [
"who"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Subject IDs (2-11 and 13-17) and the target values (1-4) iterated over in the cells below.\n",
"x = [*range(2, 12), *range(13, 18)]\n",
"cls = list(range(1, 5))"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"84000"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of leading rows of each (target, subject) frame that go into the training set.\n",
"# NOTE(review): presumably 700 samples/s x 120 s (cf. the '2_min_*' CSV names) - confirm.\n",
"no_of_rows = 700 * 120\n",
"no_of_rows"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Positional split of every (target, subject) frame:\n",
"# the first `no_of_rows` rows become the train slice, all remaining rows the test slice.\n",
"for i in cls:\n",
"    for j in x:\n",
"        globals()[f'df_{i}_train_{j}'] = globals()[f'df_{i}_{j}'].iloc[:no_of_rows]\n",
"        globals()[f'df_{i}_test_{j}'] = globals()[f'df_{i}_{j}'].iloc[no_of_rows:]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Gather all train slices: target by target and, within a target, subject by subject.\n",
"concat_list = [globals()[f'df_{i}_train_{j}'] for i in cls for j in x]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target</th>\n",
" <th>subject</th>\n",
" <th>chest_ACC_x</th>\n",
" <th>chest_ACC_y</th>\n",
" <th>chest_ACC_z</th>\n",
" <th>chest_ECG</th>\n",
" <th>chest_EMG</th>\n",
" <th>chest_EDA</th>\n",
" <th>chest_Temp</th>\n",
" <th>chest_Resp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>14786800</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.8914</td>\n",
" <td>-0.1102</td>\n",
" <td>-0.2576</td>\n",
" <td>0.030945</td>\n",
" <td>-0.003708</td>\n",
" <td>5.710983</td>\n",
" <td>29.083618</td>\n",
" <td>1.191711</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14786801</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.8926</td>\n",
" <td>-0.1086</td>\n",
" <td>-0.2544</td>\n",
" <td>0.033646</td>\n",
" <td>-0.014145</td>\n",
" <td>5.719376</td>\n",
" <td>29.122437</td>\n",
" <td>1.139832</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14786802</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.8930</td>\n",
" <td>-0.1094</td>\n",
" <td>-0.2580</td>\n",
" <td>0.033005</td>\n",
" <td>0.010208</td>\n",
" <td>5.706406</td>\n",
" <td>29.115234</td>\n",
" <td>1.141357</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14786803</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.8934</td>\n",
" <td>-0.1082</td>\n",
" <td>-0.2538</td>\n",
" <td>0.031815</td>\n",
" <td>0.012634</td>\n",
" <td>5.712509</td>\n",
" <td>29.126709</td>\n",
" <td>1.155090</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14786804</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.8930</td>\n",
" <td>-0.1096</td>\n",
" <td>-0.2570</td>\n",
" <td>0.030350</td>\n",
" <td>0.002060</td>\n",
" <td>5.727005</td>\n",
" <td>29.100861</td>\n",
" <td>1.133728</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16809094</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0.4378</td>\n",
" <td>-0.2348</td>\n",
" <td>-0.8380</td>\n",
" <td>-0.182602</td>\n",
" <td>-0.015793</td>\n",
" <td>0.484085</td>\n",
" <td>31.926239</td>\n",
" <td>-1.609802</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16809095</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0.4378</td>\n",
" <td>-0.2338</td>\n",
" <td>-0.8394</td>\n",
" <td>-0.170609</td>\n",
" <td>0.000687</td>\n",
" <td>0.473404</td>\n",
" <td>31.932190</td>\n",
" <td>-1.646423</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16809096</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0.4388</td>\n",
" <td>-0.2338</td>\n",
" <td>-0.8386</td>\n",
" <td>-0.160812</td>\n",
" <td>0.004532</td>\n",
" <td>0.463486</td>\n",
" <td>31.918823</td>\n",
" <td>-1.643372</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16809097</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0.4398</td>\n",
" <td>-0.2374</td>\n",
" <td>-0.8390</td>\n",
" <td>-0.156326</td>\n",
" <td>0.000595</td>\n",
" <td>0.459290</td>\n",
" <td>31.932190</td>\n",
" <td>-1.661682</td>\n",
" </tr>\n",
" <tr>\n",
" <td>16809098</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>0.4386</td>\n",
" <td>-0.2366</td>\n",
" <td>-0.8408</td>\n",
" <td>-0.154312</td>\n",
" <td>-0.009201</td>\n",
" <td>0.455475</td>\n",
" <td>31.927704</td>\n",
" <td>-1.646423</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2022299 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" target subject chest_ACC_x chest_ACC_y chest_ACC_z chest_ECG \\\n",
"14786800 1 2 0.8914 -0.1102 -0.2576 0.030945 \n",
"14786801 1 2 0.8926 -0.1086 -0.2544 0.033646 \n",
"14786802 1 2 0.8930 -0.1094 -0.2580 0.033005 \n",
"14786803 1 2 0.8934 -0.1082 -0.2538 0.031815 \n",
"14786804 1 2 0.8930 -0.1096 -0.2570 0.030350 \n",
"... ... ... ... ... ... ... \n",
"16809094 4 2 0.4378 -0.2348 -0.8380 -0.182602 \n",
"16809095 4 2 0.4378 -0.2338 -0.8394 -0.170609 \n",
"16809096 4 2 0.4388 -0.2338 -0.8386 -0.160812 \n",
"16809097 4 2 0.4398 -0.2374 -0.8390 -0.156326 \n",
"16809098 4 2 0.4386 -0.2366 -0.8408 -0.154312 \n",
"\n",
" chest_EMG chest_EDA chest_Temp chest_Resp \n",
"14786800 -0.003708 5.710983 29.083618 1.191711 \n",
"14786801 -0.014145 5.719376 29.122437 1.139832 \n",
"14786802 0.010208 5.706406 29.115234 1.141357 \n",
"14786803 0.012634 5.712509 29.126709 1.155090 \n",
"14786804 0.002060 5.727005 29.100861 1.133728 \n",
"... ... ... ... ... \n",
"16809094 -0.015793 0.484085 31.926239 -1.609802 \n",
"16809095 0.000687 0.473404 31.932190 -1.646423 \n",
"16809096 0.004532 0.463486 31.918823 -1.643372 \n",
"16809097 0.000595 0.459290 31.932190 -1.661682 \n",
"16809098 -0.009201 0.455475 31.927704 -1.646423 \n",
"\n",
"[2022299 rows x 10 columns]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subject_2"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>target</th>\n",
" <th>subject</th>\n",
" <th>chest_ACC_x</th>\n",
" <th>chest_ACC_y</th>\n",
" <th>chest_ACC_z</th>\n",
" <th>chest_ECG</th>\n",
" <th>chest_EMG</th>\n",
" <th>chest_EDA</th>\n",
" <th>chest_Temp</th>\n",
" <th>chest_Resp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>14870800</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.6296</td>\n",
" <td>-0.1086</td>\n",
" <td>-0.7042</td>\n",
" <td>0.126160</td>\n",
" <td>-0.005585</td>\n",
" <td>3.750992</td>\n",
" <td>28.752167</td>\n",
" <td>-2.301025</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14870801</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.6296</td>\n",
" <td>-0.1058</td>\n",
" <td>-0.7094</td>\n",
" <td>0.124100</td>\n",
" <td>-0.007004</td>\n",
" <td>3.757477</td>\n",
" <td>28.765045</td>\n",
" <td>-2.740479</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14870802</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.6292</td>\n",
" <td>-0.1042</td>\n",
" <td>-0.7086</td>\n",
" <td>0.120346</td>\n",
" <td>0.002335</td>\n",
" <td>3.776169</td>\n",
" <td>28.745026</td>\n",
" <td>-2.276611</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14870803</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.6266</td>\n",
" <td>-0.1022</td>\n",
" <td>-0.7086</td>\n",
" <td>0.113754</td>\n",
" <td>-0.012863</td>\n",
" <td>3.753662</td>\n",
" <td>28.766479</td>\n",
" <td>-2.287292</td>\n",
" </tr>\n",
" <tr>\n",
" <td>14870804</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.6258</td>\n",
" <td>-0.1022</td>\n",
" <td>-0.7106</td>\n",
" <td>0.109909</td>\n",
" <td>-0.002975</td>\n",
" <td>3.759766</td>\n",
" <td>28.737854</td>\n",
" <td>-2.284241</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15587595</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.7148</td>\n",
" <td>0.0758</td>\n",
" <td>-0.0428</td>\n",
" <td>0.308167</td>\n",
" <td>0.016617</td>\n",
" <td>1.204681</td>\n",
" <td>29.716492</td>\n",
" <td>-1.144409</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15587596</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.7144</td>\n",
" <td>0.0670</td>\n",
" <td>-0.0618</td>\n",
" <td>0.332840</td>\n",
" <td>-0.001740</td>\n",
" <td>1.197052</td>\n",
" <td>29.762756</td>\n",
" <td>-1.118469</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15587597</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.7146</td>\n",
" <td>0.0642</td>\n",
" <td>-0.0726</td>\n",
" <td>0.359528</td>\n",
" <td>-0.005814</td>\n",
" <td>1.200104</td>\n",
" <td>29.715027</td>\n",
" <td>-1.078796</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15587598</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.7244</td>\n",
" <td>0.0606</td>\n",
" <td>-0.0818</td>\n",
" <td>0.387680</td>\n",
" <td>-0.001602</td>\n",
" <td>1.190948</td>\n",
" <td>29.717896</td>\n",
" <td>-1.025391</td>\n",
" </tr>\n",
" <tr>\n",
" <td>15587599</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>0.7282</td>\n",
" <td>0.0506</td>\n",
" <td>-0.0948</td>\n",
" <td>0.415009</td>\n",
" <td>-0.028244</td>\n",
" <td>1.198959</td>\n",
" <td>29.717896</td>\n",
" <td>-0.996399</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>716800 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" target subject chest_ACC_x chest_ACC_y chest_ACC_z chest_ECG \\\n",
"14870800 1 2 0.6296 -0.1086 -0.7042 0.126160 \n",
"14870801 1 2 0.6296 -0.1058 -0.7094 0.124100 \n",
"14870802 1 2 0.6292 -0.1042 -0.7086 0.120346 \n",
"14870803 1 2 0.6266 -0.1022 -0.7086 0.113754 \n",
"14870804 1 2 0.6258 -0.1022 -0.7106 0.109909 \n",
"... ... ... ... ... ... ... \n",
"15587595 1 2 0.7148 0.0758 -0.0428 0.308167 \n",
"15587596 1 2 0.7144 0.0670 -0.0618 0.332840 \n",
"15587597 1 2 0.7146 0.0642 -0.0726 0.359528 \n",
"15587598 1 2 0.7244 0.0606 -0.0818 0.387680 \n",
"15587599 1 2 0.7282 0.0506 -0.0948 0.415009 \n",
"\n",
" chest_EMG chest_EDA chest_Temp chest_Resp \n",
"14870800 -0.005585 3.750992 28.752167 -2.301025 \n",
"14870801 -0.007004 3.757477 28.765045 -2.740479 \n",
"14870802 0.002335 3.776169 28.745026 -2.276611 \n",
"14870803 -0.012863 3.753662 28.766479 -2.287292 \n",
"14870804 -0.002975 3.759766 28.737854 -2.284241 \n",
"... ... ... ... ... \n",
"15587595 0.016617 1.204681 29.716492 -1.144409 \n",
"15587596 -0.001740 1.197052 29.762756 -1.118469 \n",
"15587597 -0.005814 1.200104 29.715027 -1.078796 \n",
"15587598 -0.001602 1.190948 29.717896 -1.025391 \n",
"15587599 -0.028244 1.198959 29.717896 -0.996399 \n",
"\n",
"[716800 rows x 10 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Gather all test slices: target by target and, within a target, subject by subject.\n",
"concat_list1 = [globals()[f'df_{i}_test_{j}'] for i in cls for j in x]\n",
"concat_list1[0]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(5040000, 10)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stack the train slices row-wise; the original row labels are kept (no re-indexing).\n",
"train_df = pd.concat(concat_list, axis=0, ignore_index=False)\n",
"train_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4 1260000\n",
"3 1260000\n",
"2 1260000\n",
"1 1260000\n",
"Name: target, dtype: int64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.target.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"15"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train_df.subject.unique())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(26430603, 10)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stack the test slices row-wise; the original row labels are kept (no re-indexing).\n",
"test_df = pd.concat(concat_list1, axis=0, ignore_index=False)\n",
"test_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Remove the per-subject frames from the notebook namespace.\n",
"for i in test_subject:\n",
"    del globals()[f'subject_{i}']\n",
"\n",
"# Remove every per-target frame together with its train and test slices.\n",
"# NOTE(review): `concat_list` / `concat_list1` still reference the slices, so those\n",
"# objects stay alive until the two lists are dropped as well.\n",
"for i in cls:\n",
"    for j in x:\n",
"        del globals()[f'df_{i}_{j}']\n",
"        del globals()[f'df_{i}_train_{j}']\n",
"        del globals()[f'df_{i}_test_{j}']\n",
"\n",
"del df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"who"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Extra-trees ensemble: 50 trees, 10 parallel jobs, fixed seed for repeatability.\n",
"et = ExtraTreesClassifier(\n",
"    n_estimators=50,\n",
"    n_jobs=10,\n",
"    verbose=2,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#et = RandomForestClassifier(n_estimators=100, n_jobs=10, verbose=2,random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"et.fit(train_df[feature],train_df['target'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time \n",
"y_pred=et.predict(test_df[feature])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(classification_report(test_df['target'], y_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#train_df.to_csv('1_min_train.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#test_df.to_csv('1_min_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# train_df.to_csv('30_sec_train.csv')"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# test_df.to_csv('30_sec_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"train_df.to_csv('2_min_train.csv')"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Persist the test split. With to_csv defaults the row index is written as the first column.\n",
"# (A stray trailing `s` after the call made this cell a SyntaxError; it has been removed.)\n",
"test_df.to_csv('2_min_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import cross_val_score\n",
"\n",
"# 4-fold cross-validation of `et` on the training split only.\n",
"scores = cross_val_score(\n",
"    estimator=et,\n",
"    X=train_df[feature],\n",
"    y=train_df['target'],\n",
"    cv=4,\n",
")\n",
"print(scores)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 17.2s\n",
"[CV] n_neighbors=1 ...................................................\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 17.2s remaining: 0.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] .................................... n_neighbors=1, total= 22.0s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 18.1s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 13.1s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 21.9s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 30.5s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 17.4s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 19.3s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 21.8s\n",
"[CV] n_neighbors=1 ...................................................\n",
"[CV] .................................... n_neighbors=1, total= 21.4s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 21.4s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 28.9s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 20.3s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 15.2s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 21.3s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 24.5s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 19.0s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 20.7s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 24.1s\n",
"[CV] n_neighbors=3 ...................................................\n",
"[CV] .................................... n_neighbors=3, total= 23.5s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 22.2s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 24.9s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 21.1s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 13.9s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 22.0s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 27.2s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 19.0s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 21.3s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 25.0s\n",
"[CV] n_neighbors=5 ...................................................\n",
"[CV] .................................... n_neighbors=5, total= 24.3s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 22.7s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 25.2s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 21.6s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 13.8s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 22.1s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 28.4s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 19.4s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 21.6s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 25.3s\n",
"[CV] n_neighbors=7 ...................................................\n",
"[CV] .................................... n_neighbors=7, total= 24.4s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 23.3s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 25.8s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 22.1s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 13.8s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 22.5s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 28.0s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 21.1s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 22.1s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 26.2s\n",
"[CV] n_neighbors=9 ...................................................\n",
"[CV] .................................... n_neighbors=9, total= 24.9s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 24.0s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 26.5s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 29.7s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 14.2s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 23.2s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 28.7s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 21.7s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 22.7s\n",
"[CV] n_neighbors=11 ..................................................\n",
"[CV] ................................... n_neighbors=11, total= 27.1s\n",
"[CV] n_neighbors=11 ..................................................\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[CV] ................................... n_neighbors=11, total= 25.6s\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 60 out of 60 | elapsed: 22.4min finished\n"
]
},
{
"data": {
"text/plain": [
"GridSearchCV(cv=10, error_score='raise-deprecating',\n",
" estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,\n",
" metric='minkowski',\n",
" metric_params=None, n_jobs=None,\n",
" n_neighbors=5, p=2,\n",
" weights='uniform'),\n",
" iid='warn', n_jobs=None,\n",
" param_grid={'n_neighbors': [1, 3, 5, 7, 9, 11]},\n",
" pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
" scoring=None, verbose=2)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"\n",
"# Candidate neighbour counts: the odd values 1, 3, ..., 11.\n",
"param_grid = {'n_neighbors': list(range(1, 12, 2))}\n",
"\n",
"clf = KNeighborsClassifier()\n",
"\n",
"# 10-fold grid search over `param_grid`, fitted on the training split.\n",
"CV_rfc = GridSearchCV(clf, param_grid, cv=10, verbose=2)\n",
"CV_rfc.fit(train_df[feature], train_df['target'])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'n_neighbors': 1}"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"CV_rfc.best_params_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
"# Extra-trees ensemble: 20 trees, 10 parallel jobs, fixed seed for repeatability.\n",
"et = ExtraTreesClassifier(\n",
"    n_estimators=20,\n",
"    n_jobs=10,\n",
"    verbose=2,\n",
"    random_state=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"et.fit(train_df[feature],train_df['target'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%time \n",
"y_pred=et.predict(test_df[feature])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(classification_report(test_df['target'], y_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 1-nearest-neighbour model (the value reported by CV_rfc.best_params_), trained on the\n",
"# training split and then used to label the test split.\n",
"clf = KNeighborsClassifier(n_neighbors=1).fit(train_df[feature], train_df['target'])\n",
"y_pred = clf.predict(test_df[feature])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(classification_report(test_df['target'], y_pred))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}