[d8f241]: / Random / Random 4.ipynb

Download this file

1312 lines (1311 with data), 42.6 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 0 ns, sys: 37 µs, total: 37 µs\n",
      "Wall time: 41 µs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 1s, sys: 4.38 s, total: 1min 5s\n",
      "Wall time: 1min 18s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "df = pd.read_csv(\"master_data.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(31470603, 10)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['subject'].unique()\n",
    "list_of_subjects=list(df['subject'].unique())\n",
    "list_of_subjects.sort()\n",
    "list_of_subjects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['chest_ACC_x',\n",
       " 'chest_ACC_y',\n",
       " 'chest_ACC_z',\n",
       " 'chest_ECG',\n",
       " 'chest_EMG',\n",
       " 'chest_EDA',\n",
       " 'chest_Temp',\n",
       " 'chest_Resp']"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "features=df.columns.tolist()\n",
    "to_remove = [fea for fea in features if \"target\"  in fea or \"subject\"  in fea]\n",
    "feature = [x for x in features if x not in to_remove]\n",
    "feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 2, 4, 3])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['target'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "6\n",
      "11\n",
      "14\n",
      "8\n",
      "15\n",
      "9\n",
      "10\n",
      "2\n",
      "16\n",
      "4\n",
      "13\n",
      "3\n",
      "17\n",
      "5\n",
      "7\n",
      "CPU times: user 2.25 s, sys: 663 ms, total: 2.92 s\n",
      "Wall time: 3.07 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "test_subject=list(df['subject'].unique())\n",
    "for i in test_subject:\n",
    "    print(i)\n",
    "    globals()['subject_%s' % i]=df[df['subject'] == i]\n",
    "#     globals()['subject_%s_train' % i],globals()['subject_%s_test' % i]=train_test_split(globals()['subject_%s' % i], test_size=test_shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "subject_list=[subject_2,subject_3,subject_4,subject_5,subject_6,subject_7,subject_8,subject_9,subject_10,subject_11,subject_13,subject_14,subject_15,subject_16,subject_17]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "x=[2,3,4,5,6,7,8,9,10,11,13,14,15,16,17]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "for i in range(len(x)):\n",
    "        \n",
    "        globals()['df_1_%s' % x[i]]=subject_list[i][subject_list[i]['target']==1]\n",
    "        globals()['df_2_%s' % x[i]]=subject_list[i][subject_list[i]['target']==2]\n",
    "        globals()['df_3_%s' % x[i]]=subject_list[i][subject_list[i]['target']==3]\n",
    "        globals()['df_4_%s' % x[i]]=subject_list[i][subject_list[i]['target']==4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ExtraTreesClassifier\t LogisticRegression\t RandomForestClassifier\t classification_report\t df\t df_1_10\t df_1_11\t df_1_13\t df_1_14\t \n",
      "df_1_15\t df_1_16\t df_1_17\t df_1_2\t df_1_3\t df_1_4\t df_1_5\t df_1_6\t df_1_7\t \n",
      "df_1_8\t df_1_9\t df_2_10\t df_2_11\t df_2_13\t df_2_14\t df_2_15\t df_2_16\t df_2_17\t \n",
      "df_2_2\t df_2_3\t df_2_4\t df_2_5\t df_2_6\t df_2_7\t df_2_8\t df_2_9\t df_3_10\t \n",
      "df_3_11\t df_3_13\t df_3_14\t df_3_15\t df_3_16\t df_3_17\t df_3_2\t df_3_3\t df_3_4\t \n",
      "df_3_5\t df_3_6\t df_3_7\t df_3_8\t df_3_9\t df_4_10\t df_4_11\t df_4_13\t df_4_14\t \n",
      "df_4_15\t df_4_16\t df_4_17\t df_4_2\t df_4_3\t df_4_4\t df_4_5\t df_4_6\t df_4_7\t \n",
      "df_4_8\t df_4_9\t feature\t features\t i\t list_of_subjects\t np\t pd\t subject_10\t \n",
      "subject_11\t subject_13\t subject_14\t subject_15\t subject_16\t subject_17\t subject_2\t subject_3\t subject_4\t \n",
      "subject_5\t subject_6\t subject_7\t subject_8\t subject_9\t subject_list\t test_subject\t to_remove\t train_test_split\t \n",
      "x\t \n"
     ]
    }
   ],
   "source": [
    "who"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "x=[2,3,4,5,6,7,8,9,10,11,13,14,15,16,17]\n",
    "cls=[1,2,3,4]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "84000"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "no_of_rows=int(700*120)\n",
    "no_of_rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in cls:\n",
    "    for j in x:\n",
    "        globals()['df_{}_train_{}'.format(i,j)] = globals()['df_{}_{}'.format(i,j)].iloc[:no_of_rows]\n",
    "        #globals()['df_{}_train_{}'.format(i,j)],globals()['df_{}_test_{}'.format(i,j)]=train_test_split(globals()['df_{}_{}'.format(i,j)], test_size=0.3)\n",
    "        #print('subject_'+str(i))\n",
    "        globals()['df_{}_test_{}'.format(i,j)] = globals()['df_{}_{}'.format(i,j)].iloc[no_of_rows:]        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "concat_list=[]\n",
    "for i in cls:\n",
    "    for j in x:\n",
    "        concat_list.append(globals()['df_{}_train_{}'.format(i,j)])\n",
    "#concat_list[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target</th>\n",
       "      <th>subject</th>\n",
       "      <th>chest_ACC_x</th>\n",
       "      <th>chest_ACC_y</th>\n",
       "      <th>chest_ACC_z</th>\n",
       "      <th>chest_ECG</th>\n",
       "      <th>chest_EMG</th>\n",
       "      <th>chest_EDA</th>\n",
       "      <th>chest_Temp</th>\n",
       "      <th>chest_Resp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>14786800</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.8914</td>\n",
       "      <td>-0.1102</td>\n",
       "      <td>-0.2576</td>\n",
       "      <td>0.030945</td>\n",
       "      <td>-0.003708</td>\n",
       "      <td>5.710983</td>\n",
       "      <td>29.083618</td>\n",
       "      <td>1.191711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14786801</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.8926</td>\n",
       "      <td>-0.1086</td>\n",
       "      <td>-0.2544</td>\n",
       "      <td>0.033646</td>\n",
       "      <td>-0.014145</td>\n",
       "      <td>5.719376</td>\n",
       "      <td>29.122437</td>\n",
       "      <td>1.139832</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14786802</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.8930</td>\n",
       "      <td>-0.1094</td>\n",
       "      <td>-0.2580</td>\n",
       "      <td>0.033005</td>\n",
       "      <td>0.010208</td>\n",
       "      <td>5.706406</td>\n",
       "      <td>29.115234</td>\n",
       "      <td>1.141357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14786803</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.8934</td>\n",
       "      <td>-0.1082</td>\n",
       "      <td>-0.2538</td>\n",
       "      <td>0.031815</td>\n",
       "      <td>0.012634</td>\n",
       "      <td>5.712509</td>\n",
       "      <td>29.126709</td>\n",
       "      <td>1.155090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14786804</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.8930</td>\n",
       "      <td>-0.1096</td>\n",
       "      <td>-0.2570</td>\n",
       "      <td>0.030350</td>\n",
       "      <td>0.002060</td>\n",
       "      <td>5.727005</td>\n",
       "      <td>29.100861</td>\n",
       "      <td>1.133728</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16809094</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.4378</td>\n",
       "      <td>-0.2348</td>\n",
       "      <td>-0.8380</td>\n",
       "      <td>-0.182602</td>\n",
       "      <td>-0.015793</td>\n",
       "      <td>0.484085</td>\n",
       "      <td>31.926239</td>\n",
       "      <td>-1.609802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16809095</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.4378</td>\n",
       "      <td>-0.2338</td>\n",
       "      <td>-0.8394</td>\n",
       "      <td>-0.170609</td>\n",
       "      <td>0.000687</td>\n",
       "      <td>0.473404</td>\n",
       "      <td>31.932190</td>\n",
       "      <td>-1.646423</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16809096</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.4388</td>\n",
       "      <td>-0.2338</td>\n",
       "      <td>-0.8386</td>\n",
       "      <td>-0.160812</td>\n",
       "      <td>0.004532</td>\n",
       "      <td>0.463486</td>\n",
       "      <td>31.918823</td>\n",
       "      <td>-1.643372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16809097</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.4398</td>\n",
       "      <td>-0.2374</td>\n",
       "      <td>-0.8390</td>\n",
       "      <td>-0.156326</td>\n",
       "      <td>0.000595</td>\n",
       "      <td>0.459290</td>\n",
       "      <td>31.932190</td>\n",
       "      <td>-1.661682</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>16809098</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>0.4386</td>\n",
       "      <td>-0.2366</td>\n",
       "      <td>-0.8408</td>\n",
       "      <td>-0.154312</td>\n",
       "      <td>-0.009201</td>\n",
       "      <td>0.455475</td>\n",
       "      <td>31.927704</td>\n",
       "      <td>-1.646423</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2022299 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          target  subject  chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_ECG  \\\n",
       "14786800       1        2       0.8914      -0.1102      -0.2576   0.030945   \n",
       "14786801       1        2       0.8926      -0.1086      -0.2544   0.033646   \n",
       "14786802       1        2       0.8930      -0.1094      -0.2580   0.033005   \n",
       "14786803       1        2       0.8934      -0.1082      -0.2538   0.031815   \n",
       "14786804       1        2       0.8930      -0.1096      -0.2570   0.030350   \n",
       "...          ...      ...          ...          ...          ...        ...   \n",
       "16809094       4        2       0.4378      -0.2348      -0.8380  -0.182602   \n",
       "16809095       4        2       0.4378      -0.2338      -0.8394  -0.170609   \n",
       "16809096       4        2       0.4388      -0.2338      -0.8386  -0.160812   \n",
       "16809097       4        2       0.4398      -0.2374      -0.8390  -0.156326   \n",
       "16809098       4        2       0.4386      -0.2366      -0.8408  -0.154312   \n",
       "\n",
       "          chest_EMG  chest_EDA  chest_Temp  chest_Resp  \n",
       "14786800  -0.003708   5.710983   29.083618    1.191711  \n",
       "14786801  -0.014145   5.719376   29.122437    1.139832  \n",
       "14786802   0.010208   5.706406   29.115234    1.141357  \n",
       "14786803   0.012634   5.712509   29.126709    1.155090  \n",
       "14786804   0.002060   5.727005   29.100861    1.133728  \n",
       "...             ...        ...         ...         ...  \n",
       "16809094  -0.015793   0.484085   31.926239   -1.609802  \n",
       "16809095   0.000687   0.473404   31.932190   -1.646423  \n",
       "16809096   0.004532   0.463486   31.918823   -1.643372  \n",
       "16809097   0.000595   0.459290   31.932190   -1.661682  \n",
       "16809098  -0.009201   0.455475   31.927704   -1.646423  \n",
       "\n",
       "[2022299 rows x 10 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "subject_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target</th>\n",
       "      <th>subject</th>\n",
       "      <th>chest_ACC_x</th>\n",
       "      <th>chest_ACC_y</th>\n",
       "      <th>chest_ACC_z</th>\n",
       "      <th>chest_ECG</th>\n",
       "      <th>chest_EMG</th>\n",
       "      <th>chest_EDA</th>\n",
       "      <th>chest_Temp</th>\n",
       "      <th>chest_Resp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>14870800</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6296</td>\n",
       "      <td>-0.1086</td>\n",
       "      <td>-0.7042</td>\n",
       "      <td>0.126160</td>\n",
       "      <td>-0.005585</td>\n",
       "      <td>3.750992</td>\n",
       "      <td>28.752167</td>\n",
       "      <td>-2.301025</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14870801</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6296</td>\n",
       "      <td>-0.1058</td>\n",
       "      <td>-0.7094</td>\n",
       "      <td>0.124100</td>\n",
       "      <td>-0.007004</td>\n",
       "      <td>3.757477</td>\n",
       "      <td>28.765045</td>\n",
       "      <td>-2.740479</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14870802</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6292</td>\n",
       "      <td>-0.1042</td>\n",
       "      <td>-0.7086</td>\n",
       "      <td>0.120346</td>\n",
       "      <td>0.002335</td>\n",
       "      <td>3.776169</td>\n",
       "      <td>28.745026</td>\n",
       "      <td>-2.276611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14870803</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6266</td>\n",
       "      <td>-0.1022</td>\n",
       "      <td>-0.7086</td>\n",
       "      <td>0.113754</td>\n",
       "      <td>-0.012863</td>\n",
       "      <td>3.753662</td>\n",
       "      <td>28.766479</td>\n",
       "      <td>-2.287292</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>14870804</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.6258</td>\n",
       "      <td>-0.1022</td>\n",
       "      <td>-0.7106</td>\n",
       "      <td>0.109909</td>\n",
       "      <td>-0.002975</td>\n",
       "      <td>3.759766</td>\n",
       "      <td>28.737854</td>\n",
       "      <td>-2.284241</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15587595</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.7148</td>\n",
       "      <td>0.0758</td>\n",
       "      <td>-0.0428</td>\n",
       "      <td>0.308167</td>\n",
       "      <td>0.016617</td>\n",
       "      <td>1.204681</td>\n",
       "      <td>29.716492</td>\n",
       "      <td>-1.144409</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15587596</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.7144</td>\n",
       "      <td>0.0670</td>\n",
       "      <td>-0.0618</td>\n",
       "      <td>0.332840</td>\n",
       "      <td>-0.001740</td>\n",
       "      <td>1.197052</td>\n",
       "      <td>29.762756</td>\n",
       "      <td>-1.118469</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15587597</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.7146</td>\n",
       "      <td>0.0642</td>\n",
       "      <td>-0.0726</td>\n",
       "      <td>0.359528</td>\n",
       "      <td>-0.005814</td>\n",
       "      <td>1.200104</td>\n",
       "      <td>29.715027</td>\n",
       "      <td>-1.078796</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15587598</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.7244</td>\n",
       "      <td>0.0606</td>\n",
       "      <td>-0.0818</td>\n",
       "      <td>0.387680</td>\n",
       "      <td>-0.001602</td>\n",
       "      <td>1.190948</td>\n",
       "      <td>29.717896</td>\n",
       "      <td>-1.025391</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>15587599</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0.7282</td>\n",
       "      <td>0.0506</td>\n",
       "      <td>-0.0948</td>\n",
       "      <td>0.415009</td>\n",
       "      <td>-0.028244</td>\n",
       "      <td>1.198959</td>\n",
       "      <td>29.717896</td>\n",
       "      <td>-0.996399</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>716800 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          target  subject  chest_ACC_x  chest_ACC_y  chest_ACC_z  chest_ECG  \\\n",
       "14870800       1        2       0.6296      -0.1086      -0.7042   0.126160   \n",
       "14870801       1        2       0.6296      -0.1058      -0.7094   0.124100   \n",
       "14870802       1        2       0.6292      -0.1042      -0.7086   0.120346   \n",
       "14870803       1        2       0.6266      -0.1022      -0.7086   0.113754   \n",
       "14870804       1        2       0.6258      -0.1022      -0.7106   0.109909   \n",
       "...          ...      ...          ...          ...          ...        ...   \n",
       "15587595       1        2       0.7148       0.0758      -0.0428   0.308167   \n",
       "15587596       1        2       0.7144       0.0670      -0.0618   0.332840   \n",
       "15587597       1        2       0.7146       0.0642      -0.0726   0.359528   \n",
       "15587598       1        2       0.7244       0.0606      -0.0818   0.387680   \n",
       "15587599       1        2       0.7282       0.0506      -0.0948   0.415009   \n",
       "\n",
       "          chest_EMG  chest_EDA  chest_Temp  chest_Resp  \n",
       "14870800  -0.005585   3.750992   28.752167   -2.301025  \n",
       "14870801  -0.007004   3.757477   28.765045   -2.740479  \n",
       "14870802   0.002335   3.776169   28.745026   -2.276611  \n",
       "14870803  -0.012863   3.753662   28.766479   -2.287292  \n",
       "14870804  -0.002975   3.759766   28.737854   -2.284241  \n",
       "...             ...        ...         ...         ...  \n",
       "15587595   0.016617   1.204681   29.716492   -1.144409  \n",
       "15587596  -0.001740   1.197052   29.762756   -1.118469  \n",
       "15587597  -0.005814   1.200104   29.715027   -1.078796  \n",
       "15587598  -0.001602   1.190948   29.717896   -1.025391  \n",
       "15587599  -0.028244   1.198959   29.717896   -0.996399  \n",
       "\n",
       "[716800 rows x 10 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "concat_list1=[]\n",
    "for i in cls:\n",
    "    for j in x:\n",
    "        concat_list1.append(globals()['df_{}_test_{}'.format(i,j)])\n",
    "concat_list1[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(5040000, 10)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df=pd.concat(concat_list)\n",
    "train_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4    1260000\n",
       "3    1260000\n",
       "2    1260000\n",
       "1    1260000\n",
       "Name: target, dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_df.target.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "15"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(train_df.subject.unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(26430603, 10)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df=pd.concat(concat_list1)\n",
    "test_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in test_subject:\n",
    "    del(globals()['subject_%s' % i])\n",
    "    \n",
    "for i in range(len(x)):   \n",
    "        del(globals()['df_1_%s' % x[i]])\n",
    "        del(globals()['df_2_%s' % x[i]])\n",
    "        del(globals()['df_3_%s' % x[i]])\n",
    "        del(globals()['df_4_%s' % x[i]])\n",
    "for i in cls:\n",
    "    for j in x:\n",
    "        del(globals()['df_{}_train_{}'.format(i,j)])\n",
    "        del(globals()['df_{}_test_{}'.format(i,j)])\n",
    "del df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "who"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "et = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=2,random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#et = RandomForestClassifier(n_estimators=100, n_jobs=10, verbose=2,random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "et.fit(train_df[feature],train_df['target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time \n",
    "y_pred=et.predict(test_df[feature])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(classification_report(test_df['target'], y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#train_df.to_csv('1_min_train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#test_df.to_csv('1_min_test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train_df.to_csv('30_sec_train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# test_df.to_csv('30_sec_test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df.to_csv('2_min_train.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_df.to_csv('2_min_test.csv')s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "scores = cross_val_score(et, train_df[feature],train_df['target'], cv=4)\n",
    "print(scores)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 10 folds for each of 6 candidates, totalling 60 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  17.2s\n",
      "[CV] n_neighbors=1 ...................................................\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   17.2s remaining:    0.0s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] .................................... n_neighbors=1, total=  22.0s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  18.1s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  13.1s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  21.9s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  30.5s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  17.4s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  19.3s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  21.8s\n",
      "[CV] n_neighbors=1 ...................................................\n",
      "[CV] .................................... n_neighbors=1, total=  21.4s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  21.4s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  28.9s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  20.3s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  15.2s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  21.3s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  24.5s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  19.0s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  20.7s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  24.1s\n",
      "[CV] n_neighbors=3 ...................................................\n",
      "[CV] .................................... n_neighbors=3, total=  23.5s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  22.2s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  24.9s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  21.1s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  13.9s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  22.0s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  27.2s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  19.0s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  21.3s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  25.0s\n",
      "[CV] n_neighbors=5 ...................................................\n",
      "[CV] .................................... n_neighbors=5, total=  24.3s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  22.7s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  25.2s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  21.6s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  13.8s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  22.1s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  28.4s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  19.4s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  21.6s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  25.3s\n",
      "[CV] n_neighbors=7 ...................................................\n",
      "[CV] .................................... n_neighbors=7, total=  24.4s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  23.3s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  25.8s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  22.1s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  13.8s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  22.5s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  28.0s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  21.1s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  22.1s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  26.2s\n",
      "[CV] n_neighbors=9 ...................................................\n",
      "[CV] .................................... n_neighbors=9, total=  24.9s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  24.0s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  26.5s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  29.7s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  14.2s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  23.2s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  28.7s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  21.7s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  22.7s\n",
      "[CV] n_neighbors=11 ..................................................\n",
      "[CV] ................................... n_neighbors=11, total=  27.1s\n",
      "[CV] n_neighbors=11 ..................................................\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[CV] ................................... n_neighbors=11, total=  25.6s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed: 22.4min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=10, error_score='raise-deprecating',\n",
       "             estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30,\n",
       "                                            metric='minkowski',\n",
       "                                            metric_params=None, n_jobs=None,\n",
       "                                            n_neighbors=5, p=2,\n",
       "                                            weights='uniform'),\n",
       "             iid='warn', n_jobs=None,\n",
       "             param_grid={'n_neighbors': [1, 3, 5, 7, 9, 11]},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring=None, verbose=2)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "\n",
    "# Candidate neighbourhood sizes to compare (odd values from 1 to 11).\n",
    "param_grid = {\n",
    "    'n_neighbors': [1, 3, 5, 7, 9, 11],\n",
    "}\n",
    "\n",
    "clf = KNeighborsClassifier()\n",
    "\n",
    "# 10-fold cross-validated search over n_neighbors (6 candidates x 10 folds = 60 fits).\n",
    "# The variable name CV_rfc is kept because later cells read CV_rfc.best_params_.\n",
    "# verbose=1 prints only the overall progress summary; higher levels add log\n",
    "# lines for every single fit, which bloats the saved notebook.\n",
    "CV_rfc = GridSearchCV(estimator=clf, param_grid=param_grid, cv=10, verbose=1)\n",
    "CV_rfc.fit(train_df[feature], train_df['target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'n_neighbors': 1}"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Parameter setting that achieved the best cross-validation result in the search.\n",
    "CV_rfc.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extra-trees ensemble: 20 trees, 10 parallel jobs, fixed random seed.\n",
    "# This cell only constructs the estimator (no %%time: there is nothing\n",
    "# expensive to measure here); training happens in the following cell.\n",
    "et = ExtraTreesClassifier(n_estimators=20, n_jobs=10, verbose=2, random_state=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Training is the expensive step for this model, so record its cost.\n",
    "et.fit(train_df[feature], train_df['target'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "# Class predictions of the fitted extra-trees model for the rows of test_df.\n",
    "y_pred = et.predict(test_df[feature])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text report of per-class metrics for the extra-trees predictions held in y_pred.\n",
    "print(classification_report(y_true=test_df['target'], y_pred=y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuse the KNN model selected by the grid search instead of hard-coding\n",
    "# n_neighbors: CV_rfc was built with refit=True (the default), so\n",
    "# best_estimator_ is already fitted on the data passed to CV_rfc.fit using\n",
    "# CV_rfc.best_params_, and no second fit is required.\n",
    "clf = CV_rfc.best_estimator_\n",
    "y_pred = clf.predict(test_df[feature])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text report of per-class metrics for the KNN predictions now held in y_pred.\n",
    "print(classification_report(y_true=test_df['target'], y_pred=y_pred))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}