a b/.ipynb_checkpoints/Random Model Stacking-checkpoint.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 16,
6
   "metadata": {
7
    "scrolled": true
8
   },
9
   "outputs": [
10
    {
11
     "name": "stdout",
12
     "output_type": "stream",
13
     "text": [
14
      "CPU times: user 42 µs, sys: 10 µs, total: 52 µs\n",
15
      "Wall time: 56.5 µs\n"
16
     ]
17
    }
18
   ],
19
   "source": [
20
    "%%time\n",
21
    "import pandas as pd\n",
22
    "import numpy as np\n",
23
    "from sklearn.ensemble import ExtraTreesClassifier\n",
24
    "from sklearn.metrics import classification_report\n",
25
    "from sklearn.model_selection import train_test_split\n",
26
    "from sklearn.ensemble import RandomForestClassifier\n",
27
    "from sklearn.linear_model import LogisticRegression\n",
28
    "from sklearn.tree import DecisionTreeClassifier\n",
29
    "from itertools import combinations \n",
30
    "from mlxtend.classifier import StackingClassifier\n",
31
    "from sklearn import model_selection\n",
32
    "\n"
33
   ]
34
  },
35
  {
36
   "cell_type": "code",
37
   "execution_count": 4,
38
   "metadata": {},
39
   "outputs": [
40
    {
41
     "name": "stdout",
42
     "output_type": "stream",
43
     "text": [
44
      "CPU times: user 46.9 s, sys: 10.7 s, total: 57.6 s\n",
45
      "Wall time: 57.6 s\n"
46
     ]
47
    }
48
   ],
49
   "source": [
50
    "%%time\n",
51
    "# Read the train and test CSV files (paths are relative to the working directory).\n",
52
    "train, test = (pd.read_csv(path) for path in (\"1_min_train.csv\", \"1_min_test.csv\"))"
53
   ]
54
  },
55
  {
56
   "cell_type": "code",
57
   "execution_count": 5,
58
   "metadata": {},
59
   "outputs": [
60
    {
61
     "name": "stdout",
62
     "output_type": "stream",
63
     "text": [
64
      "(2520000, 11)\n",
65
      "(28950603, 11)\n"
66
     ]
67
    }
68
   ],
69
   "source": [
70
    "# Print the (rows, columns) shape of each frame, one per line.\n",
71
    "print(train.shape, test.shape, sep=\"\\n\")"
72
   ]
73
  },
74
  {
75
   "cell_type": "code",
76
   "execution_count": 6,
77
   "metadata": {},
78
   "outputs": [
79
    {
80
     "data": {
81
      "text/plain": [
82
       "['chest_ACC_x',\n",
83
       " 'chest_ACC_y',\n",
84
       " 'chest_ACC_z',\n",
85
       " 'chest_ECG',\n",
86
       " 'chest_EMG',\n",
87
       " 'chest_EDA',\n",
88
       " 'chest_Temp',\n",
89
       " 'chest_Resp']"
90
      ]
91
     },
92
     "execution_count": 6,
93
     "metadata": {},
94
     "output_type": "execute_result"
95
    }
96
   ],
97
   "source": [
98
    "# Model inputs are every column from position 3 onward.\n",
99
    "features = train.columns[3:].tolist()\n",
100
    "features"
101
   ]
102
  },
103
  {
104
   "cell_type": "code",
105
   "execution_count": 10,
106
   "metadata": {},
107
   "outputs": [],
108
   "source": [
109
    "# Seed every estimator (not only clf1) so repeated runs give the same scores.\n",
    "clf1 = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=1,random_state=0)\n",
110
    "clf2 = DecisionTreeClassifier(random_state=0)\n",
111
    "clf3 = RandomForestClassifier(n_estimators=10, random_state=0)\n",
112
    "clf4 = LogisticRegression(random_state=0)"
113
   ]
114
  },
115
  {
116
   "cell_type": "code",
117
   "execution_count": 14,
118
   "metadata": {},
119
   "outputs": [],
120
   "source": [
121
    "# Stacked ensemble: clf1-clf3 are the base models, clf4 is the meta-classifier.\n",
    "sclf = StackingClassifier(\n",
    "    classifiers=[clf1, clf2, clf3],\n",
    "    meta_classifier=clf4,\n",
    ")"
122
   ]
123
  },
124
  {
125
   "cell_type": "code",
126
   "execution_count": 18,
127
   "metadata": {
128
    "scrolled": true
129
   },
130
   "outputs": [
131
    {
132
     "name": "stderr",
133
     "output_type": "stream",
134
     "text": [
135
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
136
      "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
137
      "  \"timeout or by a memory leak.\", UserWarning\n",
138
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  2.5min\n",
139
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  4.2min finished\n",
140
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
141
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   15.9s\n",
142
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   23.4s finished\n",
143
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
144
      "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
145
      "  \"timeout or by a memory leak.\", UserWarning\n",
146
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.3min\n",
147
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.4min finished\n",
148
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
149
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   15.5s\n",
150
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   24.3s finished\n",
151
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
152
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.4min\n",
153
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.6min finished\n",
154
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
155
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   16.7s\n",
156
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   25.8s finished\n"
157
     ]
158
    },
159
    {
160
     "name": "stdout",
161
     "output_type": "stream",
162
     "text": [
163
      "Accuracy: 0.49 (+/- 0.10) [ExtraTreesClassifier]\n",
164
      "Accuracy: 0.38 (+/- 0.06) [DecisionTreeClassifier]\n",
165
      "Accuracy: 0.43 (+/- 0.10) [RandomForestClassifier]\n"
166
     ]
167
    },
168
    {
169
     "name": "stderr",
170
     "output_type": "stream",
171
     "text": [
172
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
173
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.3min\n",
174
      "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
175
      "  \"timeout or by a memory leak.\", UserWarning\n",
176
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.0min finished\n",
177
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
178
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   20.1s\n",
179
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   30.6s finished\n",
180
      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
181
      "  FutureWarning)\n",
182
      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
183
      "  \"this warning.\", FutureWarning)\n",
184
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
185
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   11.9s\n",
186
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   17.0s finished\n",
187
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
188
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.4min\n",
189
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.7min finished\n",
190
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
191
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   19.8s\n",
192
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   37.8s finished\n",
193
      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
194
      "  FutureWarning)\n",
195
      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
196
      "  \"this warning.\", FutureWarning)\n",
197
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
198
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   14.4s\n",
199
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   23.7s finished\n",
200
      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
201
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.6min\n",
202
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.8min finished\n",
203
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
204
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   16.6s\n",
205
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   26.6s finished\n",
206
      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
207
      "  FutureWarning)\n",
208
      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
209
      "  \"this warning.\", FutureWarning)\n",
210
      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
211
      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    8.7s\n",
212
      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   13.6s finished\n"
213
     ]
214
    },
215
    {
216
     "name": "stdout",
217
     "output_type": "stream",
218
     "text": [
219
      "Accuracy: 0.37 (+/- 0.06) [LogisticRegression]\n"
220
     ]
221
    }
222
   ],
223
   "source": [
224
    "for clf, label in zip([clf1, clf2, clf3, sclf], ['ExtraTreesClassifier','DecisionTreeClassifier','RandomForestClassifier','StackingClassifier']):\n",
225
    "    # NOTE(review): cross-validation runs on `test`; `train` is loaded earlier but never used -- confirm this is intended.\n",
226
    "    scores = model_selection.cross_val_score(clf, test[features], test['target'],cv=3, scoring='accuracy')\n",
227
    "    # The 4th entry is the stacked ensemble (sclf), so it is reported as StackingClassifier, not by its meta-classifier's name.\n",
228
    "    print(\"Accuracy: %0.2f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
229
   ]
230
  },
231
  {
232
   "cell_type": "code",
233
   "execution_count": null,
234
   "metadata": {},
235
   "outputs": [],
236
   "source": []
237
  }
238
 ],
239
 "metadata": {
240
  "kernelspec": {
241
   "display_name": "Python 3",
242
   "language": "python",
243
   "name": "python3"
244
  },
245
  "language_info": {
246
   "codemirror_mode": {
247
    "name": "ipython",
248
    "version": 3
249
   },
250
   "file_extension": ".py",
251
   "mimetype": "text/x-python",
252
   "name": "python",
253
   "nbconvert_exporter": "python",
254
   "pygments_lexer": "ipython3",
255
   "version": "3.5.2"
256
  }
257
 },
258
 "nbformat": 4,
259
 "nbformat_minor": 2
260
}