Switch to side-by-side view

--- a
+++ b/Random Model Stacking.ipynb
@@ -0,0 +1,260 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 42 µs, sys: 10 µs, total: 52 µs\n",
+      "Wall time: 56.5 µs\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from sklearn.ensemble import ExtraTreesClassifier\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from itertools import combinations \n",
+    "from mlxtend.classifier import StackingClassifier\n",
+    "from sklearn import model_selection\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "CPU times: user 46.9 s, sys: 10.7 s, total: 57.6 s\n",
+      "Wall time: 57.6 s\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%time\n",
+    "# Read the train and test sets from CSV files in the working directory.\n",
+    "train = pd.read_csv('1_min_train.csv')\n",
+    "test = pd.read_csv('1_min_test.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(2520000, 11)\n",
+      "(28950603, 11)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Report (rows, columns) for the train and test frames, one per line.\n",
+    "print(train.shape, test.shape, sep='\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['chest_ACC_x',\n",
+       " 'chest_ACC_y',\n",
+       " 'chest_ACC_z',\n",
+       " 'chest_ECG',\n",
+       " 'chest_EMG',\n",
+       " 'chest_EDA',\n",
+       " 'chest_Temp',\n",
+       " 'chest_Resp']"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Keep every column after the first three as a model input.\n",
+    "# NOTE(review): assumes the three leading columns are non-feature columns -- confirm.\n",
+    "features = train.columns[3:].tolist()\n",
+    "features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Estimators for the stack. Every randomized estimator gets a fixed seed so the\n",
+    "# cross-validation scores are reproducible across kernel restarts.\n",
+    "clf1 = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=1, random_state=0)\n",
+    "clf2 = DecisionTreeClassifier(random_state=0)\n",
+    "clf3 = RandomForestClassifier(n_estimators=10, random_state=0)\n",
+    "# Used as the meta-classifier of the stacked model.\n",
+    "clf4 = LogisticRegression()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Stack clf1-clf3 as the base classifiers, with clf4 as the meta-classifier.\n",
+    "sclf = StackingClassifier(\n",
+    "    classifiers=[clf1, clf2, clf3],\n",
+    "    meta_classifier=clf4,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
+      "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
+      "  \"timeout or by a memory leak.\", UserWarning\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  2.5min\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  4.2min finished\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   15.9s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   23.4s finished\n",
+      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
+      "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
+      "  \"timeout or by a memory leak.\", UserWarning\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.3min\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.4min finished\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   15.5s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   24.3s finished\n",
+      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.4min\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.6min finished\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   16.7s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   25.8s finished\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.49 (+/- 0.10) [ExtraTreesClassifier]\n",
+      "Accuracy: 0.38 (+/- 0.06) [DecisionTreeClassifier]\n",
+      "Accuracy: 0.43 (+/- 0.10) [RandomForestClassifier]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.3min\n",
+      "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n",
+      "  \"timeout or by a memory leak.\", UserWarning\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.0min finished\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   20.1s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   30.6s finished\n",
+      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "  FutureWarning)\n",
+      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "  \"this warning.\", FutureWarning)\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   11.9s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   17.0s finished\n",
+      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.4min\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.7min finished\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   19.8s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   37.8s finished\n",
+      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "  FutureWarning)\n",
+      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "  \"this warning.\", FutureWarning)\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   14.4s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   23.7s finished\n",
+      "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:  1.6min\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:  2.8min finished\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:   16.6s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   26.6s finished\n",
+      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "  FutureWarning)\n",
+      "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "  \"this warning.\", FutureWarning)\n",
+      "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n",
+      "[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    8.7s\n",
+      "[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:   13.6s finished\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Accuracy: 0.37 (+/- 0.06) [LogisticRegression]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Score each base model and the stacked ensemble with the same 3-fold CV.\n",
+    "# NOTE(review): scoring runs on `test`, not `train` -- confirm this is intended.\n",
+    "for clf in (clf1, clf2, clf3, sclf):\n",
+    "    # Take the label from the estimator itself so it always names the model being\n",
+    "    # scored; labelling the stacked model by its meta-classifier\n",
+    "    # ('LogisticRegression') would attribute the score to the wrong model.\n",
+    "    label = type(clf).__name__\n",
+    "    scores = model_selection.cross_val_score(clf, test[features], test['target'], cv=3, scoring='accuracy')\n",
+    "    print(\"Accuracy: %0.2f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}