--- a +++ b/Random Model Stacking.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 42 µs, sys: 10 µs, total: 52 µs\n", + "Wall time: 56.5 µs\n" + ] + } + ], + "source": [ + "%%time\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.ensemble import ExtraTreesClassifier\n", + "from sklearn.metrics import classification_report\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from itertools import combinations \n", + "from mlxtend.classifier import StackingClassifier\n", + "from sklearn import model_selection\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 46.9 s, sys: 10.7 s, total: 57.6 s\n", + "Wall time: 57.6 s\n" + ] + } + ], + "source": [ + "%%time\n", + "train = pd.read_csv(\"1_min_train.csv\")\n", + "test = pd.read_csv(\"1_min_test.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2520000, 11)\n", + "(28950603, 11)\n" + ] + } + ], + "source": [ + "print(train.shape)\n", + "print(test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['chest_ACC_x',\n", + " 'chest_ACC_y',\n", + " 'chest_ACC_z',\n", + " 'chest_ECG',\n", + " 'chest_EMG',\n", + " 'chest_EDA',\n", + " 'chest_Temp',\n", + " 'chest_Resp']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ +
"# Model inputs: every column of `train` after the first three.\n", + "features = train.columns.tolist()[3:]\n", + "features" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "clf1 = ExtraTreesClassifier(n_estimators=50, n_jobs=10, verbose=1, random_state=0)\n", + "clf2 = DecisionTreeClassifier(random_state=0)  # seeded like clf1 so repeated runs give the same scores\n", + "clf3 = RandomForestClassifier(n_estimators=10, random_state=0)  # seeded like clf1\n", + "clf4 = LogisticRegression()  # passed to StackingClassifier as its meta_classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "sclf = StackingClassifier(classifiers=[clf1, clf2, clf3], meta_classifier=clf4)  # base learners clf1-clf3 stacked under clf4" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n", + "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.\n", + " \"timeout or by a memory leak.\", UserWarning\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 2.5min\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 4.2min finished\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 15.9s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 23.4s finished\n", + "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n", + "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor.
This can be caused by a too short worker timeout or by a memory leak.\n", + " \"timeout or by a memory leak.\", UserWarning\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 1.3min\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 2.4min finished\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 15.5s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 24.3s finished\n", + "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 1.4min\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 2.6min finished\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 16.7s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 25.8s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.49 (+/- 0.10) [ExtraTreesClassifier]\n", + "Accuracy: 0.38 (+/- 0.06) [DecisionTreeClassifier]\n", + "Accuracy: 0.43 (+/- 0.10) [RandomForestClassifier]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 1.3min\n", + "/home/sf/.local/lib/python3.6/site-packages/joblib/externals/loky/process_executor.py:706: UserWarning: A worker stopped while some jobs were given to the executor. 
This can be caused by a too short worker timeout or by a memory leak.\n", + " \"timeout or by a memory leak.\", UserWarning\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 2.0min finished\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 20.1s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 30.6s finished\n", + "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 11.9s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 17.0s finished\n", + "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 1.4min\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 2.7min finished\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 19.8s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 37.8s finished\n", + "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. 
Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 14.4s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 23.7s finished\n", + "[Parallel(n_jobs=10)]: Using backend LokyBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 1.6min\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 2.8min finished\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 16.6s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 26.6s finished\n", + "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", + " FutureWarning)\n", + "/home/sf/.local/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. 
Specify the multi_class option to silence this warning.\n", + " \"this warning.\", FutureWarning)\n", + "[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.\n", + "[Parallel(n_jobs=10)]: Done 30 tasks | elapsed: 8.7s\n", + "[Parallel(n_jobs=10)]: Done 50 out of 50 | elapsed: 13.6s finished\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.37 (+/- 0.06) [LogisticRegression]\n" + ] + } + ], + "source": [ + "# 3-fold cross-validated accuracy per model; the fourth entry is the stacked ensemble `sclf`, so it is labelled as such.\n", + "# NOTE(review): scoring runs on `test`, while `train` is only used to derive `features` -- confirm this is the intended evaluation set.\n", + "for clf, label in zip([clf1, clf2, clf3, sclf], ['ExtraTreesClassifier', 'DecisionTreeClassifier', 'RandomForestClassifier', 'StackingClassifier']):\n", + "    scores = model_selection.cross_val_score(clf, test[features], test['target'], cv=3, scoring='accuracy')\n", + "    print(\"Accuracy: %0.2f (+/- %0.2f) [%s]\" % (scores.mean(), scores.std(), label))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}