Discharge-Prediction_eICU / Git / Diff of /Hierarchical/R2-LR-Hierarchical.ipynb

Models:

RaymondKing/

Discharge-Prediction_eICU

Downloads: 1

Diff of /Hierarchical/R2-LR-Hierarchical.ipynb [000000] .. [602e64]

Switch to unified view

 b/Hierarchical/R2-LR-Hierarchical.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/psycopg2/__init__.py:144: UserWarning: The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use \"pip install psycopg2-binary\" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.\n",
+      "  \"\"\")\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "        <script type=\"text/javascript\">\n",
+       "        window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
+       "        if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
+       "        if (typeof require !== 'undefined') {\n",
+       "        require.undef(\"plotly\");\n",
+       "        requirejs.config({\n",
+       "            paths: {\n",
+       "                'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
+       "            }\n",
+       "        });\n",
+       "        require(['plotly'], function(Plotly) {\n",
+       "            window._Plotly = Plotly;\n",
+       "        });\n",
+       "        }\n",
+       "        </script>\n",
+       "        "
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Import libraries\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import psycopg2\n",
+    "import getpass\n",
+    "import pdvega\n",
+    "import plotly.graph_objs as go\n",
+    "\n",
+    "from plotly.offline import iplot, init_notebook_mode\n",
+    "import plotly.io as pio\n",
+    "from plotly.graph_objs import *\n",
+    "\n",
+    "# for configuring connection \n",
+    "from configobj import ConfigObj\n",
+    "import os\n",
+    "\n",
+    "%matplotlib inline\n",
+    "\n",
+    "\n",
+    "import os\n",
+    "\n",
+    "\n",
+    "from sklearn import linear_model\n",
+    "from sklearn import metrics\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "#configure the notebook for use in offline mode\n",
+    "init_notebook_mode(connected=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>hospitalid</th>\n",
+       "      <th>sodium</th>\n",
+       "      <th>electivesurgery</th>\n",
+       "      <th>vent</th>\n",
+       "      <th>dialysis</th>\n",
+       "      <th>gcs</th>\n",
+       "      <th>urine</th>\n",
+       "      <th>wbc</th>\n",
+       "      <th>temperature</th>\n",
+       "      <th>...</th>\n",
+       "      <th>m11_True</th>\n",
+       "      <th>m12_True</th>\n",
+       "      <th>m13_True</th>\n",
+       "      <th>m14_True</th>\n",
+       "      <th>m15_True</th>\n",
+       "      <th>m16_True</th>\n",
+       "      <th>m17_True</th>\n",
+       "      <th>m18_True</th>\n",
+       "      <th>m19_True</th>\n",
+       "      <th>m20_True</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>59.0</td>\n",
+       "      <td>139.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>14.7</td>\n",
+       "      <td>36.1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>73.0</td>\n",
+       "      <td>134.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>14.1</td>\n",
+       "      <td>39.3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>73.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>34.8</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>63.0</td>\n",
+       "      <td>137.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>10.9</td>\n",
+       "      <td>36.6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>63.0</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>-1.0</td>\n",
+       "      <td>5.9</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 85 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0  hospitalid  sodium  electivesurgery  vent  dialysis   gcs  \\\n",
+       "0           0        59.0   139.0             -1.0   0.0       0.0  15.0   \n",
+       "1           1        73.0   134.0             -1.0   0.0       0.0  13.0   \n",
+       "2           2        73.0    -1.0              1.0   1.0       0.0  15.0   \n",
+       "3           3        63.0   137.0             -1.0   0.0       0.0  15.0   \n",
+       "4           4        63.0   135.0             -1.0   0.0       0.0  15.0   \n",
+       "\n",
+       "   urine   wbc  temperature    ...     m11_True  m12_True  m13_True  m14_True  \\\n",
+       "0   -1.0  14.7         36.1    ...            1         0         0         1   \n",
+       "1   -1.0  14.1         39.3    ...            1         0         0         1   \n",
+       "2   -1.0   8.0         34.8    ...            0         0         1         0   \n",
+       "3   -1.0  10.9         36.6    ...            1         0         1         1   \n",
+       "4   -1.0   5.9         35.0    ...            0         0         1         0   \n",
+       "\n",
+       "   m15_True  m16_True  m17_True  m18_True  m19_True  m20_True  \n",
+       "0         1         0         0         0         1         0  \n",
+       "1         1         0         0         0         1         0  \n",
+       "2         0         1         0         1         0         0  \n",
+       "3         1         0         0         1         1         0  \n",
+       "4         0         0         0         1         0         0  \n",
+       "\n",
+       "[5 rows x 85 columns]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2= pd.read_csv(\"Analysis.csv\")\n",
+    "df2.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(95148, 85)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "del df2['hospitalid']\n",
+    "\n",
+    "df2 = df2.drop(df2.columns[[0]], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cols_to_norm=['gcs', 'urine', 'wbc', 'sodium',\n",
+    "       'temperature', 'respiratoryrate', 'heartrate', 'meanbp', 'creatinine',\n",
+    "       'ph', 'hematocrit', 'albumin', 'pao2', 'pco2', 'bun', 'glucose',\n",
+    "       'bilirubin', 'fio2', 'age', 'offset']\n",
+    "\n",
+    "X=df2.drop('destcopy', 1)\n",
+    "y=df2['destcopy']\n",
+    "df_cols = list(X)     #fancy impute removes column names."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**We moved all the pre-processing including splitting>imputation>Standardization to the CV iterations**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['sodium', 'electivesurgery', 'vent', 'dialysis', 'gcs', 'urine', 'wbc',\n",
+       "       'temperature', 'respiratoryrate', 'heartrate', 'meanbp', 'creatinine',\n",
+       "       'ph', 'hematocrit', 'albumin', 'pao2', 'pco2', 'bun', 'glucose',\n",
+       "       'bilirubin', 'fio2', 'age', 'thrombolytics', 'aids', 'hepaticfailure',\n",
+       "       'lymphoma', 'metastaticcancer', 'leukemia', 'immunosuppression',\n",
+       "       'cirrhosis', 'readmit', 'offset', 'destcopy', 'admitsource_1.0',\n",
+       "       'admitsource_2.0', 'admitsource_3.0', 'admitsource_4.0',\n",
+       "       'admitsource_5.0', 'admitsource_6.0', 'admitsource_7.0',\n",
+       "       'admitsource_8.0', 'diaggroup_ARF', 'diaggroup_Asthma-Emphys',\n",
+       "       'diaggroup_CABG', 'diaggroup_CHF', 'diaggroup_CVA', 'diaggroup_CVOther',\n",
+       "       'diaggroup_CardiacArrest', 'diaggroup_ChestPainUnknown',\n",
+       "       'diaggroup_Coma', 'diaggroup_DKA', 'diaggroup_GIBleed',\n",
+       "       'diaggroup_GIObstruction', 'diaggroup_Neuro', 'diaggroup_Other',\n",
+       "       'diaggroup_Overdose', 'diaggroup_PNA', 'diaggroup_RespMedOther',\n",
+       "       'diaggroup_Sepsis', 'diaggroup_Trauma', 'diaggroup_ValveDz',\n",
+       "       'gender_Male', 'gender_Other', 'm1_True', 'm2_True', 'm3_True',\n",
+       "       'm4_True', 'm5_True', 'm6_True', 'm7_True', 'm8_True', 'm9_True',\n",
+       "       'm10_True', 'm11_True', 'm12_True', 'm13_True', 'm14_True', 'm15_True',\n",
+       "       'm16_True', 'm17_True', 'm18_True', 'm19_True', 'm20_True'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn import svm\n",
+    "from sklearn.decomposition import TruncatedSVD\n",
+    "from sklearn.metrics import classification_report\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.pipeline import make_pipeline\n",
+    "\n",
+    "from sklearn_hierarchical_classification.classifier import HierarchicalClassifier\n",
+    "from sklearn_hierarchical_classification.constants import ROOT\n",
+    "from sklearn_hierarchical_classification.metrics import h_fbeta_score, multi_labeled"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "**Random Forest**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class_hierarchy = {\n",
+    "        ROOT: [\"1\", \"A\"],\n",
+    "        \"A\": [\"2\", \"B\"],\n",
+    "        \"B\": [\"3\", \"4\"],\n",
+    "    }"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Enabling notebook extension jupyter-js-widgets/extension...\n",
+      "      - Validating: \u001b[32mOK\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "!jupyter nbextension enable --py --sys-prefix widgetsnbextension"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from collections import Counter"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using TensorFlow backend.\n",
+      "/anaconda3/lib/python3.7/site-packages/lightgbm/__init__.py:46: UserWarning:\n",
+      "\n",
+      "Starting from version 2.2.1, the library file in distribution wheels for macOS is built by the Apple Clang (Xcode_9.4.1) compiler.\n",
+      "This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.\n",
+      "Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.\n",
+      "You can install the OpenMP library by the following command: ``brew install libomp``.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[('1', 8852), ('2', 59596), ('3', 12940), ('4', 4244)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 1:\n",
+      "Accuracy: 0.6075031525851198\n",
+      "f-score: 0.6075031525851198\n",
+      "[('1', 984), ('2', 6622), ('3', 1438), ('4', 472)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.33      0.77      0.82      0.46      0.79      0.62       984\n",
+      "          2       0.90      0.67      0.82      0.77      0.74      0.55      6622\n",
+      "          3       0.32      0.30      0.88      0.31      0.52      0.25      1438\n",
+      "          4       0.16      0.29      0.92      0.20      0.52      0.25       472\n",
+      "\n",
+      "avg / total       0.71      0.61      0.84      0.64      0.70      0.50      9516\n",
+      "\n",
+      "[('1', 8852), ('2', 59596), ('3', 12940), ('4', 4244)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 2:\n",
+      "Accuracy: 0.5763976460697772\n",
+      "f-score: 0.5763976460697772\n",
+      "[('1', 984), ('2', 6622), ('3', 1438), ('4', 472)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.29      0.82      0.77      0.43      0.80      0.63       984\n",
+      "          2       0.89      0.65      0.82      0.75      0.73      0.52      6622\n",
+      "          3       0.26      0.21      0.90      0.23      0.43      0.17      1438\n",
+      "          4       0.12      0.21      0.92      0.15      0.43      0.18       472\n",
+      "\n",
+      "avg / total       0.70      0.58      0.83      0.61      0.68      0.46      9516\n",
+      "\n",
+      "[('1', 8852), ('2', 59596), ('3', 12940), ('4', 4244)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 3:\n",
+      "Accuracy: 0.5468684321143338\n",
+      "f-score: 0.5468684321143338\n",
+      "[('1', 984), ('2', 6622), ('3', 1438), ('4', 472)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.30      0.83      0.78      0.45      0.81      0.65       984\n",
+      "          2       0.87      0.61      0.79      0.71      0.69      0.47      6622\n",
+      "          3       0.21      0.18      0.88      0.19      0.40      0.15      1438\n",
+      "          4       0.11      0.24      0.90      0.15      0.46      0.20       472\n",
+      "\n",
+      "avg / total       0.67      0.55      0.81      0.58      0.65      0.43      9516\n",
+      "\n",
+      "[('1', 8852), ('2', 59596), ('3', 12940), ('4', 4244)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 4:\n",
+      "Accuracy: 0.5755569567044977\n",
+      "f-score: 0.5755569567044977\n",
+      "[('1', 984), ('2', 6622), ('3', 1438), ('4', 472)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.31      0.78      0.80      0.44      0.79      0.62       984\n",
+      "          2       0.87      0.66      0.77      0.75      0.71      0.50      6622\n",
+      "          3       0.23      0.14      0.91      0.18      0.36      0.12      1438\n",
+      "          4       0.10      0.23      0.90      0.14      0.45      0.19       472\n",
+      "\n",
+      "avg / total       0.67      0.58      0.80      0.60      0.66      0.44      9516\n",
+      "\n",
+      "[('1', 8852), ('2', 59596), ('3', 12940), ('4', 4244)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 5:\n",
+      "Accuracy: 0.6144388398486759\n",
+      "f-score: 0.6144388398486759\n",
+      "[('1', 984), ('2', 6622), ('3', 1438), ('4', 472)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.35      0.82      0.83      0.49      0.82      0.67       984\n",
+      "          2       0.88      0.70      0.79      0.78      0.74      0.55      6622\n",
+      "          3       0.26      0.19      0.90      0.22      0.41      0.16      1438\n",
+      "          4       0.13      0.25      0.91      0.17      0.47      0.21       472\n",
+      "\n",
+      "avg / total       0.70      0.61      0.82      0.64      0.69      0.49      9516\n",
+      "\n",
+      "[('1', 8852), ('2', 59596), ('3', 12940), ('4', 4244)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 6:\n",
+      "Accuracy: 0.6281000420344682\n",
+      "f-score: 0.6281000420344682\n",
+      "[('1', 984), ('2', 6622), ('3', 1438), ('4', 472)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.34      0.79      0.83      0.48      0.81      0.65       984\n",
+      "          2       0.88      0.73      0.78      0.80      0.76      0.57      6622\n",
+      "          3       0.24      0.18      0.90      0.21      0.40      0.15      1438\n",
+      "          4       0.15      0.21      0.94      0.17      0.45      0.18       472\n",
+      "\n",
+      "avg / total       0.69      0.63      0.81      0.65      0.69      0.49      9516\n",
+      "\n",
+      "[('1', 8853), ('2', 59596), ('3', 12940), ('4', 4245)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 7:\n",
+      "Accuracy: 0.6210847172587766\n",
+      "f-score: 0.6210847172587766\n",
+      "[('1', 983), ('2', 6622), ('3', 1438), ('4', 471)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.35      0.77      0.83      0.48      0.80      0.64       983\n",
+      "          2       0.89      0.69      0.81      0.78      0.75      0.56      6622\n",
+      "          3       0.31      0.31      0.88      0.31      0.52      0.26      1438\n",
+      "          4       0.15      0.24      0.93      0.18      0.48      0.21       471\n",
+      "\n",
+      "avg / total       0.71      0.62      0.83      0.65      0.71      0.50      9514\n",
+      "\n",
+      "[('1', 8853), ('2', 59596), ('3', 12940), ('4', 4245)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 8:\n",
+      "Accuracy: 0.6231868824889636\n",
+      "f-score: 0.6231868824889636\n",
+      "[('1', 983), ('2', 6622), ('3', 1438), ('4', 471)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.37      0.75      0.85      0.49      0.80      0.63       983\n",
+      "          2       0.88      0.70      0.79      0.78      0.74      0.55      6622\n",
+      "          3       0.30      0.29      0.88      0.30      0.51      0.24      1438\n",
+      "          4       0.16      0.29      0.92      0.21      0.52      0.25       471\n",
+      "\n",
+      "avg / total       0.71      0.62      0.81      0.65      0.70      0.49      9514\n",
+      "\n",
+      "[('1', 8853), ('2', 59597), ('3', 12941), ('4', 4245)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 9:\n",
+      "Accuracy: 0.6289949537426409\n",
+      "f-score: 0.6289949537426409\n",
+      "[('1', 983), ('2', 6621), ('3', 1437), ('4', 471)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.37      0.80      0.84      0.51      0.82      0.68       983\n",
+      "          2       0.89      0.72      0.80      0.80      0.76      0.57      6621\n",
+      "          3       0.29      0.22      0.90      0.25      0.45      0.19      1437\n",
+      "          4       0.10      0.20      0.91      0.14      0.43      0.17       471\n",
+      "\n",
+      "avg / total       0.71      0.63      0.82      0.65      0.70      0.50      9512\n",
+      "\n",
+      "[('1', 8853), ('2', 59597), ('3', 12941), ('4', 4245)]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:433: FutureWarning:\n",
+      "\n",
+      "Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
+      "\n",
+      "/anaconda3/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:460: FutureWarning:\n",
+      "\n",
+      "Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "For fold 10:\n",
+      "Accuracy: 0.6135407905803196\n",
+      "f-score: 0.6135407905803196\n",
+      "[('1', 983), ('2', 6621), ('3', 1437), ('4', 471)]\n",
+      "                   pre       rec       spe        f1       geo       iba       sup\n",
+      "\n",
+      "          1       0.32      0.82      0.80      0.46      0.81      0.66       983\n",
+      "          2       0.88      0.70      0.78      0.78      0.74      0.54      6621\n",
+      "          3       0.34      0.17      0.94      0.23      0.40      0.15      1437\n",
+      "          4       0.13      0.26      0.91      0.17      0.49      0.22       471\n",
+      "\n",
+      "avg / total       0.70      0.61      0.81      0.63      0.68      0.48      9512\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "<Figure size 576x396 with 0 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import KFold\n",
+    "from sklearn import preprocessing\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "from imblearn.over_sampling import SMOTENC\n",
+    "from sklearn.metrics import f1_score\n",
+    "from imblearn.metrics import classification_report_imbalanced\n",
+    "from fancyimpute import IterativeImputer\n",
+    "from yellowbrick.classifier import ROCAUC\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from hyperopt import hp, tpe\n",
+    "from hyperopt.fmin import fmin\n",
+    "from sklearn.model_selection import cross_val_score, StratifiedKFold\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.metrics import make_scorer\n",
+    "import xgboost as xgb\n",
+    "import lightgbm as lgbm\n",
+    "from sklearn.model_selection import StratifiedKFold\n",
+    "from collections import Counter\n",
+    "import io \n",
+    "\n",
+    "classes=['Death','Home','Nursing Home','Rehabilitation']\n",
+    "\n",
+    "\n",
+    "kf = StratifiedKFold(n_splits=10)\n",
+    "y = y.astype(str)\n",
+    "\n",
+    "for fold, (train_index, test_index) in enumerate(kf.split(X,y), 1):\n",
+    "    X_train = X.iloc[train_index]\n",
+    "    y_train = y.iloc[train_index]  # Based on your code, you might need a ravel call here, but I would look into how you're generating your y\n",
+    "    X_test = X.iloc[test_index]\n",
+    "    y_test = y.iloc[test_index]  # See comment on ravel and  y_train\n",
+    "    \n",
+    "    \n",
+    "#------------------------------IMPUTE Training Set------------------------------------\n",
+    "    \n",
+    "    # Use MICE to fill in each row's missing features\n",
+    "    X_train = pd.DataFrame(IterativeImputer(verbose=False, sample_posterior=True).fit_transform(X_train))\n",
+    "    X_train.columns = df_cols\n",
+    "\n",
+    "#------------------------------IMPUTE Testing Set------------------------------------ \n",
+    "\n",
+    "    # Use MICE to fill in each row's missing features\n",
+    "    X_test = pd.DataFrame(IterativeImputer(verbose=False, sample_posterior=True).fit_transform(X_test))\n",
+    "    X_test.columns = df_cols\n",
+    "    \n",
+    "#------------------------------Standardize Testing Set------------------------------------\n",
+    "    \n",
+    "    std_scale = preprocessing.StandardScaler().fit(X_train[cols_to_norm])\n",
+    "    X_train[cols_to_norm] = std_scale.transform(X_train[cols_to_norm])\n",
+    "    X_test[cols_to_norm] = std_scale.transform(X_test[cols_to_norm])\n",
+    "#------------------------------------------------------------------------------------------\n",
+    "\n",
+    " # Hyperparameters are optimized using hyperopt\n",
+    "\n",
+    "    #sm = SMOTE()\n",
+    "    \n",
+    "    #sm = SMOTENC(random_state=50, categorical_features=[1,2,3,22,23,24,25,26,27,28,29,30,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61, 62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81])\n",
+    "    #X_train_oversampled, y_train_oversampled = sm.fit_sample(X_train, y_train)\n",
+    "    print(sorted(Counter(y_train).items()))\n",
+    "    \n",
+    "    model = linear_model.LogisticRegression(class_weight='balanced') \n",
+    "    clf = HierarchicalClassifier(\n",
+    "        base_estimator=model,\n",
+    "        class_hierarchy=class_hierarchy,\n",
+    "    )\n",
+    "    clf.fit(X_train, y_train)  \n",
+    "    y_pred = clf.predict(X_test)\n",
+    "    visualizer = ROCAUC(model, classes=classes)\n",
+    "    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer\n",
+    "    visualizer.score(X_test, y_test)  # Evaluate the model on the test data\n",
+    "    visualizer.poof(\"LR_Hierarchy_{}.pdf\".format(fold), clear_figure=True) \n",
+    "    print(f'For fold {fold}:')\n",
+    "    print(f'Accuracy: {clf.score(X_test, y_test)}')\n",
+    "    f1=f1_score(y_test, y_pred, average='micro')\n",
+    "    print(f'f-score: {f1}')\n",
+    "    print(sorted(Counter(y_test).items()))\n",
+    "    print(classification_report_imbalanced(y_test, y_pred))\n",
+    "    K= classification_report_imbalanced(y_test, y_pred)\n",
+    "    df = pd.read_fwf(io.StringIO(K))\n",
+    "    df.loc[\"1\":\"1\",\"pre\":\"sup\"].to_csv(\"LR-Hierarchy-D.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n",
+    "    df.loc[\"2\":\"2\",\"pre\":\"sup\"].to_csv(\"LR-Hierarchy-H.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n",
+    "    df.loc[\"3\":\"3\",\"pre\":\"sup\"].to_csv(\"LR-Hierarchy-N.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n",
+    "    df.loc[\"4\":\"4\",\"pre\":\"sup\"].to_csv(\"LR-Hierarchy-R.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n",
+    "    df.iloc[6:7,:].to_csv(\"LR-Hierarchy-avg.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n",
+    "\n",
+    "        #\n",
+    "    \n",
+    "    #\n",
+    "\n",
+    "    \n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "y.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}