--- a +++ b/No-adjustment/R2-XB-U.ipynb @@ -0,0 +1,2115 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " <script type=\"text/javascript\">\n", + " window.PlotlyConfig = {MathJaxConfig: 'local'};\n", + " if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n", + " if (typeof require !== 'undefined') {\n", + " require.undef(\"plotly\");\n", + " requirejs.config({\n", + " paths: {\n", + " 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n", + " }\n", + " });\n", + " require(['plotly'], function(Plotly) {\n", + " window._Plotly = Plotly;\n", + " });\n", + " }\n", + " </script>\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Import libraries\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import getpass\n", + "import pdvega\n", + "import plotly.graph_objs as go\n", + "\n", + "from plotly.offline import iplot, init_notebook_mode\n", + "import plotly.io as pio\n", + "from plotly.graph_objs import *\n", + "\n", + "# for configuring connection \n", + "from configobj import ConfigObj\n", + "import os\n", + "\n", + "%matplotlib inline\n", + "\n", + "\n", + "import os\n", + "\n", + "\n", + "from sklearn import linear_model\n", + "from sklearn import metrics\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#configure the notebook for use in offline mode\n", + "init_notebook_mode(connected=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" 
class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>hospitalid</th>\n", + " <th>sodium</th>\n", + " <th>electivesurgery</th>\n", + " <th>vent</th>\n", + " <th>dialysis</th>\n", + " <th>gcs</th>\n", + " <th>urine</th>\n", + " <th>wbc</th>\n", + " <th>temperature</th>\n", + " <th>...</th>\n", + " <th>m11_True</th>\n", + " <th>m12_True</th>\n", + " <th>m13_True</th>\n", + " <th>m14_True</th>\n", + " <th>m15_True</th>\n", + " <th>m16_True</th>\n", + " <th>m17_True</th>\n", + " <th>m18_True</th>\n", + " <th>m19_True</th>\n", + " <th>m20_True</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>59.0</td>\n", + " <td>139.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>14.7</td>\n", + " <td>36.1</td>\n", + " <td>...</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>73.0</td>\n", + " <td>134.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>13.0</td>\n", + " <td>-1.0</td>\n", + " <td>14.1</td>\n", + " <td>39.3</td>\n", + " <td>...</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>73.0</td>\n", + " <td>-1.0</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>8.0</td>\n", + " <td>34.8</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " 
<td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>63.0</td>\n", + " <td>137.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>10.9</td>\n", + " <td>36.6</td>\n", + " <td>...</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>63.0</td>\n", + " <td>135.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>5.9</td>\n", + " <td>35.0</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 85 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 hospitalid sodium electivesurgery vent dialysis gcs \\\n", + "0 0 59.0 139.0 -1.0 0.0 0.0 15.0 \n", + "1 1 73.0 134.0 -1.0 0.0 0.0 13.0 \n", + "2 2 73.0 -1.0 1.0 1.0 0.0 15.0 \n", + "3 3 63.0 137.0 -1.0 0.0 0.0 15.0 \n", + "4 4 63.0 135.0 -1.0 0.0 0.0 15.0 \n", + "\n", + " urine wbc temperature ... m11_True m12_True m13_True m14_True \\\n", + "0 -1.0 14.7 36.1 ... 1 0 0 1 \n", + "1 -1.0 14.1 39.3 ... 1 0 0 1 \n", + "2 -1.0 8.0 34.8 ... 0 0 1 0 \n", + "3 -1.0 10.9 36.6 ... 1 0 1 1 \n", + "4 -1.0 5.9 35.0 ... 
0 0 1 0 \n", + "\n", + " m15_True m16_True m17_True m18_True m19_True m20_True \n", + "0 1 0 0 0 1 0 \n", + "1 1 0 0 0 1 0 \n", + "2 0 1 0 1 0 0 \n", + "3 1 0 0 1 1 0 \n", + "4 0 0 0 1 0 0 \n", + "\n", + "[5 rows x 85 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"analysis.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(95148, 85)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Remove the hospital identifier, then the first remaining column\n", + "# ('Unnamed: 0' in the preview above).\n", + "df = df.drop(columns=['hospitalid'])\n", + "df = df.drop(columns=df.columns[[0]])\n", + "\n", + "# Remove the 20 columns at positions 63-82 of what is left.\n", + "df = df.drop(columns=df.columns[list(range(63, 83))])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sodium 18244\n", + "electivesurgery 74997\n", + "vent 0\n", + "dialysis 0\n", + "gcs 1728\n", + "urine 45829\n", + "wbc 22141\n", + "temperature 4139\n", + "respiratoryrate 582\n", + "heartrate 188\n", + "meanbp 263\n", + "creatinine 18332\n", + "ph 73474\n", + "hematocrit 20021\n", + "albumin 58143\n", + "pao2 73474\n", + "pco2 73474\n", + "bun 18774\n", + "glucose 10909\n", + "bilirubin 60797\n", + "fio2 73474\n", + "age 3356\n", + "thrombolytics 0\n", + "aids 0\n", + "hepaticfailure 0\n", + "lymphoma 0\n", + "metastaticcancer 0\n", + "leukemia 0\n", + "immunosuppression 0\n", + "cirrhosis 0\n", + " ... 
\n", + "admitsource_1.0 0\n", + "admitsource_2.0 0\n", + "admitsource_3.0 0\n", + "admitsource_4.0 0\n", + "admitsource_5.0 0\n", + "admitsource_6.0 0\n", + "admitsource_7.0 0\n", + "admitsource_8.0 0\n", + "diaggroup_ARF 0\n", + "diaggroup_Asthma-Emphys 0\n", + "diaggroup_CABG 0\n", + "diaggroup_CHF 0\n", + "diaggroup_CVA 0\n", + "diaggroup_CVOther 0\n", + "diaggroup_CardiacArrest 0\n", + "diaggroup_ChestPainUnknown 0\n", + "diaggroup_Coma 0\n", + "diaggroup_DKA 0\n", + "diaggroup_GIBleed 0\n", + "diaggroup_GIObstruction 0\n", + "diaggroup_Neuro 0\n", + "diaggroup_Other 0\n", + "diaggroup_Overdose 0\n", + "diaggroup_PNA 0\n", + "diaggroup_RespMedOther 0\n", + "diaggroup_Sepsis 0\n", + "diaggroup_Trauma 0\n", + "diaggroup_ValveDz 0\n", + "gender_Male 0\n", + "gender_Other 0\n", + "Length: 63, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Treat -1.0 as a missing-value sentinel: convert it to NaN so the\n", + "# per-column null counts below include it.\n", + "df = df.replace(-1.0, np.nan)\n", + "missing_values_count = df.isnull().sum()\n", + "missing_values_count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We moved all the pre-processing (splitting -> imputation -> standardization) into the CV iterations.**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Columns to normalize (presumably inside each CV fold -- confirm against the CV cell).\n", + "cols_to_norm = ['gcs', 'urine', 'wbc', 'sodium',\n", + " 'temperature', 'respiratoryrate', 'heartrate', 'meanbp', 'creatinine',\n", + " 'ph', 'hematocrit', 'albumin', 'pao2', 'pco2', 'bun', 'glucose',\n", + " 'bilirubin', 'fio2', 'age', 'offset']\n", + "\n", + "# Split features from the target. axis is passed by keyword because\n", + "# pandas 2.0 removed the positional form drop('destcopy', 1).\n", + "X = df.drop('destcopy', axis=1)\n", + "y = df['destcopy']\n", + "df_cols = list(X)  # keep the column names: fancy impute removes them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sodium', 'electivesurgery', 'vent', 'dialysis', 'gcs', 'urine', 'wbc',\n", + " 'temperature', 'respiratoryrate', 'heartrate', 'meanbp', 'creatinine',\n", + " 'ph', 'hematocrit', 'albumin', 'pao2', 'pco2', 'bun', 'glucose',\n", + " 'bilirubin', 'fio2', 'age', 'thrombolytics', 'aids', 'hepaticfailure',\n", + " 'lymphoma', 'metastaticcancer', 'leukemia', 'immunosuppression',\n", + " 'cirrhosis', 'readmit', 'offset', 'destcopy', 'admitsource_1.0',\n", + " 'admitsource_2.0', 'admitsource_3.0', 'admitsource_4.0',\n", + " 'admitsource_5.0', 'admitsource_6.0', 'admitsource_7.0',\n", + " 'admitsource_8.0', 'diaggroup_ARF', 'diaggroup_Asthma-Emphys',\n", + " 'diaggroup_CABG', 'diaggroup_CHF', 'diaggroup_CVA', 'diaggroup_CVOther',\n", + " 'diaggroup_CardiacArrest', 'diaggroup_ChestPainUnknown',\n", + " 'diaggroup_Coma', 'diaggroup_DKA', 'diaggroup_GIBleed',\n", + " 'diaggroup_GIObstruction', 'diaggroup_Neuro', 'diaggroup_Other',\n", + " 'diaggroup_Overdose', 'diaggroup_PNA', 'diaggroup_RespMedOther',\n", + " 'diaggroup_Sepsis', 'diaggroup_Trauma', 'diaggroup_ValveDz',\n", + " 'gender_Male', 'gender_Other'],\n", + " dtype='object')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**XGB**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 1:\n", + "Accuracy: 0.7585916973200211\n", + "f-score: 0.7585916973200211\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.54 0.48 0.97 0.51 0.68 0.45 642\n", + " 2 0.79 0.96 0.37 0.87 0.59 0.37 6776\n", + " 3 0.58 0.24 0.96 0.34 0.48 0.21 1716\n", + " 4 0.31 0.01 1.00 0.02 0.10 0.01 381\n", + "\n", + "avg / total 0.72 0.76 0.54 0.71 0.56 0.33 9515\n", + "\n", + "[[ 310 239 89 4]\n", + " [ 102 6493 176 5]\n", + " [ 137 1168 411 0]\n", + " [ 21 325 31 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: 
SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 2:\n", + "Accuracy: 0.7777193904361535\n", + "f-score: 0.7777193904361535\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.60 0.49 0.96 0.54 0.69 0.45 974\n", + " 2 0.87 0.90 0.48 0.88 0.66 0.45 7520\n", + " 3 0.20 0.27 0.92 0.23 0.49 0.23 697\n", + " 4 0.09 0.01 1.00 0.01 0.08 0.01 324\n", + "\n", + "avg / total 0.76 0.78 0.58 0.77 0.63 0.42 9515\n", + "\n", + "[[ 477 347 149 1]\n", + " [ 199 
6735 567 19]\n", + " [ 99 412 186 0]\n", + " [ 17 272 33 2]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 3:\n", + "Accuracy: 0.7480819758276406\n", + "f-score: 0.7480819758276406\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.67 0.56 0.96 0.61 0.73 0.52 1247\n", + " 2 0.80 0.92 0.49 0.86 0.67 
0.47 6585\n", + " 3 0.40 0.24 0.93 0.30 0.47 0.21 1462\n", + " 4 0.08 0.00 1.00 0.01 0.07 0.00 221\n", + "\n", + "avg / total 0.70 0.75 0.63 0.72 0.63 0.42 9515\n", + "\n", + "[[ 701 380 162 4]\n", + " [ 194 6066 317 8]\n", + " [ 143 969 350 0]\n", + " [ 16 158 46 1]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"For fold 4:\n", + "Accuracy: 0.7147661586967945\n", + "f-score: 0.7147661586967945\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.67 0.51 0.97 0.58 0.70 0.47 1129\n", + " 2 0.74 0.95 0.35 0.83 0.57 0.35 6348\n", + " 3 0.35 0.15 0.96 0.21 0.38 0.14 1285\n", + " 4 0.36 0.01 1.00 0.01 0.07 0.00 753\n", + "\n", + "avg / total 0.65 0.71 0.56 0.66 0.52 0.31 9515\n", + "\n", + "[[ 573 433 122 1]\n", + " [ 118 6026 202 2]\n", + " [ 124 959 198 4]\n", + " [ 38 672 39 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + 
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 5:\n", + "Accuracy: 0.7389385181292696\n", + "f-score: 0.7389385181292696\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.64 0.50 0.96 0.56 0.69 0.46 1085\n", + " 2 0.77 0.95 0.41 0.85 0.63 0.41 6437\n", + " 3 0.50 0.21 0.96 0.29 0.45 0.18 1657\n", + " 4 0.14 0.01 1.00 0.02 0.09 0.01 336\n", + "\n", + "avg / total 0.69 0.74 0.59 0.69 0.58 0.36 9515\n", + "\n", + "[[ 538 391 148 8]\n", + " [ 123 6145 164 5]\n", + " [ 154 1152 345 6]\n", + " [ 26 272 35 3]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + 
"/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 6:\n", + "Accuracy: 0.7128744088281661\n", + "f-score: 0.7128744088281661\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.56 0.50 0.96 0.53 0.70 0.46 785\n", + " 2 0.74 0.96 0.36 0.84 0.59 0.37 6224\n", + " 3 0.57 0.19 0.96 0.29 0.43 0.17 2026\n", + " 4 0.26 0.02 1.00 0.03 0.13 0.01 480\n", + "\n", + "avg / total 0.66 0.71 0.57 0.65 0.54 0.32 9515\n", + "\n", + "[[ 395 284 100 6]\n", + " [ 86 5994 137 7]\n", + " [ 193 1437 386 10]\n", + " [ 35 382 55 8]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + 
"Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 7:\n", + "Accuracy: 0.7487125591171834\n", + "f-score: 0.7487125591171834\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.63 0.45 0.97 0.53 0.67 0.42 853\n", + " 2 0.79 0.95 0.42 0.86 0.63 0.42 6684\n", + " 3 0.44 0.33 0.94 0.37 0.55 0.29 1209\n", + " 4 0.64 0.01 1.00 0.02 0.10 0.01 769\n", + "\n", + "avg / total 0.72 0.75 0.58 0.70 0.58 0.37 9515\n", + "\n", + "[[ 388 305 159 1]\n", + " [ 114 6336 231 3]\n", + " [ 68 748 393 0]\n", + " [ 45 600 117 7]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 8:\n", + "Accuracy: 0.7407251707829743\n", + "f-score: 0.7407251707829743\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.65 0.42 0.98 0.51 0.64 0.39 933\n", + " 2 0.78 0.95 0.38 0.85 0.60 0.38 6645\n", + " 3 0.46 0.22 0.94 0.30 0.46 0.19 1675\n", + " 4 0.13 0.02 1.00 0.03 0.12 0.01 262\n", + "\n", + "avg / total 0.69 0.74 0.55 0.70 0.56 0.33 9515\n", + "\n", + "[[ 395 364 168 6]\n", + " [ 107 6280 245 13]\n", + " [ 91 1208 369 7]\n", + " [ 12 219 27 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: 
SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 9:\n", + "Accuracy: 0.7276644944292622\n", + "f-score: 0.7276644944292622\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.69 0.47 0.97 0.56 0.68 0.44 1031\n", + " 2 0.76 0.96 0.36 0.85 0.59 0.37 6399\n", + " 3 0.44 0.19 0.95 0.26 0.42 0.17 1495\n", + " 4 0.18 0.01 1.00 0.01 0.08 0.01 589\n", + "\n", + "avg / total 0.66 0.73 0.56 0.67 0.54 0.32 9514\n", + "\n", + "[[ 489 388 150 4]\n", + " [ 88 6150 156 5]\n", + " [ 99 1107 280 9]\n", + " [ 31 497 57 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + 
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 10:\n", + "Accuracy: 0.7419592179945343\n", + "f-score: 0.7419592179945343\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.66 0.53 0.96 0.59 0.71 0.49 1157\n", + " 2 0.78 0.95 0.41 0.86 0.62 0.40 6600\n", + " 3 0.34 0.17 0.95 0.23 0.40 0.15 1156\n", + " 4 0.14 0.01 1.00 0.01 0.08 0.01 601\n", + "\n", + "avg / total 0.67 0.74 0.58 0.69 0.57 0.36 9514\n", + "\n", + "[[ 614 393 143 7]\n", + " [ 144 6245 198 13]\n", + " [ 125 831 196 4]\n", + " [ 45 509 43 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 1:\n", + "Accuracy: 0.7585916973200211\n", + "f-score: 0.7585916973200211\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.54 0.48 0.97 0.51 0.68 0.45 642\n", + " 2 0.79 0.96 0.37 0.87 0.59 0.37 6776\n", + " 3 0.58 0.24 0.96 0.34 0.48 0.21 1716\n", + " 4 0.31 0.01 1.00 0.02 0.10 0.01 381\n", + "\n", + "avg / total 0.72 0.76 0.54 
0.71 0.56 0.33 9515\n", + "\n", + "[[ 310 239 89 4]\n", + " [ 102 6493 176 5]\n", + " [ 137 1168 411 0]\n", + " [ 21 325 31 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 2:\n", + "Accuracy: 0.7777193904361535\n", + "f-score: 0.7777193904361535\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.60 
0.49 0.96 0.54 0.69 0.45 974\n", + " 2 0.87 0.90 0.48 0.88 0.66 0.45 7520\n", + " 3 0.20 0.27 0.92 0.23 0.49 0.23 697\n", + " 4 0.09 0.01 1.00 0.01 0.08 0.01 324\n", + "\n", + "avg / total 0.76 0.78 0.58 0.77 0.63 0.42 9515\n", + "\n", + "[[ 477 347 149 1]\n", + " [ 199 6735 567 19]\n", + " [ 99 412 186 0]\n", + " [ 17 272 33 2]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + 
{ + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 3:\n", + "Accuracy: 0.7480819758276406\n", + "f-score: 0.7480819758276406\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.67 0.56 0.96 0.61 0.73 0.52 1247\n", + " 2 0.80 0.92 0.49 0.86 0.67 0.47 6585\n", + " 3 0.40 0.24 0.93 0.30 0.47 0.21 1462\n", + " 4 0.08 0.00 1.00 0.01 0.07 0.00 221\n", + "\n", + "avg / total 0.70 0.75 0.63 0.72 0.63 0.42 9515\n", + "\n", + "[[ 701 380 162 4]\n", + " [ 194 6066 317 8]\n", + " [ 143 969 350 0]\n", + " [ 16 158 46 1]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try 
using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 4:\n", + "Accuracy: 0.7147661586967945\n", + "f-score: 0.7147661586967945\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.67 0.51 0.97 0.58 0.70 0.47 1129\n", + " 2 0.74 0.95 0.35 0.83 0.57 0.35 6348\n", + " 3 0.35 0.15 0.96 0.21 0.38 0.14 1285\n", + " 4 0.36 0.01 1.00 0.01 0.07 0.00 753\n", + "\n", + "avg / total 0.65 0.71 0.56 0.66 0.52 0.31 9515\n", + "\n", + "[[ 573 433 122 1]\n", + " [ 118 6026 202 2]\n", + " [ 124 959 198 4]\n", + " [ 38 672 39 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 5:\n", + "Accuracy: 0.7389385181292696\n", + "f-score: 0.7389385181292696\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.64 0.50 0.96 0.56 0.69 0.46 1085\n", + " 2 0.77 0.95 0.41 0.85 0.63 0.41 6437\n", + " 3 0.50 0.21 0.96 0.29 0.45 0.18 1657\n", + " 4 0.14 0.01 1.00 0.02 0.09 0.01 336\n", + "\n", + "avg / total 0.69 0.74 0.59 0.69 0.58 0.36 9515\n", + "\n", + "[[ 538 391 148 8]\n", + " [ 123 6145 164 5]\n", + " [ 154 1152 345 6]\n", + " [ 26 272 35 3]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: 
SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 6:\n", + "Accuracy: 0.7128744088281661\n", + "f-score: 0.7128744088281661\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.56 0.50 0.96 0.53 0.70 0.46 785\n", + " 2 0.74 0.96 0.36 0.84 0.59 0.37 6224\n", + " 3 0.57 0.19 0.96 0.29 0.43 0.17 2026\n", + " 4 0.26 0.02 1.00 0.03 0.13 0.01 480\n", + "\n", + "avg / total 0.66 0.71 0.57 0.65 0.54 0.32 9515\n", + "\n", + "[[ 395 284 100 6]\n", + " [ 86 5994 137 7]\n", + " [ 193 1437 386 10]\n", + " [ 35 382 55 8]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + 
"\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 7:\n", + "Accuracy: 0.7487125591171834\n", + "f-score: 0.7487125591171834\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.63 0.45 0.97 0.53 0.67 0.42 853\n", + " 2 0.79 0.95 0.42 0.86 0.63 0.42 6684\n", + " 3 0.44 0.33 0.94 0.37 0.55 0.29 1209\n", + " 4 0.64 0.01 1.00 0.02 0.10 0.01 769\n", + "\n", + "avg / total 0.72 0.75 0.58 0.70 0.58 0.37 9515\n", + "\n", + "[[ 388 305 159 1]\n", + " [ 114 6336 231 3]\n", + " [ 68 748 393 0]\n", + " [ 45 600 117 7]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + 
"/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 8:\n", + "Accuracy: 0.7407251707829743\n", + "f-score: 0.7407251707829743\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.65 0.42 0.98 0.51 0.64 0.39 933\n", + " 2 0.78 0.95 0.38 0.85 0.60 0.38 6645\n", + " 3 0.46 0.22 0.94 0.30 0.46 0.19 1675\n", + " 4 0.13 0.02 1.00 0.03 0.12 0.01 262\n", + "\n", + "avg / total 0.69 0.74 0.55 0.70 0.56 0.33 9515\n", + "\n", + "[[ 395 364 168 6]\n", + " [ 107 6280 245 13]\n", + " [ 91 1208 369 7]\n", + " [ 12 219 27 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + 
"Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 9:\n", + "Accuracy: 0.7276644944292622\n", + "f-score: 0.7276644944292622\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.69 0.47 0.97 0.56 0.68 0.44 1031\n", + " 2 0.76 0.96 0.36 0.85 0.59 0.37 6399\n", + " 3 0.44 0.19 0.95 0.26 0.42 0.17 1495\n", + " 4 0.18 0.01 1.00 0.01 0.08 0.01 589\n", + "\n", + "avg / total 0.66 0.73 0.56 0.67 0.54 0.32 9514\n", + "\n", + "[[ 489 388 150 4]\n", + " [ 88 6150 156 5]\n", + " [ 99 1107 280 9]\n", + " [ 31 497 57 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 10:\n", + "Accuracy: 0.7419592179945343\n", + "f-score: 0.7419592179945343\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.66 0.53 0.96 0.59 0.71 0.49 1157\n", + " 2 0.78 0.95 0.41 0.86 0.62 0.40 6600\n", + " 3 0.34 0.17 0.95 0.23 0.40 0.15 1156\n", + " 4 0.14 0.01 1.00 0.01 0.08 0.01 601\n", + "\n", + 
"avg / total 0.67 0.74 0.58 0.69 0.57 0.36 9514\n", + "\n", + "[[ 614 393 143 7]\n", + " [ 144 6245 198 13]\n", + " [ 125 831 196 4]\n", + " [ 45 509 43 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 1:\n", + "Accuracy: 0.7585916973200211\n", + "f-score: 0.7585916973200211\n", + " pre rec spe f1 geo 
iba sup\n", + "\n", + " 1 0.54 0.48 0.97 0.51 0.68 0.45 642\n", + " 2 0.79 0.96 0.37 0.87 0.59 0.37 6776\n", + " 3 0.58 0.24 0.96 0.34 0.48 0.21 1716\n", + " 4 0.31 0.01 1.00 0.02 0.10 0.01 381\n", + "\n", + "avg / total 0.72 0.76 0.54 0.71 0.56 0.33 9515\n", + "\n", + "[[ 310 239 89 4]\n", + " [ 102 6493 176 5]\n", + " [ 137 1168 411 0]\n", + " [ 21 325 31 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: 
http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 2:\n", + "Accuracy: 0.7777193904361535\n", + "f-score: 0.7777193904361535\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.60 0.49 0.96 0.54 0.69 0.45 974\n", + " 2 0.87 0.90 0.48 0.88 0.66 0.45 7520\n", + " 3 0.20 0.27 0.92 0.23 0.49 0.23 697\n", + " 4 0.09 0.01 1.00 0.01 0.08 0.01 324\n", + "\n", + "avg / total 0.76 0.78 0.58 0.77 0.63 0.42 9515\n", + "\n", + "[[ 477 347 149 1]\n", + " [ 199 6735 567 19]\n", + " [ 99 412 186 0]\n", + " [ 17 272 33 2]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: 
SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 3:\n", + "Accuracy: 0.7480819758276406\n", + "f-score: 0.7480819758276406\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.67 0.56 0.96 0.61 0.73 0.52 1247\n", + " 2 0.80 0.92 0.49 0.86 0.67 0.47 6585\n", + " 3 0.40 0.24 0.93 0.30 0.47 0.21 1462\n", + " 4 0.08 0.00 1.00 0.01 0.07 0.00 221\n", + "\n", + "avg / total 0.70 0.75 0.63 0.72 0.63 0.42 9515\n", + "\n", + "[[ 701 380 162 4]\n", + " [ 194 6066 317 8]\n", + " [ 143 969 350 0]\n", + " [ 16 158 46 1]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + 
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 4:\n", + "Accuracy: 0.7147661586967945\n", + "f-score: 0.7147661586967945\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.67 0.51 0.97 0.58 0.70 0.47 1129\n", + " 2 0.74 0.95 0.35 0.83 0.57 0.35 6348\n", + " 3 0.35 0.15 0.96 0.21 0.38 0.14 1285\n", + " 4 0.36 0.01 1.00 0.01 0.07 0.00 753\n", + "\n", + "avg / total 0.65 0.71 0.56 0.66 0.52 0.31 9515\n", + "\n", + "[[ 573 433 122 1]\n", + " [ 118 6026 202 2]\n", + " [ 124 959 198 4]\n", + " [ 38 672 39 4]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + 
"/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For fold 5:\n", + "Accuracy: 0.7389385181292696\n", + "f-score: 0.7389385181292696\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.64 0.50 0.96 0.56 0.69 0.46 1085\n", + " 2 0.77 0.95 0.41 0.85 0.63 0.41 6437\n", + " 3 0.50 0.21 0.96 0.29 0.45 0.18 1657\n", + " 4 0.14 0.01 1.00 0.02 0.09 0.01 336\n", + "\n", + "avg / total 0.69 0.74 0.59 0.69 0.58 0.36 9515\n", + "\n", + "[[ 538 391 148 8]\n", + " [ 123 6145 164 5]\n", + " [ 154 1152 345 6]\n", + " [ 26 272 35 3]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:42: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + 
"Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:43: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n", + "/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning:\n", + "\n", + "\n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + "\n" + ] + } + ], + "source": [ + "%matplotlib inline\n", + "from sklearn.model_selection import KFold\n", + "from sklearn import preprocessing\n", + "from imblearn.over_sampling import SMOTE\n", + "from sklearn.metrics import f1_score\n", + "from imblearn.metrics import classification_report_imbalanced\n", + "# explicitly require this experimental feature\n", + "\n", + "from yellowbrick.classifier import ROCAUC\n", + "from sklearn.linear_model import LogisticRegression\n", + "from numpy import loadtxt\n", + "import os\n", + "os.environ['KMP_DUPLICATE_LIB_OK']='True'\n", + "from xgboost import XGBClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score\n", + "import io \n", + "\n", + "\n", + "classes=['Death','Home','Nursing Home','Rehabilitation']\n", + "\n", + "\n", + "kf = KFold(n_splits=10)\n", + "\n", + "\n", + "for i in range (1,11):\n", + "\n", + " for fold, (train_index, test_index) in enumerate(kf.split(X), 1):\n", + " 
X_train = X.iloc[train_index]\n", + " y_train = y.iloc[train_index] # Based on your code, you might need a ravel call here, but I would look into how you're generating your y\n", + " X_test = X.iloc[test_index]\n", + " y_test = y.iloc[test_index] # See comment on ravel and y_train\n", + "\n", + "\n", + " #------------------------------IMPUTE Training Set------------------------------------\n", + "\n", + " \n", + "\n", + " #------------------------------Standardize Testing Set------------------------------------\n", + "\n", + " std_scale = preprocessing.StandardScaler().fit(X_train[cols_to_norm])\n", + " X_train[cols_to_norm] = std_scale.transform(X_train[cols_to_norm])\n", + " X_test[cols_to_norm] = std_scale.transform(X_test[cols_to_norm])\n", + " #------------------------------------------------------------------------------------------\n", + "\n", + " # Hyperparameters are optimized using hyperopt\n", + "\n", + " #sm = SMOTE()\n", + " #X_train_oversampled, y_train_oversampled = sm.fit_sample(X_train, y_train)\n", + " model = XGBClassifier(max_depth=8, gamma=0.063, colsample_bytree=0.71)\n", + " model.fit(X_train, y_train) \n", + " y_pred = model.predict(X_test)\n", + " visualizer = ROCAUC(model, classes=classes)\n", + " visualizer.fit(X_train, y_train) # Fit the training data to the visualizer\n", + " visualizer.score(X_test, y_test) # Evaluate the model on the test data\n", + " visualizer.poof(\"XGB_Unbalanced_{}_{}.pdf\".format(i, fold), clear_figure=True) \n", + " print(f'For fold {fold}:')\n", + " print(f'Accuracy: {model.score(X_test, y_test)}')\n", + " f1=f1_score(y_test, y_pred, average='micro')\n", + " print(f'f-score: {f1}')\n", + " print(classification_report_imbalanced(y_test, y_pred))\n", + " K= classification_report_imbalanced(y_test, y_pred)\n", + " df = pd.read_fwf(io.StringIO(K))\n", + " df.loc[\"1\":\"1\",\"pre\":\"sup\"].to_csv(\"XGB-U-D.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n", + " 
df.loc[\"2\":\"2\",\"pre\":\"sup\"].to_csv(\"XGB-U-H.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n", + " df.loc[\"3\":\"3\",\"pre\":\"sup\"].to_csv(\"XGB-U-N.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n", + " df.loc[\"4\":\"4\",\"pre\":\"sup\"].to_csv(\"XGB-U-R.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n", + " df.iloc[6:7,:].to_csv(\"XGB-U-avg.csv\" , sep=',', encoding='utf-8', doublequote=False, index=False, mode=\"a\", header=False)\n", + " from sklearn.metrics import confusion_matrix\n", + " print(confusion_matrix(y_test, y_pred))\n", + "\n", + " #\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}