--- a +++ b/SMOTE-NC/R2-XGB-SMOTENC.ipynb @@ -0,0 +1,1911 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>" + ], + "text/vnd.plotly.v1+html": [ + "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Import libraries\n", + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import getpass\n", + "import pdvega\n", + "import plotly.graph_objs as go\n", + "\n", + "from plotly.offline import iplot, init_notebook_mode\n", + "import plotly.io as pio\n", + "from plotly.graph_objs import *\n", + "\n", + "# for configuring connection \n", + "from configobj import ConfigObj\n", + "import os\n", + "\n", + "%matplotlib inline\n", + "\n", + "\n", + "import os\n", + "\n", + "\n", + "from sklearn import linear_model\n", + "from sklearn import metrics\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "#configure the notebook for use in offline mode\n", + "init_notebook_mode(connected=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style 
scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Unnamed: 0</th>\n", + " <th>hospitalid</th>\n", + " <th>sodium</th>\n", + " <th>electivesurgery</th>\n", + " <th>vent</th>\n", + " <th>dialysis</th>\n", + " <th>gcs</th>\n", + " <th>urine</th>\n", + " <th>wbc</th>\n", + " <th>temperature</th>\n", + " <th>...</th>\n", + " <th>m11_True</th>\n", + " <th>m12_True</th>\n", + " <th>m13_True</th>\n", + " <th>m14_True</th>\n", + " <th>m15_True</th>\n", + " <th>m16_True</th>\n", + " <th>m17_True</th>\n", + " <th>m18_True</th>\n", + " <th>m19_True</th>\n", + " <th>m20_True</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>59.0</td>\n", + " <td>139.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>14.7</td>\n", + " <td>36.1</td>\n", + " <td>...</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>73.0</td>\n", + " <td>134.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>13.0</td>\n", + " <td>-1.0</td>\n", + " <td>14.1</td>\n", + " <td>39.3</td>\n", + " <td>...</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>73.0</td>\n", + 
" <td>-1.0</td>\n", + " <td>1.0</td>\n", + " <td>1.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>8.0</td>\n", + " <td>34.8</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>63.0</td>\n", + " <td>137.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>10.9</td>\n", + " <td>36.6</td>\n", + " <td>...</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>63.0</td>\n", + " <td>135.0</td>\n", + " <td>-1.0</td>\n", + " <td>0.0</td>\n", + " <td>0.0</td>\n", + " <td>15.0</td>\n", + " <td>-1.0</td>\n", + " <td>5.9</td>\n", + " <td>35.0</td>\n", + " <td>...</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 85 columns</p>\n", + "</div>" + ], + "text/plain": [ + " Unnamed: 0 hospitalid sodium electivesurgery vent dialysis gcs \\\n", + "0 0 59.0 139.0 -1.0 0.0 0.0 15.0 \n", + "1 1 73.0 134.0 -1.0 0.0 0.0 13.0 \n", + "2 2 73.0 -1.0 1.0 1.0 0.0 15.0 \n", + "3 3 63.0 137.0 -1.0 0.0 0.0 15.0 \n", + "4 4 63.0 135.0 -1.0 0.0 0.0 15.0 \n", + "\n", + " urine wbc temperature ... m11_True m12_True m13_True m14_True \\\n", + "0 -1.0 14.7 36.1 ... 1 0 0 1 \n", + "1 -1.0 14.1 39.3 ... 1 0 0 1 \n", + "2 -1.0 8.0 34.8 ... 0 0 1 0 \n", + "3 -1.0 10.9 36.6 ... 1 0 1 1 \n", + "4 -1.0 5.9 35.0 ... 
0 0 1 0 \n", + "\n", + " m15_True m16_True m17_True m18_True m19_True m20_True \n", + "0 1 0 0 0 1 0 \n", + "1 1 0 0 0 1 0 \n", + "2 0 1 0 1 0 0 \n", + "3 1 0 0 1 1 0 \n", + "4 0 0 0 1 0 0 \n", + "\n", + "[5 rows x 85 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load the analysis table and preview its first rows.\n", + "df = pd.read_csv(\"analysis.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(95148, 85)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Drop the hospital identifier, then whatever column is first in the frame\n", + "# (shown as 'Unnamed: 0' in the preview above).\n", + "df = df.drop(columns='hospitalid')\n", + "df = df.drop(columns=df.columns[0])\n", + "\n", + "# Drop the 20 columns at positions 63-82 of the remaining frame.\n", + "df = df.drop(columns=df.columns[63:83])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "sodium 18244\n", + "electivesurgery 74997\n", + "vent 0\n", + "dialysis 0\n", + "gcs 1728\n", + "urine 45829\n", + "wbc 22141\n", + "temperature 4139\n", + "respiratoryrate 582\n", + "heartrate 188\n", + "meanbp 263\n", + "creatinine 18332\n", + "ph 73474\n", + "hematocrit 20021\n", + "albumin 58143\n", + "pao2 73474\n", + "pco2 73474\n", + "bun 18774\n", + "glucose 10909\n", + "bilirubin 60797\n", + "fio2 73474\n", + "age 3356\n", + "thrombolytics 0\n", + "aids 0\n", + "hepaticfailure 0\n", + "lymphoma 0\n", + "metastaticcancer 0\n", + "leukemia 0\n", + "immunosuppression 0\n", + "cirrhosis 0\n", + " ... 
\n", + "admitsource_1.0 0\n", + "admitsource_2.0 0\n", + "admitsource_3.0 0\n", + "admitsource_4.0 0\n", + "admitsource_5.0 0\n", + "admitsource_6.0 0\n", + "admitsource_7.0 0\n", + "admitsource_8.0 0\n", + "diaggroup_ARF 0\n", + "diaggroup_Asthma-Emphys 0\n", + "diaggroup_CABG 0\n", + "diaggroup_CHF 0\n", + "diaggroup_CVA 0\n", + "diaggroup_CVOther 0\n", + "diaggroup_CardiacArrest 0\n", + "diaggroup_ChestPainUnknown 0\n", + "diaggroup_Coma 0\n", + "diaggroup_DKA 0\n", + "diaggroup_GIBleed 0\n", + "diaggroup_GIObstruction 0\n", + "diaggroup_Neuro 0\n", + "diaggroup_Other 0\n", + "diaggroup_Overdose 0\n", + "diaggroup_PNA 0\n", + "diaggroup_RespMedOther 0\n", + "diaggroup_Sepsis 0\n", + "diaggroup_Trauma 0\n", + "diaggroup_ValveDz 0\n", + "gender_Male 0\n", + "gender_Other 0\n", + "Length: 63, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Treat -1.0 as the missing-value marker: convert it to NaN in every column,\n", + "# then count the missing entries per column.\n", + "df = df.replace(-1.0, np.nan)\n", + "missing_values_count = df.isnull().sum()\n", + "missing_values_count" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We moved all the pre-processing including splitting>imputation>Standardization to the CV iterations**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# Columns to normalize; the normalization itself is done later, inside the CV iterations.\n", + "cols_to_norm = ['gcs', 'urine', 'wbc', 'sodium',\n", + " 'temperature', 'respiratoryrate', 'heartrate', 'meanbp', 'creatinine',\n", + " 'ph', 'hematocrit', 'albumin', 'pao2', 'pco2', 'bun', 'glucose',\n", + " 'bilirubin', 'fio2', 'age', 'offset']\n", + "\n", + "# Feature / target split. `columns=` is used instead of a positional axis argument,\n", + "# which pandas deprecated in 1.3 and rejects from 2.0 on.\n", + "X = df.drop(columns='destcopy')\n", + "y = df['destcopy']\n", + "df_cols = list(X) # keep the feature names: fancy impute removes column names."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['sodium', 'electivesurgery', 'vent', 'dialysis', 'gcs', 'urine', 'wbc',\n", + " 'temperature', 'respiratoryrate', 'heartrate', 'meanbp', 'creatinine',\n", + " 'ph', 'hematocrit', 'albumin', 'pao2', 'pco2', 'bun', 'glucose',\n", + " 'bilirubin', 'fio2', 'age', 'thrombolytics', 'aids', 'hepaticfailure',\n", + " 'lymphoma', 'metastaticcancer', 'leukemia', 'immunosuppression',\n", + " 'cirrhosis', 'readmit', 'offset', 'destcopy', 'admitsource_1.0',\n", + " 'admitsource_2.0', 'admitsource_3.0', 'admitsource_4.0',\n", + " 'admitsource_5.0', 'admitsource_6.0', 'admitsource_7.0',\n", + " 'admitsource_8.0', 'diaggroup_ARF', 'diaggroup_Asthma-Emphys',\n", + " 'diaggroup_CABG', 'diaggroup_CHF', 'diaggroup_CVA', 'diaggroup_CVOther',\n", + " 'diaggroup_CardiacArrest', 'diaggroup_ChestPainUnknown',\n", + " 'diaggroup_Coma', 'diaggroup_DKA', 'diaggroup_GIBleed',\n", + " 'diaggroup_GIObstruction', 'diaggroup_Neuro', 'diaggroup_Other',\n", + " 'diaggroup_Overdose', 'diaggroup_PNA', 'diaggroup_RespMedOther',\n", + " 'diaggroup_Sepsis', 'diaggroup_Trauma', 'diaggroup_ValveDz',\n", + " 'gender_Male', 'gender_Other'],\n", + " dtype='object')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**XGB**" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\llois\\Anaconda\\lib\\site-packages\\sklearn\\externals\\six.py:31: DeprecationWarning:\n", + "\n", + "The module is deprecated in version 0.21 and will be removed in version 0.23 since we've 
dropped support for Python 2.7. Please rely on the official version of six (https://pypi.org/project/six/).\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7169732002101944\n", + "f-score: 0.7169732002101944\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.39 0.48 0.95 0.43 0.68 0.44 642\n", + " 2 0.79 0.90 0.41 0.84 0.61 0.39 6776\n", + " 3 0.46 0.20 0.95 0.28 0.44 0.18 1716\n", + " 4 0.15 0.10 0.98 0.12 0.31 0.09 381\n", + "\n", + "avg / total 0.68 0.72 0.57 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7043615344193379\n", + "f-score: 0.7043615344193379\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.47 0.51 0.93 0.49 0.69 0.46 974\n", + " 2 0.87 0.79 0.57 0.83 0.67 0.46 7520\n", + " 3 0.16 0.30 0.87 0.20 0.51 0.24 697\n", + " 4 0.10 0.10 0.97 0.10 0.30 0.08 324\n", + "\n", + "avg / total 0.75 0.70 0.64 0.73 0.65 0.43 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7086705202312139\n", + "f-score: 0.7086705202312139\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.59 0.93 0.58 0.74 0.53 1247\n", + " 2 0.81 0.86 0.54 0.83 0.68 0.48 6585\n", + " 3 0.33 0.23 0.91 0.27 0.46 0.20 1462\n", + " 4 0.10 0.10 0.98 0.10 0.31 0.08 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.70 0.65 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6671571203363111\n", + "f-score: 0.6671571203363111\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.58 0.92 0.52 0.73 0.51 1129\n", + " 2 0.78 0.84 0.51 0.81 0.66 0.45 6348\n", + " 3 0.30 0.19 0.93 0.23 0.42 0.16 1285\n", + " 4 0.22 0.13 0.96 0.17 0.36 0.12 753\n", + "\n", + "avg / total 0.63 0.67 0.65 0.65 0.61 0.39 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + 
"For fold 5:\n", + "Accuracy: 0.6906988964792433\n", + "f-score: 0.6906988964792433\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.53 0.55 0.94 0.54 0.72 0.49 1085\n", + " 2 0.78 0.87 0.50 0.82 0.66 0.45 6437\n", + " 3 0.40 0.19 0.94 0.26 0.43 0.17 1657\n", + " 4 0.12 0.16 0.96 0.14 0.39 0.14 336\n", + "\n", + "avg / total 0.66 0.69 0.64 0.67 0.61 0.39 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6855491329479769\n", + "f-score: 0.6855491329479769\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.55 0.93 0.47 0.71 0.49 785\n", + " 2 0.76 0.90 0.46 0.82 0.64 0.43 6224\n", + " 3 0.50 0.22 0.94 0.31 0.46 0.19 2026\n", + " 4 0.19 0.08 0.98 0.11 0.28 0.07 480\n", + "\n", + "avg / total 0.65 0.69 0.63 0.65 0.59 0.37 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7142406726221755\n", + "f-score: 0.7142406726221755\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.50 0.51 0.95 0.51 0.70 0.47 853\n", + " 2 0.80 0.90 0.46 0.84 0.64 0.43 6684\n", + " 3 0.35 0.26 0.93 0.29 0.49 0.22 1209\n", + " 4 0.26 0.08 0.98 0.12 0.28 0.07 769\n", + "\n", + "avg / total 0.67 0.71 0.61 0.69 0.60 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7141355754072517\n", + "f-score: 0.7141355754072517\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.55 0.46 0.96 0.50 0.66 0.42 933\n", + " 2 0.78 0.91 0.41 0.84 0.61 0.39 6645\n", + " 3 0.39 0.20 0.93 0.26 0.43 0.17 1675\n", + " 4 0.10 0.06 0.98 0.08 0.25 0.06 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6835190245953332\n", + "f-score: 0.6835190245953332\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.54 0.94 0.53 0.71 0.49 1031\n", + " 2 0.78 0.87 0.50 0.82 0.66 0.46 6399\n", + " 3 0.35 0.20 0.93 0.25 
0.43 0.17 1495\n", + " 4 0.16 0.13 0.96 0.14 0.35 0.11 589\n", + "\n", + "avg / total 0.65 0.68 0.65 0.66 0.61 0.39 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6930838763926844\n", + "f-score: 0.6930838763926844\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.61 0.92 0.56 0.75 0.54 1157\n", + " 2 0.81 0.85 0.56 0.83 0.69 0.48 6600\n", + " 3 0.30 0.22 0.93 0.25 0.45 0.19 1156\n", + " 4 0.12 0.09 0.96 0.11 0.30 0.08 601\n", + "\n", + "avg / total 0.67 0.69 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7202312138728324\n", + "f-score: 0.7202312138728324\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.49 0.95 0.45 0.68 0.44 642\n", + " 2 0.79 0.91 0.41 0.84 0.61 0.39 6776\n", + " 3 0.45 0.21 0.94 0.29 0.44 0.18 1716\n", + " 4 0.18 0.11 0.98 0.14 0.33 0.10 381\n", + "\n", + "avg / total 0.68 0.72 0.57 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7106673673147662\n", + "f-score: 0.7106673673147662\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.54 0.93 0.50 0.71 0.48 974\n", + " 2 0.88 0.80 0.58 0.84 0.68 0.48 7520\n", + " 3 0.17 0.31 0.88 0.22 0.52 0.26 697\n", + " 4 0.09 0.08 0.97 0.09 0.28 0.07 324\n", + "\n", + "avg / total 0.76 0.71 0.66 0.73 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7089858118759853\n", + "f-score: 0.7089858118759853\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.58 0.93 0.57 0.73 0.52 1247\n", + " 2 0.81 0.86 0.55 0.83 0.68 0.48 6585\n", + " 3 0.34 0.25 0.91 0.29 0.48 0.21 1462\n", + " 4 0.06 0.06 0.98 0.06 0.24 0.05 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.70 0.65 0.44 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6689437729900157\n", + 
"f-score: 0.6689437729900157\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.47 0.57 0.91 0.52 0.72 0.51 1129\n", + " 2 0.78 0.84 0.52 0.81 0.67 0.46 6348\n", + " 3 0.31 0.20 0.93 0.24 0.43 0.17 1285\n", + " 4 0.23 0.13 0.96 0.16 0.35 0.11 753\n", + "\n", + "avg / total 0.64 0.67 0.66 0.65 0.62 0.40 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6909090909090909\n", + "f-score: 0.6909090909090909\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.54 0.56 0.94 0.55 0.73 0.51 1085\n", + " 2 0.79 0.87 0.51 0.83 0.66 0.46 6437\n", + " 3 0.41 0.20 0.94 0.27 0.43 0.17 1657\n", + " 4 0.08 0.11 0.95 0.09 0.32 0.09 336\n", + "\n", + "avg / total 0.67 0.69 0.65 0.67 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6831318970047294\n", + "f-score: 0.6831318970047294\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.53 0.93 0.47 0.70 0.47 785\n", + " 2 0.76 0.90 0.45 0.82 0.64 0.43 6224\n", + " 3 0.50 0.21 0.94 0.30 0.45 0.19 2026\n", + " 4 0.15 0.07 0.98 0.10 0.26 0.06 480\n", + "\n", + "avg / total 0.64 0.68 0.62 0.65 0.59 0.36 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7130846032580137\n", + "f-score: 0.7130846032580137\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.51 0.95 0.50 0.69 0.46 853\n", + " 2 0.80 0.89 0.47 0.84 0.65 0.43 6684\n", + " 3 0.36 0.28 0.93 0.31 0.51 0.24 1209\n", + " 4 0.29 0.08 0.98 0.13 0.29 0.07 769\n", + "\n", + "avg / total 0.67 0.71 0.61 0.69 0.60 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7142406726221755\n", + "f-score: 0.7142406726221755\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.54 0.47 0.96 0.50 0.67 0.43 933\n", + " 2 0.78 0.91 0.41 0.84 0.61 0.39 6645\n", + " 3 0.40 0.20 0.94 0.26 0.43 0.17 1675\n", + " 4 0.08 0.05 0.98 0.06 0.23 0.05 
262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6776329619508094\n", + "f-score: 0.6776329619508094\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.54 0.93 0.51 0.71 0.48 1031\n", + " 2 0.78 0.86 0.52 0.82 0.67 0.46 6399\n", + " 3 0.35 0.21 0.93 0.26 0.44 0.18 1495\n", + " 4 0.15 0.12 0.95 0.13 0.34 0.10 589\n", + "\n", + "avg / total 0.65 0.68 0.65 0.66 0.61 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6961320159764558\n", + "f-score: 0.6961320159764558\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.61 0.92 0.56 0.75 0.55 1157\n", + " 2 0.81 0.85 0.56 0.83 0.69 0.49 6600\n", + " 3 0.29 0.22 0.93 0.25 0.45 0.19 1156\n", + " 4 0.16 0.09 0.97 0.12 0.30 0.08 601\n", + "\n", + "avg / total 0.67 0.70 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7162375197057278\n", + "f-score: 0.7162375197057278\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.41 0.50 0.95 0.45 0.69 0.45 642\n", + " 2 0.79 0.91 0.40 0.84 0.60 0.38 6776\n", + " 3 0.45 0.18 0.95 0.26 0.42 0.16 1716\n", + " 4 0.16 0.11 0.98 0.13 0.32 0.10 381\n", + "\n", + "avg / total 0.68 0.72 0.56 0.68 0.56 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7158171308460326\n", + "f-score: 0.7158171308460325\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.55 0.93 0.51 0.71 0.49 974\n", + " 2 0.88 0.80 0.59 0.84 0.69 0.48 7520\n", + " 3 0.18 0.32 0.88 0.23 0.53 0.27 697\n", + " 4 0.07 0.07 0.97 0.07 0.26 0.06 324\n", + "\n", + "avg / total 0.76 0.72 0.66 0.74 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7233841303205465\n", + "f-score: 0.7233841303205465\n", + " pre rec spe f1 geo 
iba sup\n", + "\n", + " 1 0.59 0.58 0.94 0.58 0.74 0.53 1247\n", + " 2 0.81 0.88 0.53 0.84 0.68 0.48 6585\n", + " 3 0.36 0.23 0.92 0.28 0.46 0.20 1462\n", + " 4 0.11 0.06 0.99 0.08 0.25 0.06 221\n", + "\n", + "avg / total 0.69 0.72 0.65 0.70 0.65 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.670940620073568\n", + "f-score: 0.670940620073568\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.50 0.60 0.92 0.54 0.74 0.54 1129\n", + " 2 0.78 0.84 0.52 0.81 0.66 0.45 6348\n", + " 3 0.31 0.20 0.93 0.24 0.43 0.17 1285\n", + " 4 0.21 0.12 0.96 0.16 0.34 0.11 753\n", + "\n", + "avg / total 0.64 0.67 0.66 0.65 0.61 0.40 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6832369942196532\n", + "f-score: 0.6832369942196532\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.56 0.93 0.53 0.72 0.50 1085\n", + " 2 0.79 0.86 0.51 0.82 0.66 0.46 6437\n", + " 3 0.37 0.19 0.93 0.25 0.42 0.16 1657\n", + " 4 0.10 0.13 0.96 0.11 0.35 0.11 336\n", + "\n", + "avg / total 0.66 0.68 0.65 0.66 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6956384655806621\n", + "f-score: 0.6956384655806621\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.44 0.54 0.94 0.49 0.71 0.49 785\n", + " 2 0.76 0.91 0.46 0.83 0.65 0.44 6224\n", + " 3 0.55 0.25 0.94 0.34 0.48 0.22 2026\n", + " 4 0.15 0.07 0.98 0.09 0.26 0.06 480\n", + "\n", + "avg / total 0.66 0.70 0.63 0.66 0.60 0.38 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7121387283236994\n", + "f-score: 0.7121387283236994\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.52 0.95 0.51 0.70 0.47 853\n", + " 2 0.79 0.89 0.45 0.84 0.64 0.42 6684\n", + " 3 0.33 0.24 0.93 0.28 0.47 0.21 1209\n", + " 4 0.32 0.09 0.98 0.14 0.30 0.08 769\n", + "\n", + "avg / total 0.67 0.71 0.60 0.68 0.59 0.37 
9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7149763531266421\n", + "f-score: 0.714976353126642\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.55 0.47 0.96 0.51 0.67 0.43 933\n", + " 2 0.78 0.91 0.40 0.84 0.60 0.38 6645\n", + " 3 0.41 0.19 0.94 0.26 0.43 0.17 1675\n", + " 4 0.06 0.04 0.98 0.05 0.20 0.04 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6824679419802396\n", + "f-score: 0.6824679419802396\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.55 0.94 0.53 0.72 0.49 1031\n", + " 2 0.78 0.87 0.51 0.82 0.66 0.46 6399\n", + " 3 0.33 0.21 0.92 0.26 0.44 0.18 1495\n", + " 4 0.18 0.13 0.96 0.15 0.35 0.11 589\n", + "\n", + "avg / total 0.65 0.68 0.65 0.66 0.62 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6994954803447551\n", + "f-score: 0.6994954803447551\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.61 0.92 0.55 0.75 0.54 1157\n", + " 2 0.81 0.85 0.55 0.83 0.69 0.49 6600\n", + " 3 0.31 0.22 0.93 0.26 0.45 0.19 1156\n", + " 4 0.15 0.09 0.97 0.11 0.29 0.08 601\n", + "\n", + "avg / total 0.67 0.70 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.72044140830268\n", + "f-score: 0.72044140830268\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.49 0.95 0.45 0.68 0.44 642\n", + " 2 0.79 0.91 0.41 0.84 0.61 0.39 6776\n", + " 3 0.47 0.21 0.95 0.29 0.45 0.19 1716\n", + " 4 0.16 0.10 0.98 0.12 0.31 0.09 381\n", + "\n", + "avg / total 0.68 0.72 0.56 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7086705202312139\n", + "f-score: 0.7086705202312139\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.54 0.93 0.51 0.71 0.48 974\n", + " 
2 0.88 0.80 0.58 0.84 0.68 0.48 7520\n", + " 3 0.16 0.29 0.88 0.20 0.50 0.24 697\n", + " 4 0.09 0.09 0.97 0.09 0.29 0.08 324\n", + "\n", + "avg / total 0.76 0.71 0.65 0.73 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7070940620073568\n", + "f-score: 0.7070940620073568\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.56 0.58 0.93 0.57 0.73 0.52 1247\n", + " 2 0.81 0.86 0.54 0.83 0.68 0.48 6585\n", + " 3 0.32 0.22 0.92 0.26 0.45 0.19 1462\n", + " 4 0.08 0.08 0.98 0.08 0.27 0.07 221\n", + "\n", + "avg / total 0.68 0.71 0.66 0.69 0.64 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6684182869153967\n", + "f-score: 0.6684182869153967\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.59 0.91 0.53 0.73 0.52 1129\n", + " 2 0.78 0.85 0.52 0.81 0.66 0.45 6348\n", + " 3 0.27 0.18 0.93 0.21 0.40 0.15 1285\n", + " 4 0.22 0.12 0.97 0.15 0.34 0.10 753\n", + "\n", + "avg / total 0.63 0.67 0.65 0.65 0.61 0.39 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6949027850761955\n", + "f-score: 0.6949027850761955\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.54 0.93 0.53 0.71 0.49 1085\n", + " 2 0.79 0.88 0.50 0.83 0.66 0.46 6437\n", + " 3 0.42 0.20 0.94 0.27 0.44 0.18 1657\n", + " 4 0.11 0.13 0.96 0.12 0.35 0.12 336\n", + "\n", + "avg / total 0.67 0.69 0.64 0.67 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6911192853389385\n", + "f-score: 0.6911192853389385\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.44 0.55 0.94 0.49 0.72 0.50 785\n", + " 2 0.76 0.91 0.47 0.83 0.65 0.44 6224\n", + " 3 0.51 0.23 0.94 0.32 0.47 0.20 2026\n", + " 4 0.16 0.07 0.98 0.10 0.27 0.07 480\n", + "\n", + "avg / total 0.65 0.69 0.63 0.66 0.60 0.38 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 
59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7078297425118234\n", + "f-score: 0.7078297425118234\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.50 0.95 0.49 0.69 0.45 853\n", + " 2 0.80 0.89 0.47 0.84 0.64 0.43 6684\n", + " 3 0.33 0.27 0.92 0.30 0.50 0.23 1209\n", + " 4 0.26 0.07 0.98 0.12 0.27 0.07 769\n", + "\n", + "avg / total 0.67 0.71 0.61 0.68 0.60 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7168681029952706\n", + "f-score: 0.7168681029952706\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.56 0.45 0.96 0.50 0.66 0.41 933\n", + " 2 0.78 0.91 0.41 0.84 0.61 0.39 6645\n", + " 3 0.40 0.21 0.93 0.28 0.44 0.18 1675\n", + " 4 0.11 0.07 0.98 0.09 0.27 0.06 262\n", + "\n", + "avg / total 0.68 0.72 0.57 0.69 0.58 0.35 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6847803237334454\n", + "f-score: 0.6847803237334454\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.50 0.54 0.93 0.52 0.71 0.49 1031\n", + " 2 0.79 0.87 0.52 0.83 0.67 0.47 6399\n", + " 3 0.35 0.23 0.92 0.27 0.46 0.19 1495\n", + " 4 0.19 0.12 0.97 0.15 0.34 0.11 589\n", + "\n", + "avg / total 0.65 0.68 0.66 0.66 0.62 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6996005886062645\n", + "f-score: 0.6996005886062645\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.61 0.92 0.56 0.75 0.54 1157\n", + " 2 0.82 0.85 0.56 0.83 0.69 0.49 6600\n", + " 3 0.30 0.22 0.93 0.26 0.46 0.19 1156\n", + " 4 0.14 0.08 0.97 0.10 0.28 0.07 601\n", + "\n", + "avg / total 0.67 0.70 0.68 0.68 0.64 0.44 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7178139779295849\n", + "f-score: 0.7178139779295849\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.51 0.95 0.46 0.69 0.46 642\n", + " 2 0.79 0.90 0.41 0.84 0.61 0.39 6776\n", + " 3 0.45 
0.21 0.95 0.28 0.44 0.18 1716\n", + " 4 0.14 0.08 0.98 0.10 0.29 0.07 381\n", + "\n", + "avg / total 0.68 0.72 0.57 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7165528113504992\n", + "f-score: 0.7165528113504992\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.53 0.94 0.50 0.70 0.47 974\n", + " 2 0.88 0.80 0.58 0.84 0.68 0.48 7520\n", + " 3 0.18 0.32 0.88 0.23 0.53 0.27 697\n", + " 4 0.10 0.09 0.97 0.10 0.30 0.08 324\n", + "\n", + "avg / total 0.76 0.72 0.65 0.74 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7057277982133473\n", + "f-score: 0.7057277982133473\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.56 0.59 0.93 0.57 0.74 0.53 1247\n", + " 2 0.81 0.85 0.54 0.83 0.68 0.48 6585\n", + " 3 0.33 0.23 0.92 0.27 0.46 0.20 1462\n", + " 4 0.10 0.10 0.98 0.10 0.30 0.08 221\n", + "\n", + "avg / total 0.68 0.71 0.66 0.69 0.64 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6705202312138728\n", + "f-score: 0.6705202312138728\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.58 0.91 0.53 0.73 0.51 1129\n", + " 2 0.78 0.85 0.52 0.81 0.66 0.45 6348\n", + " 3 0.32 0.19 0.94 0.24 0.42 0.16 1285\n", + " 4 0.20 0.12 0.96 0.15 0.35 0.11 753\n", + "\n", + "avg / total 0.64 0.67 0.66 0.65 0.61 0.39 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6850236468733578\n", + "f-score: 0.6850236468733578\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.53 0.55 0.94 0.54 0.72 0.50 1085\n", + " 2 0.78 0.86 0.51 0.82 0.66 0.45 6437\n", + " 3 0.41 0.22 0.93 0.28 0.45 0.19 1657\n", + " 4 0.09 0.12 0.95 0.10 0.35 0.11 336\n", + "\n", + "avg / total 0.67 0.69 0.65 0.67 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 
0.6862848134524435\n", + "f-score: 0.6862848134524435\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.52 0.93 0.46 0.70 0.47 785\n", + " 2 0.76 0.90 0.46 0.82 0.64 0.43 6224\n", + " 3 0.52 0.23 0.94 0.32 0.46 0.20 2026\n", + " 4 0.18 0.09 0.98 0.12 0.30 0.08 480\n", + "\n", + "avg / total 0.65 0.69 0.63 0.65 0.59 0.37 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7088807146610615\n", + "f-score: 0.7088807146610615\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.50 0.95 0.49 0.69 0.45 853\n", + " 2 0.80 0.89 0.46 0.84 0.64 0.43 6684\n", + " 3 0.33 0.26 0.93 0.29 0.49 0.22 1209\n", + " 4 0.27 0.09 0.98 0.13 0.29 0.08 769\n", + "\n", + "avg / total 0.67 0.71 0.61 0.68 0.60 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7135049921177089\n", + "f-score: 0.713504992117709\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.45 0.96 0.50 0.66 0.41 933\n", + " 2 0.78 0.91 0.40 0.84 0.61 0.39 6645\n", + " 3 0.38 0.19 0.93 0.25 0.42 0.16 1675\n", + " 4 0.10 0.07 0.98 0.09 0.27 0.06 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6846752154719361\n", + "f-score: 0.6846752154719361\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.53 0.94 0.52 0.70 0.48 1031\n", + " 2 0.78 0.87 0.51 0.83 0.66 0.46 6399\n", + " 3 0.35 0.21 0.93 0.27 0.44 0.18 1495\n", + " 4 0.18 0.13 0.96 0.15 0.35 0.11 589\n", + "\n", + "avg / total 0.65 0.68 0.65 0.66 0.61 0.39 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6931889846541939\n", + "f-score: 0.6931889846541939\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.50 0.61 0.91 0.55 0.75 0.54 1157\n", + " 2 0.81 0.85 0.55 0.83 0.69 0.48 6600\n", + " 3 0.28 0.20 0.93 0.23 0.43 0.17 1156\n", + " 4 0.14 
0.09 0.96 0.11 0.29 0.08 601\n", + "\n", + "avg / total 0.67 0.69 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7158171308460326\n", + "f-score: 0.7158171308460325\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.40 0.48 0.95 0.44 0.68 0.44 642\n", + " 2 0.79 0.90 0.40 0.84 0.60 0.38 6776\n", + " 3 0.46 0.21 0.95 0.29 0.44 0.18 1716\n", + " 4 0.12 0.07 0.98 0.09 0.27 0.07 381\n", + "\n", + "avg / total 0.68 0.72 0.56 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7128744088281661\n", + "f-score: 0.7128744088281661\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.47 0.54 0.93 0.50 0.71 0.48 974\n", + " 2 0.88 0.80 0.58 0.84 0.68 0.48 7520\n", + " 3 0.17 0.32 0.88 0.22 0.53 0.26 697\n", + " 4 0.08 0.07 0.97 0.07 0.26 0.06 324\n", + "\n", + "avg / total 0.76 0.71 0.65 0.73 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.708039936941671\n", + "f-score: 0.708039936941671\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.58 0.93 0.58 0.74 0.52 1247\n", + " 2 0.81 0.86 0.54 0.83 0.68 0.48 6585\n", + " 3 0.33 0.23 0.91 0.27 0.46 0.19 1462\n", + " 4 0.08 0.09 0.98 0.08 0.29 0.08 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.70 0.65 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6690488702049395\n", + "f-score: 0.6690488702049395\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.57 0.92 0.52 0.72 0.51 1129\n", + " 2 0.78 0.84 0.52 0.81 0.66 0.45 6348\n", + " 3 0.32 0.21 0.93 0.25 0.44 0.18 1285\n", + " 4 0.21 0.12 0.96 0.15 0.34 0.11 753\n", + "\n", + "avg / total 0.64 0.67 0.66 0.65 0.61 0.39 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6901734104046243\n", + "f-score: 0.6901734104046243\n", + " 
pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.55 0.93 0.53 0.72 0.50 1085\n", + " 2 0.78 0.87 0.50 0.83 0.66 0.45 6437\n", + " 3 0.41 0.19 0.94 0.26 0.42 0.16 1657\n", + " 4 0.11 0.15 0.96 0.13 0.37 0.13 336\n", + "\n", + "avg / total 0.66 0.69 0.64 0.67 0.61 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6906988964792433\n", + "f-score: 0.6906988964792433\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.44 0.55 0.94 0.49 0.72 0.50 785\n", + " 2 0.76 0.91 0.46 0.83 0.64 0.43 6224\n", + " 3 0.51 0.22 0.94 0.31 0.46 0.20 2026\n", + " 4 0.20 0.09 0.98 0.12 0.29 0.08 480\n", + "\n", + "avg / total 0.65 0.69 0.63 0.65 0.59 0.37 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7046768260641093\n", + "f-score: 0.7046768260641093\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.46 0.49 0.94 0.48 0.68 0.44 853\n", + " 2 0.80 0.88 0.48 0.84 0.65 0.44 6684\n", + " 3 0.33 0.26 0.92 0.29 0.49 0.23 1209\n", + " 4 0.29 0.10 0.98 0.15 0.32 0.09 769\n", + "\n", + "avg / total 0.67 0.70 0.62 0.68 0.61 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.711823436678928\n", + "f-score: 0.711823436678928\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.55 0.46 0.96 0.50 0.67 0.42 933\n", + " 2 0.78 0.91 0.41 0.84 0.61 0.39 6645\n", + " 3 0.37 0.19 0.93 0.25 0.42 0.16 1675\n", + " 4 0.07 0.05 0.98 0.06 0.22 0.04 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6871978137481606\n", + "f-score: 0.6871978137481606\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.55 0.94 0.53 0.71 0.49 1031\n", + " 2 0.79 0.87 0.51 0.83 0.67 0.46 6399\n", + " 3 0.35 0.22 0.93 0.27 0.45 0.19 1495\n", + " 4 0.19 0.13 0.96 0.15 0.35 0.11 589\n", + "\n", + "avg / total 0.65 0.69 
0.65 0.66 0.62 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6958166911919277\n", + "f-score: 0.6958166911919277\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.60 0.93 0.56 0.74 0.53 1157\n", + " 2 0.81 0.85 0.56 0.83 0.69 0.49 6600\n", + " 3 0.29 0.23 0.92 0.26 0.46 0.20 1156\n", + " 4 0.13 0.08 0.96 0.10 0.27 0.07 601\n", + "\n", + "avg / total 0.67 0.70 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7193904361534419\n", + "f-score: 0.7193904361534419\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.41 0.47 0.95 0.44 0.67 0.43 642\n", + " 2 0.79 0.91 0.40 0.85 0.60 0.38 6776\n", + " 3 0.45 0.20 0.95 0.28 0.44 0.18 1716\n", + " 4 0.16 0.09 0.98 0.12 0.30 0.08 381\n", + "\n", + "avg / total 0.68 0.72 0.56 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7104571728849185\n", + "f-score: 0.7104571728849185\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.54 0.93 0.51 0.71 0.49 974\n", + " 2 0.88 0.80 0.59 0.84 0.68 0.48 7520\n", + " 3 0.17 0.32 0.87 0.22 0.53 0.26 697\n", + " 4 0.08 0.07 0.97 0.07 0.26 0.06 324\n", + "\n", + "avg / total 0.76 0.71 0.66 0.73 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7130846032580137\n", + "f-score: 0.7130846032580137\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.60 0.93 0.59 0.75 0.54 1247\n", + " 2 0.81 0.87 0.54 0.84 0.68 0.48 6585\n", + " 3 0.33 0.22 0.92 0.27 0.45 0.19 1462\n", + " 4 0.10 0.09 0.98 0.10 0.30 0.08 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.70 0.65 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6694692590646348\n", + "f-score: 0.6694692590646348\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.60 0.92 
0.54 0.74 0.53 1129\n", + " 2 0.78 0.84 0.52 0.81 0.66 0.45 6348\n", + " 3 0.30 0.20 0.93 0.24 0.43 0.17 1285\n", + " 4 0.20 0.10 0.96 0.14 0.32 0.09 753\n", + "\n", + "avg / total 0.63 0.67 0.66 0.65 0.61 0.40 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6890173410404624\n", + "f-score: 0.6890173410404624\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.57 0.93 0.54 0.73 0.51 1085\n", + " 2 0.79 0.87 0.50 0.82 0.66 0.45 6437\n", + " 3 0.41 0.19 0.94 0.26 0.42 0.16 1657\n", + " 4 0.10 0.14 0.96 0.12 0.36 0.12 336\n", + "\n", + "avg / total 0.66 0.69 0.64 0.67 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6895428271150814\n", + "f-score: 0.6895428271150814\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.55 0.93 0.48 0.72 0.50 785\n", + " 2 0.76 0.91 0.47 0.83 0.65 0.44 6224\n", + " 3 0.52 0.22 0.95 0.31 0.45 0.19 2026\n", + " 4 0.19 0.10 0.98 0.13 0.31 0.09 480\n", + "\n", + "avg / total 0.66 0.69 0.63 0.65 0.60 0.38 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.6986862848134524\n", + "f-score: 0.6986862848134524\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.46 0.51 0.94 0.48 0.69 0.46 853\n", + " 2 0.80 0.87 0.48 0.83 0.65 0.44 6684\n", + " 3 0.32 0.26 0.92 0.29 0.49 0.23 1209\n", + " 4 0.23 0.09 0.97 0.13 0.30 0.08 769\n", + "\n", + "avg / total 0.66 0.70 0.62 0.68 0.60 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7104571728849185\n", + "f-score: 0.7104571728849185\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.55 0.47 0.96 0.51 0.67 0.43 933\n", + " 2 0.78 0.90 0.41 0.83 0.61 0.38 6645\n", + " 3 0.39 0.19 0.93 0.26 0.42 0.17 1675\n", + " 4 0.11 0.08 0.98 0.09 0.29 0.07 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 
59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6823628337187303\n", + "f-score: 0.6823628337187303\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.53 0.94 0.52 0.71 0.48 1031\n", + " 2 0.79 0.86 0.53 0.82 0.67 0.47 6399\n", + " 3 0.33 0.22 0.92 0.26 0.45 0.19 1495\n", + " 4 0.20 0.16 0.96 0.17 0.39 0.14 589\n", + "\n", + "avg / total 0.65 0.68 0.66 0.66 0.62 0.41 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6949758250998529\n", + "f-score: 0.6949758250998529\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.60 0.91 0.54 0.74 0.53 1157\n", + " 2 0.81 0.85 0.56 0.83 0.69 0.49 6600\n", + " 3 0.29 0.21 0.93 0.24 0.44 0.18 1156\n", + " 4 0.16 0.10 0.96 0.12 0.31 0.09 601\n", + "\n", + "avg / total 0.67 0.69 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7212821860220704\n", + "f-score: 0.7212821860220704\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.51 0.95 0.46 0.70 0.46 642\n", + " 2 0.79 0.91 0.41 0.85 0.61 0.39 6776\n", + " 3 0.47 0.19 0.95 0.27 0.43 0.17 1716\n", + " 4 0.17 0.10 0.98 0.13 0.32 0.09 381\n", + "\n", + "avg / total 0.68 0.72 0.56 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7012086179716237\n", + "f-score: 0.7012086179716237\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.47 0.53 0.93 0.50 0.70 0.47 974\n", + " 2 0.88 0.79 0.59 0.83 0.68 0.47 7520\n", + " 3 0.15 0.30 0.87 0.20 0.51 0.24 697\n", + " 4 0.06 0.06 0.97 0.06 0.24 0.05 324\n", + "\n", + "avg / total 0.76 0.70 0.66 0.72 0.66 0.44 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7096163951655281\n", + "f-score: 0.7096163951655281\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.58 0.58 0.94 0.58 0.74 0.53 1247\n", + " 2 0.81 0.86 0.54 0.83 0.68 0.48 
6585\n", + " 3 0.32 0.23 0.91 0.27 0.46 0.20 1462\n", + " 4 0.09 0.07 0.98 0.08 0.27 0.06 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.70 0.65 0.44 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6687335785601681\n", + "f-score: 0.6687335785601681\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.59 0.91 0.53 0.73 0.52 1129\n", + " 2 0.78 0.85 0.52 0.81 0.66 0.45 6348\n", + " 3 0.30 0.18 0.93 0.23 0.41 0.16 1285\n", + " 4 0.20 0.12 0.96 0.15 0.35 0.11 753\n", + "\n", + "avg / total 0.63 0.67 0.66 0.65 0.61 0.39 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6735680504466631\n", + "f-score: 0.6735680504466631\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.49 0.55 0.93 0.52 0.71 0.49 1085\n", + " 2 0.79 0.84 0.53 0.81 0.67 0.46 6437\n", + " 3 0.38 0.22 0.92 0.28 0.45 0.19 1657\n", + " 4 0.10 0.15 0.95 0.12 0.38 0.13 336\n", + "\n", + "avg / total 0.66 0.67 0.66 0.66 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6888071466106148\n", + "f-score: 0.6888071466106148\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.53 0.93 0.47 0.71 0.48 785\n", + " 2 0.76 0.91 0.46 0.83 0.65 0.44 6224\n", + " 3 0.52 0.23 0.94 0.32 0.46 0.20 2026\n", + " 4 0.16 0.07 0.98 0.10 0.27 0.07 480\n", + "\n", + "avg / total 0.65 0.69 0.63 0.65 0.59 0.37 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7138202837624803\n", + "f-score: 0.7138202837624804\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.51 0.95 0.50 0.69 0.46 853\n", + " 2 0.80 0.90 0.46 0.84 0.64 0.43 6684\n", + " 3 0.35 0.26 0.93 0.29 0.49 0.22 1209\n", + " 4 0.29 0.08 0.98 0.13 0.29 0.08 769\n", + "\n", + "avg / total 0.67 0.71 0.61 0.69 0.60 0.38 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + 
"Accuracy: 0.7114030478192328\n", + "f-score: 0.7114030478192328\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.55 0.45 0.96 0.50 0.66 0.41 933\n", + " 2 0.78 0.90 0.41 0.84 0.61 0.39 6645\n", + " 3 0.40 0.22 0.93 0.28 0.45 0.19 1675\n", + " 4 0.07 0.05 0.98 0.06 0.21 0.04 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.684359890687408\n", + "f-score: 0.684359890687408\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.50 0.53 0.94 0.52 0.71 0.48 1031\n", + " 2 0.79 0.87 0.52 0.83 0.67 0.46 6399\n", + " 3 0.36 0.23 0.92 0.28 0.46 0.19 1495\n", + " 4 0.18 0.13 0.96 0.15 0.35 0.11 589\n", + "\n", + "avg / total 0.65 0.68 0.65 0.66 0.62 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6882488963632541\n", + "f-score: 0.6882488963632541\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.62 0.92 0.56 0.75 0.55 1157\n", + " 2 0.81 0.83 0.57 0.82 0.69 0.49 6600\n", + " 3 0.30 0.22 0.93 0.26 0.45 0.19 1156\n", + " 4 0.15 0.13 0.95 0.14 0.36 0.12 601\n", + "\n", + "avg / total 0.67 0.69 0.68 0.68 0.65 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7191802417235943\n", + "f-score: 0.7191802417235943\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.41 0.52 0.95 0.46 0.70 0.47 642\n", + " 2 0.79 0.90 0.41 0.84 0.61 0.39 6776\n", + " 3 0.47 0.20 0.95 0.28 0.43 0.17 1716\n", + " 4 0.17 0.11 0.98 0.13 0.32 0.10 381\n", + "\n", + "avg / total 0.68 0.72 0.57 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7133998949027851\n", + "f-score: 0.7133998949027851\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.54 0.93 0.51 0.71 0.48 974\n", + " 2 0.88 0.80 0.58 0.84 0.69 0.48 7520\n", + " 3 0.16 0.29 0.88 0.21 0.51 0.24 697\n", + " 4 
0.09 0.08 0.97 0.09 0.28 0.07 324\n", + "\n", + "avg / total 0.76 0.71 0.66 0.73 0.66 0.45 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7060430898581188\n", + "f-score: 0.7060430898581188\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.59 0.93 0.58 0.74 0.53 1247\n", + " 2 0.81 0.85 0.55 0.83 0.68 0.48 6585\n", + " 3 0.33 0.24 0.91 0.28 0.47 0.20 1462\n", + " 4 0.08 0.08 0.98 0.08 0.27 0.07 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.69 0.65 0.44 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6672622175512349\n", + "f-score: 0.6672622175512349\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.47 0.57 0.91 0.52 0.72 0.50 1129\n", + " 2 0.77 0.85 0.51 0.81 0.66 0.44 6348\n", + " 3 0.29 0.18 0.93 0.22 0.41 0.15 1285\n", + " 4 0.20 0.11 0.96 0.14 0.32 0.10 753\n", + "\n", + "avg / total 0.63 0.67 0.65 0.64 0.60 0.38 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6905937992643195\n", + "f-score: 0.6905937992643195\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.54 0.94 0.53 0.71 0.49 1085\n", + " 2 0.79 0.87 0.50 0.83 0.66 0.45 6437\n", + " 3 0.43 0.21 0.94 0.28 0.44 0.18 1657\n", + " 4 0.11 0.15 0.95 0.12 0.37 0.13 336\n", + "\n", + "avg / total 0.67 0.69 0.65 0.67 0.62 0.40 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.687651077246453\n", + "f-score: 0.687651077246453\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.42 0.53 0.93 0.47 0.71 0.48 785\n", + " 2 0.76 0.90 0.46 0.83 0.65 0.43 6224\n", + " 3 0.52 0.23 0.94 0.32 0.47 0.20 2026\n", + " 4 0.16 0.08 0.98 0.10 0.27 0.07 480\n", + "\n", + "avg / total 0.65 0.69 0.63 0.65 0.59 0.37 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7031003678402522\n", + "f-score: 
0.7031003678402522\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.47 0.53 0.94 0.50 0.70 0.47 853\n", + " 2 0.80 0.87 0.49 0.84 0.66 0.45 6684\n", + " 3 0.33 0.27 0.92 0.29 0.49 0.23 1209\n", + " 4 0.28 0.11 0.97 0.16 0.33 0.10 769\n", + "\n", + "avg / total 0.67 0.70 0.63 0.68 0.61 0.39 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7104571728849185\n", + "f-score: 0.7104571728849185\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.53 0.45 0.96 0.49 0.66 0.41 933\n", + " 2 0.78 0.90 0.40 0.84 0.60 0.38 6645\n", + " 3 0.40 0.19 0.94 0.26 0.42 0.16 1675\n", + " 4 0.11 0.09 0.98 0.10 0.29 0.08 262\n", + "\n", + "avg / total 0.67 0.71 0.57 0.68 0.57 0.34 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6803657767500526\n", + "f-score: 0.6803657767500526\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.55 0.94 0.53 0.72 0.49 1031\n", + " 2 0.79 0.86 0.53 0.82 0.67 0.47 6399\n", + " 3 0.32 0.22 0.91 0.26 0.45 0.19 1495\n", + " 4 0.20 0.14 0.96 0.16 0.36 0.12 589\n", + "\n", + "avg / total 0.65 0.68 0.66 0.66 0.62 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6965524490224931\n", + "f-score: 0.6965524490224931\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.60 0.92 0.55 0.74 0.54 1157\n", + " 2 0.81 0.85 0.55 0.83 0.69 0.48 6600\n", + " 3 0.31 0.22 0.93 0.25 0.45 0.19 1156\n", + " 4 0.16 0.11 0.96 0.13 0.33 0.10 601\n", + "\n", + "avg / total 0.67 0.70 0.67 0.68 0.64 0.43 9514\n", + "\n", + "[(1, 59442), (2, 59442), (3, 59442), (4, 59442)]\n", + "For fold 1:\n", + "Accuracy: 0.7225433526011561\n", + "f-score: 0.7225433526011561\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.41 0.50 0.95 0.45 0.69 0.45 642\n", + " 2 0.79 0.91 0.41 0.85 0.61 0.39 6776\n", + " 3 0.47 0.19 0.95 0.27 0.43 0.17 1716\n", + " 4 0.19 0.12 0.98 0.14 0.34 0.10 381\n", + 
"\n", + "avg / total 0.68 0.72 0.56 0.69 0.57 0.34 9515\n", + "\n", + "[(1, 58698), (2, 58698), (3, 58698), (4, 58698)]\n", + "For fold 2:\n", + "Accuracy: 0.7017341040462428\n", + "f-score: 0.7017341040462428\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.46 0.52 0.93 0.49 0.70 0.47 974\n", + " 2 0.88 0.79 0.58 0.83 0.68 0.47 7520\n", + " 3 0.16 0.31 0.87 0.21 0.52 0.26 697\n", + " 4 0.06 0.06 0.97 0.06 0.23 0.05 324\n", + "\n", + "avg / total 0.75 0.70 0.65 0.72 0.65 0.44 9515\n", + "\n", + "[(1, 59633), (2, 59633), (3, 59633), (4, 59633)]\n", + "For fold 3:\n", + "Accuracy: 0.7066736731476616\n", + "f-score: 0.7066736731476616\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.58 0.58 0.94 0.58 0.73 0.52 1247\n", + " 2 0.81 0.86 0.54 0.83 0.68 0.48 6585\n", + " 3 0.33 0.24 0.91 0.28 0.47 0.20 1462\n", + " 4 0.10 0.10 0.98 0.10 0.31 0.09 221\n", + "\n", + "avg / total 0.69 0.71 0.66 0.70 0.65 0.43 9515\n", + "\n", + "[(1, 59870), (2, 59870), (3, 59870), (4, 59870)]\n", + "For fold 4:\n", + "Accuracy: 0.6660010509721492\n", + "f-score: 0.6660010509721492\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.48 0.59 0.91 0.53 0.74 0.53 1129\n", + " 2 0.78 0.84 0.52 0.81 0.66 0.45 6348\n", + " 3 0.29 0.18 0.93 0.22 0.41 0.15 1285\n", + " 4 0.18 0.11 0.96 0.14 0.32 0.10 753\n", + "\n", + "avg / total 0.63 0.67 0.66 0.64 0.61 0.39 9515\n", + "\n", + "[(1, 59781), (2, 59781), (3, 59781), (4, 59781)]\n", + "For fold 5:\n", + "Accuracy: 0.6910141881240147\n", + "f-score: 0.6910141881240147\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.53 0.94 0.53 0.71 0.48 1085\n", + " 2 0.78 0.88 0.49 0.83 0.66 0.45 6437\n", + " 3 0.40 0.19 0.94 0.26 0.42 0.16 1657\n", + " 4 0.09 0.11 0.96 0.10 0.32 0.10 336\n", + "\n", + "avg / total 0.66 0.69 0.64 0.67 0.61 0.39 9515\n", + "\n", + "[(1, 59994), (2, 59994), (3, 59994), (4, 59994)]\n", + "For fold 6:\n", + "Accuracy: 0.6917498686284813\n", + "f-score: 0.6917498686284813\n", + " pre rec spe f1 geo iba sup\n", 
+ "\n", + " 1 0.44 0.56 0.94 0.49 0.72 0.50 785\n", + " 2 0.76 0.90 0.47 0.83 0.65 0.44 6224\n", + " 3 0.53 0.25 0.94 0.34 0.48 0.22 2026\n", + " 4 0.17 0.08 0.98 0.11 0.29 0.07 480\n", + "\n", + "avg / total 0.66 0.69 0.64 0.66 0.60 0.38 9515\n", + "\n", + "[(1, 59534), (2, 59534), (3, 59534), (4, 59534)]\n", + "For fold 7:\n", + "Accuracy: 0.7042564372044141\n", + "f-score: 0.7042564372044141\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.46 0.52 0.94 0.49 0.70 0.47 853\n", + " 2 0.80 0.87 0.50 0.84 0.66 0.45 6684\n", + " 3 0.34 0.29 0.92 0.31 0.51 0.25 1209\n", + " 4 0.30 0.11 0.98 0.16 0.33 0.10 769\n", + "\n", + "avg / total 0.67 0.70 0.63 0.68 0.62 0.40 9515\n", + "\n", + "[(1, 59573), (2, 59573), (3, 59573), (4, 59573)]\n", + "For fold 8:\n", + "Accuracy: 0.7128744088281661\n", + "f-score: 0.7128744088281661\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.57 0.46 0.96 0.51 0.67 0.42 933\n", + " 2 0.78 0.90 0.42 0.84 0.61 0.39 6645\n", + " 3 0.38 0.22 0.92 0.27 0.45 0.19 1675\n", + " 4 0.12 0.07 0.98 0.09 0.27 0.06 262\n", + "\n", + "avg / total 0.67 0.71 0.58 0.68 0.58 0.35 9515\n", + "\n", + "[(1, 59819), (2, 59819), (3, 59819), (4, 59819)]\n", + "For fold 9:\n", + "Accuracy: 0.6898255202858945\n", + "f-score: 0.6898255202858945\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.51 0.54 0.94 0.53 0.71 0.49 1031\n", + " 2 0.79 0.87 0.51 0.83 0.67 0.46 6399\n", + " 3 0.35 0.21 0.93 0.27 0.45 0.18 1495\n", + " 4 0.22 0.15 0.97 0.18 0.38 0.13 589\n", + "\n", + "avg / total 0.65 0.69 0.65 0.67 0.62 0.40 9514\n", + "\n", + "[(1, 59618), (2, 59618), (3, 59618), (4, 59618)]\n", + "For fold 10:\n", + "Accuracy: 0.6950809333613622\n", + "f-score: 0.6950809333613622\n", + " pre rec spe f1 geo iba sup\n", + "\n", + " 1 0.52 0.60 0.92 0.56 0.75 0.54 1157\n", + " 2 0.81 0.85 0.56 0.83 0.69 0.49 6600\n", + " 3 0.28 0.21 0.93 0.24 0.44 0.18 1156\n", + " 4 0.18 0.13 0.96 0.15 0.35 0.11 601\n", + "\n", + "avg / total 0.67 0.70 0.67 0.68 0.64 0.43 9514\n", 
# Repeated 10-fold cross-validation of XGBoost trained on SMOTE-NC-balanced folds.
#
# For every (repeat, fold) pair this cell:
#   1. imputes missing values with MICE (IterativeImputer) fitted on the training fold,
#   2. standardizes the `cols_to_norm` columns with training-fold statistics,
#   3. balances the training fold with SMOTENC,
#   4. fits an XGBClassifier and saves a ROC/AUC figure as a PDF,
#   5. prints accuracy, micro F1 and the imbalanced classification report,
#   6. appends the per-class and average report rows to CSV files.
#
# `X`, `y`, `df_cols` and `cols_to_norm` are not defined in this cell; they are
# expected to have been created by earlier cells of the notebook.

from collections import Counter
import io
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn import preprocessing
from imblearn.over_sampling import SMOTENC
from sklearn.metrics import f1_score
from imblearn.metrics import classification_report_imbalanced
from yellowbrick.classifier import ROCAUC
# explicitly require this experimental feature
from sklearn.experimental import enable_iterative_imputer  # noqa
# now you can import normally from sklearn.impute
from sklearn.impute import IterativeImputer
# The next two imports are unused below; they are kept because a notebook
# shares one namespace and other cells may rely on them.
from sklearn.linear_model import LogisticRegression
from numpy import loadtxt
# Set before xgboost is imported, exactly as the original cell did.
os.environ['KMP_DUPLICATE_LIB_OK']='True'
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Display names handed to the ROC/AUC visualizer, one per target class.
classes = ['Death', 'Home', 'Nursing Home', 'Rehabilitation']

# Number of times the whole 10-fold procedure is repeated.
N_REPEATS = 10

# Positional indices of the columns SMOTENC must treat as categorical
# (same values as the original literal list: 1-3, 22-30 and 33-61).
SMOTENC_CATEGORICAL_FEATURES = [1, 2, 3, *range(22, 31), *range(33, 62)]

# Report row label -> CSV file that accumulates that row across folds.
# NOTE(review): the D/H/N/R suffixes presumably follow the order of `classes`
# (1=Death, 2=Home, 3=Nursing Home, 4=Rehabilitation) -- confirm.
CLASS_REPORT_FILES = {
    "1": "XGB-SMOTENC-D.csv",
    "2": "XGB-SMOTENC-H.csv",
    "3": "XGB-SMOTENC-N.csv",
    "4": "XGB-SMOTENC-R.csv",
}
AVERAGE_REPORT_LABEL = "avg / total"
AVERAGE_REPORT_FILE = "XGB-SMOTENC-avg.csv"


def _parse_imbalanced_report(report_text):
    """Parse the text of ``classification_report_imbalanced`` by row label.

    Returns a dict mapping each row label (e.g. ``"1"`` or ``"avg / total"``)
    to its seven trailing metrics ``[pre, rec, spe, f1, geo, iba, sup]`` as
    floats. The header line and blank lines are skipped, so the result does
    not depend on how many blank lines the report contains.
    """
    rows = {}
    for line in report_text.splitlines():
        # Split off the last seven whitespace-separated fields; whatever is
        # left in front of them is the (possibly multi-word) row label.
        parts = line.rsplit(None, 7)
        if len(parts) != 8:
            continue  # header (7 fields, no label) or blank line
        try:
            rows[parts[0].strip()] = [float(value) for value in parts[1:]]
        except ValueError:
            continue  # not a metrics row
    return rows


def _append_csv_row(values, path):
    """Append ``values`` as one header-less CSV row to ``path``."""
    pd.DataFrame([values]).to_csv(path, sep=',', encoding='utf-8', doublequote=False,
                                  index=False, mode="a", header=False)


# KFold is not shuffled, so every repeat reuses the same splits; differences
# between repeats come only from stochastic steps such as the
# posterior-sampling imputation.
kf = KFold(n_splits=10)

for repeat in range(1, N_REPEATS + 1):

    for fold, (train_index, test_index) in enumerate(kf.split(X), 1):
        X_train = X.iloc[train_index]
        y_train = y.iloc[train_index]
        X_test = X.iloc[test_index]
        y_test = y.iloc[test_index]

        #------------------------------Impute (MICE)------------------------------------
        # Fit the imputer on the training fold only and reuse it for the test
        # fold, so the held-out rows are filled with training-fold statistics
        # (the same convention the scaler below follows).
        imputer = IterativeImputer(verbose=False, sample_posterior=True)
        X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=df_cols)
        X_test = pd.DataFrame(imputer.transform(X_test), columns=df_cols)

        #------------------------------Standardize train and test------------------------------------
        std_scale = preprocessing.StandardScaler().fit(X_train[cols_to_norm])
        X_train[cols_to_norm] = std_scale.transform(X_train[cols_to_norm])
        X_test[cols_to_norm] = std_scale.transform(X_test[cols_to_norm])

        #------------------------------Oversample the training fold------------------------------------
        sm = SMOTENC(random_state=50, categorical_features=SMOTENC_CATEGORICAL_FEATURES)
        # `fit_resample` is the supported name; `fit_sample` was a deprecated alias.
        X_train_oversampled, y_train_oversampled = sm.fit_resample(X_train, y_train)
        # Train on a plain array because prediction below also uses a plain array.
        X_train_oversampled = np.asarray(X_train_oversampled)
        print(sorted(Counter(y_train_oversampled).items()))

        #------------------------------Fit and evaluate------------------------------------
        # Hyperparameters are optimized using hyperopt
        model = XGBClassifier(max_depth=8, gamma=0.063, colsample_bytree=0.71)
        model.fit(X_train_oversampled, y_train_oversampled)
        y_pred = model.predict(X_test.values)

        visualizer = ROCAUC(model, classes=classes)
        visualizer.fit(X_train_oversampled, y_train_oversampled)  # Fit the training data to the visualizer
        visualizer.score(X_test.values, y_test)  # Evaluate the model on the test data
        # `show` replaced `poof` in newer yellowbrick releases; use whichever exists.
        save_figure = getattr(visualizer, "show", None) or visualizer.poof
        save_figure("XB_SMOTENC_{}_{}.pdf".format(repeat, fold), clear_figure=True)

        print(f'For fold {fold}:')
        # Same value as model.score(...), without predicting a second time.
        print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
        # Micro-averaged F1 on single-label predictions equals accuracy, which
        # is why the two printed numbers coincide.
        f1 = f1_score(y_test, y_pred, average='micro')
        print(f'f-score: {f1}')

        report_text = classification_report_imbalanced(y_test, y_pred)
        print(report_text)

        #------------------------------Persist report rows------------------------------------
        # Rows are looked up by label rather than by position, and the parsed
        # report lives in its own name so the notebook-level `df` is not rebound.
        report_rows = _parse_imbalanced_report(report_text)
        for label, csv_path in CLASS_REPORT_FILES.items():
            if label in report_rows:
                _append_csv_row(report_rows[label], csv_path)
        if AVERAGE_REPORT_LABEL in report_rows:
            _append_csv_row([AVERAGE_REPORT_LABEL] + report_rows[AVERAGE_REPORT_LABEL],
                            AVERAGE_REPORT_FILE)