{ "cells": [ { "cell_type": "code", "execution_count": 142, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", "from sklearn.impute import SimpleImputer\n", "from sklearn.model_selection import train_test_split, GridSearchCV\n", "from sklearn.compose import make_column_selector, make_column_transformer\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.decomposition import PCA\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from xgboost import XGBClassifier\n", "from sklearn.metrics import classification_report, ConfusionMatrixDisplay, roc_auc_score" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 57, "outputs": [ { "data": { "text/plain": " gender age hypertension heart_disease ever_married work_type \\\nid \n9046 Male 67.0 0 1 Yes Private \n51676 Female 61.0 0 0 Yes Self-employed \n31112 Male 80.0 0 1 Yes Private \n60182 Female 49.0 0 0 Yes Private \n1665 Female 79.0 1 0 Yes Self-employed \n... ... ... ... ... ... ... \n18234 Female 80.0 1 0 Yes Private \n44873 Female 81.0 0 0 Yes Self-employed \n19723 Female 35.0 0 0 Yes Self-employed \n37544 Male 51.0 0 0 Yes Private \n44679 Female 44.0 0 0 Yes Govt_job \n\n Residence_type avg_glucose_level bmi smoking_status stroke \nid \n9046 Urban 228.69 36.6 formerly smoked 1 \n51676 Rural 202.21 NaN never smoked 1 \n31112 Rural 105.92 32.5 never smoked 1 \n60182 Urban 171.23 34.4 smokes 1 \n1665 Rural 174.12 24.0 never smoked 1 \n... ... ... ... ... ... \n18234 Urban 83.75 NaN never smoked 0 \n44873 Urban 125.20 40.0 never smoked 0 \n19723 Rural 82.99 30.6 never smoked 0 \n37544 Rural 166.29 25.6 formerly smoked 0 \n44679 Urban 85.28 26.2 Unknown 0 \n\n[5110 rows x 11 columns]", "text/html": "
\n | gender | \nage | \nhypertension | \nheart_disease | \never_married | \nwork_type | \nResidence_type | \navg_glucose_level | \nbmi | \nsmoking_status | \nstroke | \n
---|---|---|---|---|---|---|---|---|---|---|---|
id | \n\n | \n | \n | \n | \n | \n | \n | \n | \n | \n | \n |
9046 | \nMale | \n67.0 | \n0 | \n1 | \nYes | \nPrivate | \nUrban | \n228.69 | \n36.6 | \nformerly smoked | \n1 | \n
51676 | \nFemale | \n61.0 | \n0 | \n0 | \nYes | \nSelf-employed | \nRural | \n202.21 | \nNaN | \nnever smoked | \n1 | \n
31112 | \nMale | \n80.0 | \n0 | \n1 | \nYes | \nPrivate | \nRural | \n105.92 | \n32.5 | \nnever smoked | \n1 | \n
60182 | \nFemale | \n49.0 | \n0 | \n0 | \nYes | \nPrivate | \nUrban | \n171.23 | \n34.4 | \nsmokes | \n1 | \n
1665 | \nFemale | \n79.0 | \n1 | \n0 | \nYes | \nSelf-employed | \nRural | \n174.12 | \n24.0 | \nnever smoked | \n1 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
18234 | \nFemale | \n80.0 | \n1 | \n0 | \nYes | \nPrivate | \nUrban | \n83.75 | \nNaN | \nnever smoked | \n0 | \n
44873 | \nFemale | \n81.0 | \n0 | \n0 | \nYes | \nSelf-employed | \nUrban | \n125.20 | \n40.0 | \nnever smoked | \n0 | \n
19723 | \nFemale | \n35.0 | \n0 | \n0 | \nYes | \nSelf-employed | \nRural | \n82.99 | \n30.6 | \nnever smoked | \n0 | \n
37544 | \nMale | \n51.0 | \n0 | \n0 | \nYes | \nPrivate | \nRural | \n166.29 | \n25.6 | \nformerly smoked | \n0 | \n
44679 | \nFemale | \n44.0 | \n0 | \n0 | \nYes | \nGovt_job | \nUrban | \n85.28 | \n26.2 | \nUnknown | \n0 | \n
5110 rows × 11 columns
\n\n | age | \nhypertension | \nheart_disease | \navg_glucose_level | \nbmi | \nstroke | \n
---|---|---|---|---|---|---|
count | \n5110.000000 | \n5110.000000 | \n5110.000000 | \n5110.000000 | \n4909.000000 | \n5110.000000 | \n
mean | \n43.226614 | \n0.097456 | \n0.054012 | \n106.147677 | \n28.893237 | \n0.048728 | \n
std | \n22.612647 | \n0.296607 | \n0.226063 | \n45.283560 | \n7.854067 | \n0.215320 | \n
min | \n0.080000 | \n0.000000 | \n0.000000 | \n55.120000 | \n10.300000 | \n0.000000 | \n
25% | \n25.000000 | \n0.000000 | \n0.000000 | \n77.245000 | \n23.500000 | \n0.000000 | \n
50% | \n45.000000 | \n0.000000 | \n0.000000 | \n91.885000 | \n28.100000 | \n0.000000 | \n
75% | \n61.000000 | \n0.000000 | \n0.000000 | \n114.090000 | \n33.100000 | \n0.000000 | \n
max | \n82.000000 | \n1.000000 | \n1.000000 | \n271.740000 | \n97.600000 | \n1.000000 | \n