{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9af65cb9-8a84-47e4-8bea-1547afe46a15",
   "metadata": {},
   "source": [
    "DIABETES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3fba3c9b-5e48-4771-a28a-4ec54fc4bc1b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pregnancies 0\n",
       "Glucose 0\n",
       "BloodPressure 0\n",
       "SkinThickness 0\n",
       "Insulin 0\n",
       "BMI 0\n",
       "DiabetesPedigreeFunction 0\n",
       "Age 0\n",
       "Outcome 0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import svm\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "# Directory holding diabetes.csv. Set the DIABETES_DATA_DIR environment\n",
    "# variable to point at your own copy; the fallback is the path this notebook\n",
    "# was originally run against.\n",
    "DATA_DIR = Path(os.environ.get(\n",
    "    'DIABETES_DATA_DIR',\n",
    "    r'C:\\Users\\Pranshu Saini\\Desktop\\disease-prediction-main\\docpat\\datasets',\n",
    "))\n",
    "dataset = pd.read_csv(DATA_DIR / 'diabetes.csv')\n",
    "\n",
    "# Per-column count of missing values (this is the cell's displayed result).\n",
    "dataset.isna().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e99dd297-c606-4501-80c8-fa87d89fc237",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The reduced dataframe has 9 columns.\n"
     ]
    }
   ],
   "source": [
    "# Remove features that are highly correlated with another feature.\n",
    "# The label column is left out of the correlation matrix so a feature is never\n",
    "# discarded just because it correlates strongly with the target.\n",
    "TARGET_COL = 'Outcome'\n",
    "CORR_THRESHOLD = 0.92\n",
    "\n",
    "corr_matrix = dataset.drop(columns=[TARGET_COL], errors='ignore').corr().abs()\n",
    "\n",
    "# Hide the upper triangle and the diagonal so every pair is compared once\n",
    "# and no column is compared with itself.\n",
    "mask = np.triu(np.ones_like(corr_matrix, dtype=bool))\n",
    "tri_df = corr_matrix.mask(mask)\n",
    "\n",
    "to_drop = [col for col in tri_df.columns if (tri_df[col] > CORR_THRESHOLD).any()]\n",
    "\n",
    "df = dataset.drop(columns=to_drop)\n",
    "\n",
    "print(f\"The reduced dataframe has {df.shape[1]} columns.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "64693a8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | Pregnancies | \n", "Glucose | \n", "BloodPressure | \n", "SkinThickness | \n", "Insulin | \n", "BMI | \n", "DiabetesPedigreeFunction | \n", "Age | \n", "Outcome | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "6 | \n", "148 | \n", "72 | \n", "35 | \n", "0 | \n", "33.6 | \n", "0.627 | \n", "50 | \n", "1 | \n", "
1 | \n", "1 | \n", "85 | \n", "66 | \n", "29 | \n", "0 | \n", "26.6 | \n", "0.351 | \n", "31 | \n", "0 | \n", "
2 | \n", "8 | \n", "183 | \n", "64 | \n", "0 | \n", "0 | \n", "23.3 | \n", "0.672 | \n", "32 | \n", "1 | \n", "
3 | \n", "1 | \n", "89 | \n", "66 | \n", "23 | \n", "94 | \n", "28.1 | \n", "0.167 | \n", "21 | \n", "0 | \n", "
4 | \n", "0 | \n", "137 | \n", "40 | \n", "35 | \n", "168 | \n", "43.1 | \n", "2.288 | \n", "33 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
763 | \n", "10 | \n", "101 | \n", "76 | \n", "48 | \n", "180 | \n", "32.9 | \n", "0.171 | \n", "63 | \n", "0 | \n", "
764 | \n", "2 | \n", "122 | \n", "70 | \n", "27 | \n", "0 | \n", "36.8 | \n", "0.340 | \n", "27 | \n", "0 | \n", "
765 | \n", "5 | \n", "121 | \n", "72 | \n", "23 | \n", "112 | \n", "26.2 | \n", "0.245 | \n", "30 | \n", "0 | \n", "
766 | \n", "1 | \n", "126 | \n", "60 | \n", "0 | \n", "0 | \n", "30.1 | \n", "0.349 | \n", "47 | \n", "1 | \n", "
767 | \n", "1 | \n", "93 | \n", "70 | \n", "31 | \n", "0 | \n", "30.4 | \n", "0.315 | \n", "23 | \n", "0 | \n", "
768 rows × 9 columns
\n", "GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['gini', 'entropy'],\n", " 'max_depth': range(2, 32),\n", " 'min_samples_leaf': range(1, 10),\n", " 'min_samples_split': range(2, 10),\n", " 'splitter': ['best', 'random']},\n", " verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['gini', 'entropy'],\n", " 'max_depth': range(2, 32),\n", " 'min_samples_leaf': range(1, 10),\n", " 'min_samples_split': range(2, 10),\n", " 'splitter': ['best', 'random']},\n", " verbose=1)
DecisionTreeClassifier()
DecisionTreeClassifier()
DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", " min_samples_split=6, splitter='random')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", " min_samples_split=6, splitter='random')
\n", " | Model | \n", "Score | \n", "
---|---|---|
0 | \n", "Logistic Regression | \n", "75.32 | \n", "
1 | \n", "KNN | \n", "71.43 | \n", "
3 | \n", "Decision Tree Classifier | \n", "68.83 | \n", "
2 | \n", "SVM | \n", "64.29 | \n", "