--- a +++ b/heart_disease.ipynb @@ -0,0 +1,1896 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "41bc3d29-1e2f-41ec-b919-37d014f4769b", + "metadata": {}, + "source": [ + "HEART DISEASE\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "596f49af-da22-40fd-b931-972608d711f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>age</th>\n", + " <th>sex</th>\n", + " <th>cp</th>\n", + " <th>trestbps</th>\n", + " <th>chol</th>\n", + " <th>fbs</th>\n", + " <th>restecg</th>\n", + " <th>thalach</th>\n", + " <th>exang</th>\n", + " <th>oldpeak</th>\n", + " <th>slope</th>\n", + " <th>ca</th>\n", + " <th>thal</th>\n", + " <th>target</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>63</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>145</td>\n", + " <td>233</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>150</td>\n", + " <td>0</td>\n", + " <td>2.3</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>37</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>130</td>\n", + " <td>250</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>187</td>\n", + " <td>0</td>\n", + " <td>3.5</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>41</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>130</td>\n", + " <td>204</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>172</td>\n", + " <td>0</td>\n", + " <td>1.4</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>56</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>120</td>\n", + " <td>236</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>178</td>\n", + " <td>0</td>\n", + " <td>0.8</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>57</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>120</td>\n", + " <td>354</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>163</td>\n", + " <td>1</td>\n", + " <td>0.6</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>57</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>140</td>\n", + " <td>192</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>148</td>\n", + " <td>0</td>\n", + " <td>0.4</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>6</th>\n", + " <td>56</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>140</td>\n", + " <td>294</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>153</td>\n", + " <td>0</td>\n", + " <td>1.3</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>7</th>\n", + " <td>44</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>120</td>\n", + " <td>263</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>173</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>8</th>\n", + " <td>52</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>172</td>\n", + " <td>199</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>162</td>\n", + " <td>0</td>\n", + " <td>0.5</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>9</th>\n", + " <td>57</td>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>150</td>\n", + " <td>168</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>174</td>\n", + " <td>0</td>\n", + " <td>1.6</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>10</th>\n", + " <td>54</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>140</td>\n", + " <td>239</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>160</td>\n", + " <td>0</td>\n", + " <td>1.2</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>11</th>\n", + " <td>48</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>130</td>\n", + " <td>275</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>139</td>\n", + " <td>0</td>\n", + " <td>0.2</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>12</th>\n", + " <td>49</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>130</td>\n", + " <td>266</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>171</td>\n", + " <td>0</td>\n", + " <td>0.6</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>13</th>\n", + " <td>64</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>110</td>\n", + " <td>211</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>144</td>\n", + " <td>1</td>\n", + " <td>1.8</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>14</th>\n", + " <td>58</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>150</td>\n", + " <td>283</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>162</td>\n", + " <td>0</td>\n", + " <td>1.0</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "0 63 1 3 145 233 1 0 150 0 2.3 \n", + "1 37 1 2 130 250 0 1 187 0 3.5 \n", + "2 41 0 1 130 204 0 0 172 0 1.4 \n", + "3 56 1 1 120 236 0 1 178 0 0.8 \n", + "4 57 0 0 120 354 0 1 163 1 0.6 \n", + "5 57 1 0 140 192 0 1 148 0 0.4 \n", + "6 56 0 1 140 294 0 0 153 0 1.3 \n", + "7 44 1 1 120 263 0 1 173 0 0.0 \n", + "8 52 1 2 172 199 1 1 162 0 0.5 \n", + "9 57 1 2 150 168 0 1 174 0 1.6 \n", + "10 54 1 0 140 239 0 1 160 0 1.2 \n", + "11 48 0 2 130 275 0 1 139 0 0.2 \n", + "12 49 1 1 130 266 0 1 171 0 0.6 \n", + "13 64 1 3 110 211 0 0 144 1 1.8 \n", + "14 58 0 3 150 283 1 0 162 0 1.0 \n", + "\n", + " slope ca thal target \n", + "0 0 0 1 1 \n", + "1 0 0 2 1 \n", + "2 2 0 2 1 \n", + "3 2 0 2 1 \n", + "4 2 0 2 1 \n", + "5 1 0 1 1 \n", + "6 1 0 2 1 \n", + "7 2 0 3 1 \n", + "8 2 0 3 1 \n", + "9 2 0 2 1 \n", + "10 2 0 2 1 \n", + "11 2 0 2 1 \n", + "12 2 0 2 1 \n", + "13 1 0 2 1 \n", + "14 2 0 2 1 " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.metrics import accuracy_score\n", + "\n", + "\n", + "df= pd.read_csv(r'C:\\Users\\Pranshu Saini\\Desktop\\disease-prediction-main\\docpat\\datasets\\heart.csv')\n", + "df.head(15)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7bd28756", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(303, 14)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "3244edd5-3dd2-47f9-85da-b9cc26fed0d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "age 0\n", + "sex 0\n", + "cp 0\n", + "trestbps 0\n", + "chol 0\n", + "fbs 0\n", + "restecg 0\n", + "thalach 0\n", + "exang 0\n", + "oldpeak 0\n", + "slope 0\n", + "ca 0\n", + "thal 0\n", + "target 0\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "348dde1b-1ff9-4d98-91a9-29688f5b0933", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The reduced dataframe has 14 columns.\n" + ] + } + ], + "source": [ + "# removing highly correlated features\n", + "\n", + "corr_matrix = df.corr().abs() \n", + "\n", + "mask = np.triu(np.ones_like(corr_matrix, dtype = bool))\n", + "tri_df = corr_matrix.mask(mask)\n", + "\n", + "to_drop = [x for x in tri_df.columns if any(tri_df[x] > 0.92)]\n", + "\n", + "df = df.drop(to_drop, axis = 1)\n", + "\n", + "print(f\"The reduced dataframe has {df.shape[1]} columns.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "16a87f6e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(303, 14)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "ade36649-a20a-4bf2-8368-64f70cd000f2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(303, 13) (242, 13) (61, 13)\n" + ] + } + ], + "source": [ + "A = df.drop(columns='target', axis=1)\n", + "B = df['target']\n", + "A_training, A_testing, B_training, B_testing = train_test_split(A, B, test_size=0.2, stratify=B, random_state=2)\n", + "\n", + "print(A.shape, A_training.shape, A_testing.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "34f600fd-1faf-4a62-8aba-9a6d0fa38644", + "metadata": {}, + "source": [ + "LogisticRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fce2ced2-6375-4077-921e-2f568056fffe", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8512396694214877\n", + "0.819672131147541\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Pranshu Saini\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + } + ], + "source": [ + "# fitting data to model\n", + "\n", + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "log_reg = LogisticRegression()\n", + "log_reg.fit(A_training, B_training)\n", + "B_pred = log_reg.predict(A_testing)\n", + "# accuracy score\n", + "\n", + "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n", + "\n", + "print(accuracy_score(B_training, log_reg.predict(A_training)))\n", + "\n", + "log_reg_acc = accuracy_score(B_testing, log_reg.predict(A_testing))\n", + "print(log_reg_acc)" + ] + }, + { + "cell_type": "markdown", + "id": "f1a6ab2c-fc10-4f0b-8b07-1e01f8ce1243", + "metadata": {}, + "source": [ + "K Neighbors Classifier (KNN)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f8aecca7-018d-41ba-bb3c-2a96cf73b007", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.78099173553719\n", + "0.6229508196721312\n" + ] + } + ], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "knn = KNeighborsClassifier()\n", + "knn.fit(A_training, B_training)\n", + "# model predictions \n", + "\n", + "B_pred = knn.predict(A_testing)\n", + "# accuracy score\n", + "\n", + "print(accuracy_score(B_training, knn.predict(A_training)))\n", + "\n", + "knn_acc = accuracy_score(B_testing, knn.predict(A_testing))\n", + "print(knn_acc)" + ] + }, + { + "cell_type": "markdown", + "id": "dd05e4cf-6aaf-468e-9eef-6f1b27d99560", + "metadata": {}, + "source": [ + "Support Vector Machine (SVM)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "35cc692b-d57f-4738-badf-5fd6c02c2889", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'C': 20, 'gamma': 0.0001}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "svc = SVC(probability=True)\n", + "parameters = {\n", + " 'gamma' : [0.0001, 0.001, 0.01, 0.1],\n", + " 'C' : [0.01, 0.05, 0.5, 0.1, 1, 10, 15, 20]\n", + "}\n", + "\n", + "grid_search = GridSearchCV(svc, parameters)\n", + "grid_search.fit(A_training, B_training)\n", + "# best parameters\n", + "\n", + "grid_search.best_params_\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "cb090811-a42c-47ec-b7c7-43889430b93d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6981292517006803" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# best score \n", + "\n", + "grid_search.best_score_\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "639e3132-7346-40d5-becf-6d230a9004fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0\n", + "0.5409836065573771\n", + " precision recall f1-score support\n", + "\n", + " 0 0.50 0.43 0.46 28\n", + " 1 0.57 0.64 0.60 33\n", + "\n", + " accuracy 0.54 61\n", + " macro avg 0.53 0.53 0.53 61\n", + "weighted avg 0.54 0.54 0.54 61\n", + "\n" + ] + } + ], + "source": [ + "svc = SVC(C = 10, gamma = 0.01, probability=True)\n", + "svc.fit(A_training, B_training)\n", + "# model predictions \n", + "\n", + "B_pred = svc.predict(A_testing)\n", + "# accuracy score\n", + "\n", + "print(accuracy_score(B_training, svc.predict(A_training)))\n", + "\n", + "svc_acc = accuracy_score(B_testing, svc.predict(A_testing))\n", + "print(svc_acc)\n", + "# classification report\n", + "\n", + "print(classification_report(B_testing, B_pred))" + ] + }, + { + "cell_type": "markdown", + "id": "0e3059b1-2e79-46bd-9195-98cbd044b75c", + "metadata": {}, + "source": [ + "DECISION TREE" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "79b6cc36-eab3-4502-861f-0c4576170ffd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 8640 candidates, totalling 43200 fits\n" + ] + }, + { + "data": { + "text/html": [ + "<style>#sk-container-id-1 {\n", + " /* Definition of color scheme common for light and dark mode */\n", + " --sklearn-color-text: black;\n", + " --sklearn-color-line: gray;\n", + " /* Definition of color scheme for unfitted estimators */\n", + " --sklearn-color-unfitted-level-0: #fff5e6;\n", + " --sklearn-color-unfitted-level-1: #f6e4d2;\n", + " --sklearn-color-unfitted-level-2: #ffe0b3;\n", + " --sklearn-color-unfitted-level-3: chocolate;\n", + " /* Definition of color scheme for fitted estimators */\n", + " --sklearn-color-fitted-level-0: #f0f8ff;\n", + " --sklearn-color-fitted-level-1: #d4ebff;\n", + " --sklearn-color-fitted-level-2: #b3dbfd;\n", + " --sklearn-color-fitted-level-3: cornflowerblue;\n", + "\n", + " /* Specific color for light theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-icon: #696969;\n", + "\n", + " @media (prefers-color-scheme: dark) {\n", + " /* Redefinition of color scheme for dark theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-icon: #878787;\n", + " }\n", + "}\n", + "\n", + "#sk-container-id-1 {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "#sk-container-id-1 pre {\n", + " padding: 0;\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-hidden--visually {\n", + " border: 0;\n", + " clip: rect(1px 1px 1px 1px);\n", + " clip: rect(1px, 1px, 1px, 1px);\n", + " height: 1px;\n", + " margin: -1px;\n", + " overflow: hidden;\n", + " padding: 0;\n", + " position: absolute;\n", + " width: 1px;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-dashed-wrapped {\n", + " border: 1px dashed var(--sklearn-color-line);\n", + " margin: 0 0.4em 0.5em 0.4em;\n", + " box-sizing: border-box;\n", + " padding-bottom: 0.4em;\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-container {\n", + " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", + " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", + " so we also need the `!important` here to be able to override the\n", + " default hidden behavior on the sphinx rendered scikit-learn.org.\n", + " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", + " display: inline-block !important;\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-text-repr-fallback {\n", + " display: none;\n", + "}\n", + "\n", + "div.sk-parallel-item,\n", + "div.sk-serial,\n", + "div.sk-item {\n", + " /* draw centered vertical line to link estimators */\n", + " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", + " background-size: 2px 100%;\n", + " background-repeat: no-repeat;\n", + " background-position: center center;\n", + "}\n", + "\n", + "/* Parallel-specific style estimator block */\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item::after {\n", + " content: \"\";\n", + " width: 100%;\n", + " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", + " flex-grow: 1;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel {\n", + " display: flex;\n", + " align-items: stretch;\n", + " justify-content: center;\n", + " background-color: var(--sklearn-color-background);\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item {\n", + " display: flex;\n", + " flex-direction: column;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n", + " align-self: flex-end;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n", + " align-self: flex-start;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n", + " width: 0;\n", + "}\n", + "\n", + "/* Serial-specific style estimator block */\n", + "\n", + "#sk-container-id-1 div.sk-serial {\n", + " display: flex;\n", + " flex-direction: column;\n", + " align-items: center;\n", + " background-color: var(--sklearn-color-background);\n", + " padding-right: 1em;\n", + " padding-left: 1em;\n", + "}\n", + "\n", + "\n", + "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", + "clickable and can be expanded/collapsed.\n", + "- Pipeline and ColumnTransformer use this feature and define the default style\n", + "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", + "*/\n", + "\n", + "/* Pipeline and ColumnTransformer style (default) */\n", + "\n", + "#sk-container-id-1 div.sk-toggleable {\n", + " /* Default theme specific background. It is overwritten whether we have a\n", + " specific estimator or a Pipeline/ColumnTransformer */\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "/* Toggleable label */\n", + "#sk-container-id-1 label.sk-toggleable__label {\n", + " cursor: pointer;\n", + " display: block;\n", + " width: 100%;\n", + " margin-bottom: 0;\n", + " padding: 0.5em;\n", + " box-sizing: border-box;\n", + " text-align: center;\n", + "}\n", + "\n", + "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n", + " /* Arrow on the left of the label */\n", + " content: \"▸\";\n", + " float: left;\n", + " margin-right: 0.25em;\n", + " color: var(--sklearn-color-icon);\n", + "}\n", + "\n", + "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "/* Toggleable content - dropdown */\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content {\n", + " max-height: 0;\n", + " max-width: 0;\n", + " overflow: hidden;\n", + " text-align: left;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content pre {\n", + " margin: 0.2em;\n", + " border-radius: 0.25em;\n", + " color: var(--sklearn-color-text);\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", + " /* Expand drop-down */\n", + " max-height: 200px;\n", + " max-width: 100%;\n", + " overflow: auto;\n", + "}\n", + "\n", + "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", + " content: \"▾\";\n", + "}\n", + "\n", + "/* Pipeline/ColumnTransformer-specific style */\n", + "\n", + "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator-specific style */\n", + "\n", + "/* Colorize estimator box */\n", + "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n", + "#sk-container-id-1 div.sk-label label {\n", + " /* The background is the default theme color */\n", + " color: var(--sklearn-color-text-on-default-background);\n", + "}\n", + "\n", + "/* On hover, darken the color of the background */\n", + "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "/* Label box, darken color on hover, fitted */\n", + "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator label */\n", + "\n", + "#sk-container-id-1 div.sk-label label {\n", + " font-family: monospace;\n", + " font-weight: bold;\n", + " display: inline-block;\n", + " line-height: 1.2em;\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-label-container {\n", + " text-align: center;\n", + "}\n", + "\n", + "/* Estimator-specific */\n", + "#sk-container-id-1 div.sk-estimator {\n", + " font-family: monospace;\n", + " border: 1px dotted var(--sklearn-color-border-box);\n", + " border-radius: 0.25em;\n", + " box-sizing: border-box;\n", + " margin-bottom: 0.5em;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "/* on hover */\n", + "#sk-container-id-1 div.sk-estimator:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-1 div.sk-estimator.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n", + "\n", + "/* Common style for \"i\" and \"?\" */\n", + "\n", + ".sk-estimator-doc-link,\n", + "a:link.sk-estimator-doc-link,\n", + "a:visited.sk-estimator-doc-link {\n", + " float: right;\n", + " font-size: smaller;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1em;\n", + " height: 1em;\n", + " width: 1em;\n", + " text-decoration: none !important;\n", + " margin-left: 1ex;\n", + " /* unfitted */\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted,\n", + "a:link.sk-estimator-doc-link.fitted,\n", + "a:visited.sk-estimator-doc-link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "/* Span, style for the box shown on hovering the info icon */\n", + ".sk-estimator-doc-link span {\n", + " display: none;\n", + " z-index: 9999;\n", + " position: relative;\n", + " font-weight: normal;\n", + " right: .2ex;\n", + " padding: .5ex;\n", + " margin: .5ex;\n", + " width: min-content;\n", + " min-width: 20ex;\n", + " max-width: 50ex;\n", + " color: var(--sklearn-color-text);\n", + " box-shadow: 2pt 2pt 4pt #999;\n", + " /* unfitted */\n", + " background: var(--sklearn-color-unfitted-level-0);\n", + " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted span {\n", + " /* fitted */\n", + " background: var(--sklearn-color-fitted-level-0);\n", + " border: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link:hover span {\n", + " display: block;\n", + "}\n", + "\n", + "/* \"?\"-specific style due to the `<a>` HTML tag */\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link {\n", + " float: right;\n", + " font-size: 1rem;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1rem;\n", + " height: 1rem;\n", + " width: 1rem;\n", + " text-decoration: none;\n", + " /* unfitted */\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + "}\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "#sk-container-id-1 a.estimator_doc_link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", + " param_grid={'criterion': ['gini', 'entropy'],\n", + " 'max_depth': range(2, 32),\n", + " 'min_samples_leaf': range(1, 10),\n", + " 'min_samples_split': range(2, 10),\n", + " 'splitter': ['best', 'random']},\n", + " verbose=1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" ><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", + " param_grid={'criterion': ['gini', 'entropy'],\n", + " 'max_depth': range(2, 32),\n", + " 'min_samples_leaf': range(1, 10),\n", + " 'min_samples_split': range(2, 10),\n", + " 'splitter': ['best', 'random']},\n", + " verbose=1)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" ><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">estimator: DecisionTreeClassifier</label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier()</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-3\" type=\"checkbox\" ><label for=\"sk-estimator-id-3\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier()</pre></div> </div></div></div></div></div></div></div></div></div>" + ], + "text/plain": [ + "GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", + " param_grid={'criterion': ['gini', 'entropy'],\n", + " 'max_depth': range(2, 32),\n", + " 'min_samples_leaf': range(1, 10),\n", + " 'min_samples_split': range(2, 10),\n", + " 'splitter': ['best', 'random']},\n", + " verbose=1)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.tree import DecisionTreeClassifier\n", + "\n", + "dtc = DecisionTreeClassifier()\n", + "\n", + "parameters = {\n", + " 'criterion' : ['gini', 'entropy'],\n", + " 'max_depth' : range(2, 32, 1),\n", + " 'min_samples_leaf' : range(1, 10, 1),\n", + " 'min_samples_split' : range(2, 10, 1),\n", + " 'splitter' : ['best', 'random']\n", + "}\n", + "\n", + "grid_search_dt = GridSearchCV(dtc, parameters, cv = 5, n_jobs = -1, verbose = 1)\n", + "grid_search_dt.fit(A_training, B_training)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "362b8867-1ec8-4eba-84b0-c98486db2d2f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<style>#sk-container-id-2 {\n", + " /* Definition of color scheme common for light and dark mode */\n", + " --sklearn-color-text: black;\n", + " --sklearn-color-line: gray;\n", + " /* Definition of color scheme for unfitted estimators */\n", + " --sklearn-color-unfitted-level-0: #fff5e6;\n", + " --sklearn-color-unfitted-level-1: #f6e4d2;\n", + " --sklearn-color-unfitted-level-2: #ffe0b3;\n", + " --sklearn-color-unfitted-level-3: chocolate;\n", + " /* Definition of color scheme for fitted estimators */\n", + " --sklearn-color-fitted-level-0: #f0f8ff;\n", + " --sklearn-color-fitted-level-1: #d4ebff;\n", + " --sklearn-color-fitted-level-2: #b3dbfd;\n", + " --sklearn-color-fitted-level-3: cornflowerblue;\n", + "\n", + " /* Specific color for light theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n", + " --sklearn-color-icon: #696969;\n", + "\n", + " @media (prefers-color-scheme: dark) {\n", + " /* Redefinition of color scheme for dark theme */\n", + " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n", + " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n", + " --sklearn-color-icon: #878787;\n", + " }\n", + "}\n", + "\n", + "#sk-container-id-2 {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "#sk-container-id-2 pre {\n", + " padding: 0;\n", + "}\n", + "\n", + "#sk-container-id-2 input.sk-hidden--visually {\n", + " border: 0;\n", + " clip: rect(1px 1px 1px 1px);\n", + " clip: rect(1px, 1px, 1px, 1px);\n", + " height: 1px;\n", + " margin: -1px;\n", + " overflow: hidden;\n", + " padding: 0;\n", + " position: absolute;\n", + " width: 1px;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-dashed-wrapped {\n", + " border: 1px dashed var(--sklearn-color-line);\n", + " margin: 0 0.4em 0.5em 0.4em;\n", + " box-sizing: border-box;\n", + " padding-bottom: 0.4em;\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-container {\n", + " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n", + " but bootstrap.min.css set `[hidden] { display: none !important; }`\n", + " so we also need the `!important` here to be able to override the\n", + " default hidden behavior on the sphinx rendered scikit-learn.org.\n", + " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n", + " display: inline-block !important;\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-text-repr-fallback {\n", + " display: none;\n", + "}\n", + "\n", + "div.sk-parallel-item,\n", + "div.sk-serial,\n", + "div.sk-item {\n", + " /* draw centered vertical line to link estimators */\n", + " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n", + " background-size: 2px 100%;\n", + " background-repeat: no-repeat;\n", + " background-position: center center;\n", + "}\n", + "\n", + "/* Parallel-specific style estimator block */\n", + "\n", + "#sk-container-id-2 div.sk-parallel-item::after {\n", + " content: \"\";\n", + " width: 100%;\n", + " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n", + " flex-grow: 1;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-parallel {\n", + " display: flex;\n", + " align-items: stretch;\n", + " justify-content: center;\n", + " background-color: var(--sklearn-color-background);\n", + " position: relative;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-parallel-item {\n", + " display: flex;\n", + " flex-direction: column;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-parallel-item:first-child::after {\n", + " align-self: flex-end;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-parallel-item:last-child::after {\n", + " align-self: flex-start;\n", + " width: 50%;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-parallel-item:only-child::after {\n", + " width: 0;\n", + "}\n", + "\n", + "/* Serial-specific style estimator block */\n", + "\n", + "#sk-container-id-2 div.sk-serial {\n", + " display: flex;\n", + " flex-direction: column;\n", + " align-items: center;\n", + " background-color: var(--sklearn-color-background);\n", + " padding-right: 1em;\n", + " padding-left: 1em;\n", + "}\n", + "\n", + "\n", + "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n", + "clickable and can be expanded/collapsed.\n", + "- Pipeline and ColumnTransformer use this feature and define the default style\n", + "- Estimators will overwrite some part of the style using the `sk-estimator` class\n", + "*/\n", + "\n", + "/* Pipeline and ColumnTransformer style (default) */\n", + "\n", + "#sk-container-id-2 div.sk-toggleable {\n", + " /* Default theme specific background. It is overwritten whether we have a\n", + " specific estimator or a Pipeline/ColumnTransformer */\n", + " background-color: var(--sklearn-color-background);\n", + "}\n", + "\n", + "/* Toggleable label */\n", + "#sk-container-id-2 label.sk-toggleable__label {\n", + " cursor: pointer;\n", + " display: block;\n", + " width: 100%;\n", + " margin-bottom: 0;\n", + " padding: 0.5em;\n", + " box-sizing: border-box;\n", + " text-align: center;\n", + "}\n", + "\n", + "#sk-container-id-2 label.sk-toggleable__label-arrow:before {\n", + " /* Arrow on the left of the label */\n", + " content: \"▸\";\n", + " float: left;\n", + " margin-right: 0.25em;\n", + " color: var(--sklearn-color-icon);\n", + "}\n", + "\n", + "#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {\n", + " color: var(--sklearn-color-text);\n", + "}\n", + "\n", + "/* Toggleable content - dropdown */\n", + "\n", + "#sk-container-id-2 div.sk-toggleable__content {\n", + " max-height: 0;\n", + " max-width: 0;\n", + " overflow: hidden;\n", + " text-align: left;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-toggleable__content.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-toggleable__content pre {\n", + " margin: 0.2em;\n", + " border-radius: 0.25em;\n", + " color: var(--sklearn-color-text);\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-toggleable__content.fitted pre {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n", + " /* Expand drop-down */\n", + " max-height: 200px;\n", + " max-width: 100%;\n", + " overflow: auto;\n", + "}\n", + "\n", + "#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n", + " content: \"▾\";\n", + "}\n", + "\n", + "/* Pipeline/ColumnTransformer-specific style */\n", + "\n", + "#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator-specific style */\n", + "\n", + "/* Colorize estimator box */\n", + "#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-label label.sk-toggleable__label,\n", + "#sk-container-id-2 div.sk-label label {\n", + " /* The background is the default theme color */\n", + " color: var(--sklearn-color-text-on-default-background);\n", + "}\n", + "\n", + "/* On hover, darken the color of the background */\n", + "#sk-container-id-2 div.sk-label:hover label.sk-toggleable__label {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "/* Label box, darken color on hover, fitted */\n", + "#sk-container-id-2 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n", + " color: var(--sklearn-color-text);\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Estimator label */\n", + "\n", + "#sk-container-id-2 div.sk-label label {\n", + " font-family: monospace;\n", + " font-weight: bold;\n", + " display: inline-block;\n", + " line-height: 1.2em;\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-label-container {\n", + " text-align: center;\n", + "}\n", + "\n", + "/* Estimator-specific */\n", + "#sk-container-id-2 div.sk-estimator {\n", + " font-family: monospace;\n", + " border: 1px dotted var(--sklearn-color-border-box);\n", + " border-radius: 0.25em;\n", + " box-sizing: border-box;\n", + " margin-bottom: 0.5em;\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-0);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-estimator.fitted {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-0);\n", + "}\n", + "\n", + "/* on hover */\n", + "#sk-container-id-2 div.sk-estimator:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-2);\n", + "}\n", + "\n", + "#sk-container-id-2 div.sk-estimator.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-2);\n", + "}\n", + "\n", + "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n", + "\n", + "/* Common style for \"i\" and \"?\" */\n", + "\n", + ".sk-estimator-doc-link,\n", + "a:link.sk-estimator-doc-link,\n", + "a:visited.sk-estimator-doc-link {\n", + " float: right;\n", + " font-size: smaller;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1em;\n", + " height: 1em;\n", + " width: 1em;\n", + " text-decoration: none !important;\n", + " margin-left: 1ex;\n", + " /* unfitted */\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted,\n", + "a:link.sk-estimator-doc-link.fitted,\n", + "a:visited.sk-estimator-doc-link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n", + ".sk-estimator-doc-link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover,\n", + "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n", + ".sk-estimator-doc-link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "/* Span, style for the box shown on hovering the info icon */\n", + ".sk-estimator-doc-link span {\n", + " display: none;\n", + " z-index: 9999;\n", + " position: relative;\n", + " font-weight: normal;\n", + " right: .2ex;\n", + " padding: .5ex;\n", + " margin: .5ex;\n", + " width: min-content;\n", + " min-width: 20ex;\n", + " max-width: 50ex;\n", + " color: var(--sklearn-color-text);\n", + " box-shadow: 2pt 2pt 4pt #999;\n", + " /* unfitted */\n", + " background: var(--sklearn-color-unfitted-level-0);\n", + " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link.fitted span {\n", + " /* fitted */\n", + " background: var(--sklearn-color-fitted-level-0);\n", + " border: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "\n", + ".sk-estimator-doc-link:hover span {\n", + " display: block;\n", + "}\n", + "\n", + "/* \"?\"-specific style due to the `<a>` HTML tag */\n", + "\n", + "#sk-container-id-2 a.estimator_doc_link {\n", + " float: right;\n", + " font-size: 1rem;\n", + " line-height: 1em;\n", + " font-family: monospace;\n", + " background-color: var(--sklearn-color-background);\n", + " border-radius: 1rem;\n", + " height: 1rem;\n", + " width: 1rem;\n", + " text-decoration: none;\n", + " /* unfitted */\n", + " color: var(--sklearn-color-unfitted-level-1);\n", + " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n", + "}\n", + "\n", + "#sk-container-id-2 a.estimator_doc_link.fitted {\n", + " /* fitted */\n", + " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n", + " color: var(--sklearn-color-fitted-level-1);\n", + "}\n", + "\n", + "/* On hover */\n", + "#sk-container-id-2 a.estimator_doc_link:hover {\n", + " /* unfitted */\n", + " background-color: var(--sklearn-color-unfitted-level-3);\n", + " color: var(--sklearn-color-background);\n", + " text-decoration: none;\n", + "}\n", + "\n", + "#sk-container-id-2 a.estimator_doc_link.fitted:hover {\n", + " /* fitted */\n", + " background-color: var(--sklearn-color-fitted-level-3);\n", + "}\n", + "</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", + " min_samples_split=6, splitter='random')</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-4\" type=\"checkbox\" checked><label for=\"sk-estimator-id-4\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\"> DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", + " min_samples_split=6, splitter='random')</pre></div> </div></div></div></div>" + ], + "text/plain": [ + "DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", + " min_samples_split=6, splitter='random')" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "dtc = DecisionTreeClassifier(criterion= 'entropy', max_depth= 19, min_samples_leaf= 4, min_samples_split= 6, splitter= 'random')\n", + "dtc.fit(A_training, B_training)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "2ac9e062-42ce-4baa-81f6-b004efa79279", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.9132231404958677\n", + "0.6885245901639344\n" + ] + } + ], + "source": [ + "B_pred = dtc.predict(A_testing)\n", + "# accuracy score\n", + "\n", + "print(accuracy_score(B_training, dtc.predict(A_training)))\n", + "\n", + "dtc_acc = accuracy_score(B_testing, dtc.predict(A_testing))\n", + "print(dtc_acc)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "23941f55-8363-4bdd-a552-1bbb83f1c206", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.67 0.64 0.65 28\n", + " 1 0.71 0.73 0.72 33\n", + "\n", + " accuracy 0.69 61\n", + " macro avg 0.69 0.69 0.69 61\n", + "weighted avg 0.69 0.69 0.69 61\n", + "\n" + ] + } + ], + "source": [ + "# classification report\n", + "\n", + "print(classification_report(B_testing, B_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "029bfdba-3d0e-4d3a-9ce1-06fe8de61f82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Model</th>\n", + " <th>Score</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Logistic Regression</td>\n", + " <td>81.97</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Decision Tree Classifier</td>\n", + " <td>68.85</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>KNN</td>\n", + " <td>62.30</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>SVM</td>\n", + " <td>54.10</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Model Score\n", + "0 Logistic Regression 81.97\n", + "3 Decision Tree Classifier 68.85\n", + "1 KNN 62.30\n", + "2 SVM 54.10" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "models = pd.DataFrame({\n", + " 'Model': ['Logistic Regression', 'KNN', 'SVM', 'Decision Tree Classifier'],\n", + " 'Score': [100*round(log_reg_acc,4), 100*round(knn_acc,4), 100*round(svc_acc,4), 100*round(dtc_acc,4)]\n", + "})\n", + "models.sort_values(by = 'Score', ascending = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "12de1159", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", + "165 67 1 0 160 286 0 0 108 1 1.5 \n", + "166 67 1 0 120 229 0 0 129 1 2.6 \n", + "167 62 0 0 140 268 0 0 160 0 3.6 \n", + "168 63 1 0 130 254 0 0 147 0 1.4 \n", + "169 53 1 0 140 203 1 0 155 1 3.1 \n", + ".. ... ... .. ... ... ... ... ... ... ... \n", + "298 57 0 0 140 241 0 1 123 1 0.2 \n", + "299 45 1 3 110 264 0 1 132 0 1.2 \n", + "300 68 1 0 144 193 1 1 141 0 3.4 \n", + "301 57 1 0 130 131 0 1 115 1 1.2 \n", + "302 57 0 1 130 236 0 0 174 0 0.0 \n", + "\n", + " slope ca thal target \n", + "165 1 3 2 0 \n", + "166 1 2 3 0 \n", + "167 0 2 2 0 \n", + "168 1 1 3 0 \n", + "169 0 0 3 0 \n", + ".. ... .. ... ... \n", + "298 1 0 3 0 \n", + "299 1 0 3 0 \n", + "300 1 2 3 0 \n", + "301 1 1 3 0 \n", + "302 1 1 2 0 \n", + "\n", + "[138 rows x 14 columns]\n" + ] + } + ], + "source": [ + "filtered_df = df[df['target'] == 0]\n", + "C = pd.DataFrame(filtered_df) \n", + "print(C)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ea84040-eb10-42f2-97c7-f91b52129aef", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Pranshu Saini\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\sklearn\\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "a = [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]\n", + "b=[67 , 1, 0, 160, 286, 0, 0, 108, 1, 1.5, 1, 3, 2 ]\n", + "a_reshaped = np.array().reshape(1, -1)\n", + "\n", + "B = log_reg.predict(a_reshaped)\n", + "print(B)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e66f5d96-79a7-4ac9-a01b-e753181af587", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7552b5a0-d407-4bf9-98e0-56b8bd7c3245", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "filename = r'C:\\Users\\Pranshu Saini\\Desktop\\disease-prediction-main\\docpat\\model\\heart_disease_model.pkl'\n", + "pickle.dump(log_reg, open(filename, 'wb'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f01bd6a-95cc-4383-a17c-0e0ae3ff2e69", + "metadata": {}, + "outputs": [], + "source": [ + "'''import pickle\n", + "def load_model(path):\n", + " with open(path, 'rb') as file:\n", + " model = pickle.load(file)\n", + "heart_model = load_model(r'C:\\Users\\DELL\\Desktop\\app\\heart_disease_model.pkl')\n", + "def predict(inputs):\n", + " return heart_model.predict(inputs)'''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3765e3cf-d221-4413-a91e-9b7d4d49bd3c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8fc26d0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "66257935", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}