{ "cells": [ { "cell_type": "markdown", "id": "41bc3d29-1e2f-41ec-b919-37d014f4769b", "metadata": {}, "source": [ "HEART DISEASE\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "596f49af-da22-40fd-b931-972608d711f9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
agesexcptrestbpscholfbsrestecgthalachexangoldpeakslopecathaltarget
063131452331015002.30011
137121302500118703.50021
241011302040017201.42021
356111202360117800.82021
457001203540116310.62021
557101401920114800.41011
656011402940015301.31021
744111202630117300.02031
852121721991116200.52031
957121501680117401.62021
1054101402390116001.22021
1148021302750113900.22021
1249111302660117100.62021
1364131102110014411.81021
1458031502831016201.02021
\n", "
" ], "text/plain": [ " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", "0 63 1 3 145 233 1 0 150 0 2.3 \n", "1 37 1 2 130 250 0 1 187 0 3.5 \n", "2 41 0 1 130 204 0 0 172 0 1.4 \n", "3 56 1 1 120 236 0 1 178 0 0.8 \n", "4 57 0 0 120 354 0 1 163 1 0.6 \n", "5 57 1 0 140 192 0 1 148 0 0.4 \n", "6 56 0 1 140 294 0 0 153 0 1.3 \n", "7 44 1 1 120 263 0 1 173 0 0.0 \n", "8 52 1 2 172 199 1 1 162 0 0.5 \n", "9 57 1 2 150 168 0 1 174 0 1.6 \n", "10 54 1 0 140 239 0 1 160 0 1.2 \n", "11 48 0 2 130 275 0 1 139 0 0.2 \n", "12 49 1 1 130 266 0 1 171 0 0.6 \n", "13 64 1 3 110 211 0 0 144 1 1.8 \n", "14 58 0 3 150 283 1 0 162 0 1.0 \n", "\n", " slope ca thal target \n", "0 0 0 1 1 \n", "1 0 0 2 1 \n", "2 2 0 2 1 \n", "3 2 0 2 1 \n", "4 2 0 2 1 \n", "5 1 0 1 1 \n", "6 1 0 2 1 \n", "7 2 0 3 1 \n", "8 2 0 3 1 \n", "9 2 0 2 1 \n", "10 2 0 2 1 \n", "11 2 0 2 1 \n", "12 2 0 2 1 \n", "13 1 0 2 1 \n", "14 2 0 2 1 " ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import accuracy_score\n", "\n", "\n", "df= pd.read_csv(r'C:\\Users\\Pranshu Saini\\Desktop\\disease-prediction-main\\docpat\\datasets\\heart.csv')\n", "df.head(15)\n" ] }, { "cell_type": "code", "execution_count": 2, "id": "7bd28756", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(303, 14)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 3, "id": "3244edd5-3dd2-47f9-85da-b9cc26fed0d7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "age 0\n", "sex 0\n", "cp 0\n", "trestbps 0\n", "chol 0\n", "fbs 0\n", "restecg 0\n", "thalach 0\n", "exang 0\n", "oldpeak 0\n", "slope 0\n", "ca 0\n", "thal 0\n", "target 0\n", "dtype: int64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 4, "id": "348dde1b-1ff9-4d98-91a9-29688f5b0933", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The reduced dataframe has 14 columns.\n" ] } ], "source": [ "# removing highly correlated features\n", "\n", "corr_matrix = df.corr().abs() \n", "\n", "mask = np.triu(np.ones_like(corr_matrix, dtype = bool))\n", "tri_df = corr_matrix.mask(mask)\n", "\n", "to_drop = [x for x in tri_df.columns if any(tri_df[x] > 0.92)]\n", "\n", "df = df.drop(to_drop, axis = 1)\n", "\n", "print(f\"The reduced dataframe has {df.shape[1]} columns.\")" ] }, { "cell_type": "code", "execution_count": 5, "id": "16a87f6e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(303, 14)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 6, "id": "ade36649-a20a-4bf2-8368-64f70cd000f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(303, 13) (242, 13) (61, 13)\n" ] } ], "source": [ "A = df.drop(columns='target', axis=1)\n", "B = df['target']\n", "A_training, A_testing, B_training, B_testing = train_test_split(A, B, test_size=0.2, stratify=B, random_state=2)\n", "\n", "print(A.shape, A_training.shape, A_testing.shape)" ] }, { "cell_type": "markdown", "id": "34f600fd-1faf-4a62-8aba-9a6d0fa38644", "metadata": {}, "source": [ "LogisticRegression" ] }, { "cell_type": "code", "execution_count": 7, "id": "fce2ced2-6375-4077-921e-2f568056fffe", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.8512396694214877\n", "0.819672131147541\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\Pranshu Saini\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n", "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", "\n", "Increase the number of iterations (max_iter) or scale the data as shown in:\n", " https://scikit-learn.org/stable/modules/preprocessing.html\n", "Please also refer to the documentation for alternative solver options:\n", " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", " n_iter_i = _check_optimize_result(\n" ] } ], "source": [ "# fitting data to model\n", "\n", "from sklearn.linear_model import LogisticRegression\n", "\n", "log_reg = LogisticRegression()\n", "log_reg.fit(A_training, B_training)\n", "B_pred = log_reg.predict(A_testing)\n", "# accuracy score\n", "\n", "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n", "\n", "print(accuracy_score(B_training, log_reg.predict(A_training)))\n", "\n", "log_reg_acc = accuracy_score(B_testing, log_reg.predict(A_testing))\n", "print(log_reg_acc)" ] }, { "cell_type": "markdown", "id": "f1a6ab2c-fc10-4f0b-8b07-1e01f8ce1243", "metadata": {}, "source": [ "K Neighbors Classifier (KNN)\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "f8aecca7-018d-41ba-bb3c-2a96cf73b007", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.78099173553719\n", "0.6229508196721312\n" ] } ], "source": [ "from sklearn.neighbors import KNeighborsClassifier\n", "\n", "knn = KNeighborsClassifier()\n", "knn.fit(A_training, B_training)\n", "# model predictions \n", "\n", "B_pred = knn.predict(A_testing)\n", "# accuracy score\n", "\n", "print(accuracy_score(B_training, knn.predict(A_training)))\n", "\n", "knn_acc = accuracy_score(B_testing, knn.predict(A_testing))\n", "print(knn_acc)" ] }, { "cell_type": "markdown", "id": "dd05e4cf-6aaf-468e-9eef-6f1b27d99560", "metadata": {}, "source": [ "Support Vector Machine (SVM)" ] }, { "cell_type": "code", "execution_count": 9, "id": "35cc692b-d57f-4738-badf-5fd6c02c2889", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C': 20, 'gamma': 0.0001}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.svm import SVC\n", "from sklearn.model_selection import GridSearchCV\n", "\n", "svc = SVC(probability=True)\n", "parameters = {\n", " 'gamma' : [0.0001, 0.001, 0.01, 0.1],\n", " 'C' : [0.01, 0.05, 0.5, 0.1, 1, 10, 15, 20]\n", "}\n", "\n", "grid_search = GridSearchCV(svc, parameters)\n", "grid_search.fit(A_training, B_training)\n", "# best parameters\n", "\n", "grid_search.best_params_\n", "\n" ] }, { "cell_type": "code", "execution_count": 10, "id": "cb090811-a42c-47ec-b7c7-43889430b93d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6981292517006803" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# best score \n", "\n", "grid_search.best_score_\n", "\n" ] }, { "cell_type": "code", "execution_count": 11, "id": "639e3132-7346-40d5-becf-6d230a9004fb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0\n", "0.5409836065573771\n", " precision recall f1-score support\n", "\n", " 0 0.50 0.43 0.46 28\n", " 1 0.57 0.64 0.60 33\n", "\n", " accuracy 0.54 61\n", " macro avg 0.53 0.53 0.53 61\n", "weighted avg 0.54 0.54 0.54 61\n", "\n" ] } ], "source": [ "svc = SVC(C = 10, gamma = 0.01, probability=True)\n", "svc.fit(A_training, B_training)\n", "# model predictions \n", "\n", "B_pred = svc.predict(A_testing)\n", "# accuracy score\n", "\n", "print(accuracy_score(B_training, svc.predict(A_training)))\n", "\n", "svc_acc = accuracy_score(B_testing, svc.predict(A_testing))\n", "print(svc_acc)\n", "# classification report\n", "\n", "print(classification_report(B_testing, B_pred))" ] }, { "cell_type": "markdown", "id": "0e3059b1-2e79-46bd-9195-98cbd044b75c", "metadata": {}, "source": [ "DECISION TREE" ] }, { "cell_type": "code", "execution_count": 12, "id": "79b6cc36-eab3-4502-861f-0c4576170ffd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 5 folds for each of 8640 candidates, totalling 43200 fits\n" ] }, { "data": { "text/html": [ "
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n",
       "             param_grid={'criterion': ['gini', 'entropy'],\n",
       "                         'max_depth': range(2, 32),\n",
       "                         'min_samples_leaf': range(1, 10),\n",
       "                         'min_samples_split': range(2, 10),\n",
       "                         'splitter': ['best', 'random']},\n",
       "             verbose=1)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['gini', 'entropy'],\n", " 'max_depth': range(2, 32),\n", " 'min_samples_leaf': range(1, 10),\n", " 'min_samples_split': range(2, 10),\n", " 'splitter': ['best', 'random']},\n", " verbose=1)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.tree import DecisionTreeClassifier\n", "\n", "dtc = DecisionTreeClassifier()\n", "\n", "parameters = {\n", " 'criterion' : ['gini', 'entropy'],\n", " 'max_depth' : range(2, 32, 1),\n", " 'min_samples_leaf' : range(1, 10, 1),\n", " 'min_samples_split' : range(2, 10, 1),\n", " 'splitter' : ['best', 'random']\n", "}\n", "\n", "grid_search_dt = GridSearchCV(dtc, parameters, cv = 5, n_jobs = -1, verbose = 1)\n", "grid_search_dt.fit(A_training, B_training)" ] }, { "cell_type": "code", "execution_count": 13, "id": "362b8867-1ec8-4eba-84b0-c98486db2d2f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n",
       "                       min_samples_split=6, splitter='random')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", " min_samples_split=6, splitter='random')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "dtc = DecisionTreeClassifier(criterion= 'entropy', max_depth= 19, min_samples_leaf= 4, min_samples_split= 6, splitter= 'random')\n", "dtc.fit(A_training, B_training)" ] }, { "cell_type": "code", "execution_count": 14, "id": "2ac9e062-42ce-4baa-81f6-b004efa79279", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.9132231404958677\n", "0.6885245901639344\n" ] } ], "source": [ "B_pred = dtc.predict(A_testing)\n", "# accuracy score\n", "\n", "print(accuracy_score(B_training, dtc.predict(A_training)))\n", "\n", "dtc_acc = accuracy_score(B_testing, dtc.predict(A_testing))\n", "print(dtc_acc)" ] }, { "cell_type": "code", "execution_count": 15, "id": "23941f55-8363-4bdd-a552-1bbb83f1c206", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " 0 0.67 0.64 0.65 28\n", " 1 0.71 0.73 0.72 33\n", "\n", " accuracy 0.69 61\n", " macro avg 0.69 0.69 0.69 61\n", "weighted avg 0.69 0.69 0.69 61\n", "\n" ] } ], "source": [ "# classification report\n", "\n", "print(classification_report(B_testing, B_pred))" ] }, { "cell_type": "code", "execution_count": 16, "id": "029bfdba-3d0e-4d3a-9ce1-06fe8de61f82", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelScore
0Logistic Regression81.97
3Decision Tree Classifier68.85
1KNN62.30
2SVM54.10
\n", "
" ], "text/plain": [ " Model Score\n", "0 Logistic Regression 81.97\n", "3 Decision Tree Classifier 68.85\n", "1 KNN 62.30\n", "2 SVM 54.10" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "models = pd.DataFrame({\n", " 'Model': ['Logistic Regression', 'KNN', 'SVM', 'Decision Tree Classifier'],\n", " 'Score': [100*round(log_reg_acc,4), 100*round(knn_acc,4), 100*round(svc_acc,4), 100*round(dtc_acc,4)]\n", "})\n", "models.sort_values(by = 'Score', ascending = False)" ] }, { "cell_type": "code", "execution_count": 17, "id": "12de1159", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", "165 67 1 0 160 286 0 0 108 1 1.5 \n", "166 67 1 0 120 229 0 0 129 1 2.6 \n", "167 62 0 0 140 268 0 0 160 0 3.6 \n", "168 63 1 0 130 254 0 0 147 0 1.4 \n", "169 53 1 0 140 203 1 0 155 1 3.1 \n", ".. ... ... .. ... ... ... ... ... ... ... \n", "298 57 0 0 140 241 0 1 123 1 0.2 \n", "299 45 1 3 110 264 0 1 132 0 1.2 \n", "300 68 1 0 144 193 1 1 141 0 3.4 \n", "301 57 1 0 130 131 0 1 115 1 1.2 \n", "302 57 0 1 130 236 0 0 174 0 0.0 \n", "\n", " slope ca thal target \n", "165 1 3 2 0 \n", "166 1 2 3 0 \n", "167 0 2 2 0 \n", "168 1 1 3 0 \n", "169 0 0 3 0 \n", ".. ... .. ... ... \n", "298 1 0 3 0 \n", "299 1 0 3 0 \n", "300 1 2 3 0 \n", "301 1 1 3 0 \n", "302 1 1 2 0 \n", "\n", "[138 rows x 14 columns]\n" ] } ], "source": [ "filtered_df = df[df['target'] == 0]\n", "C = pd.DataFrame(filtered_df) \n", "print(C)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "9ea84040-eb10-42f2-97c7-f91b52129aef", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1]\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\Pranshu Saini\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\sklearn\\base.py:493: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n", " warnings.warn(\n" ] } ], "source": [ "import numpy as np\n", "\n", "a = [63, 1, 3, 145, 233, 1, 0, 150, 0, 2.3, 0, 0, 1]\n", "b=[67 , 1, 0, 160, 286, 0, 0, 108, 1, 1.5, 1, 3, 2 ]\n", "a_reshaped = np.array().reshape(1, -1)\n", "\n", "B = log_reg.predict(a_reshaped)\n", "print(B)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "e66f5d96-79a7-4ac9-a01b-e753181af587", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 20, "id": "7552b5a0-d407-4bf9-98e0-56b8bd7c3245", "metadata": {}, "outputs": [], "source": [ "import pickle\n", "filename = r'C:\\Users\\Pranshu Saini\\Desktop\\disease-prediction-main\\docpat\\model\\heart_disease_model.pkl'\n", "pickle.dump(log_reg, open(filename, 'wb'))" ] }, { "cell_type": "code", "execution_count": null, "id": "4f01bd6a-95cc-4383-a17c-0e0ae3ff2e69", "metadata": {}, "outputs": [], "source": [ "'''import pickle\n", "def load_model(path):\n", " with open(path, 'rb') as file:\n", " model = pickle.load(file)\n", "heart_model = load_model(r'C:\\Users\\DELL\\Desktop\\app\\heart_disease_model.pkl')\n", "def predict(inputs):\n", " return heart_model.predict(inputs)'''" ] }, { "cell_type": "code", "execution_count": null, "id": "3765e3cf-d221-4413-a91e-9b7d4d49bd3c", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "a8fc26d0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "66257935", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }