[71cba4]: / Heart_Disease_Prediction.ipynb

Download this file

278 lines (277 with data), 11.4 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ab17373c-d5c2-41f8-9494-8e779bdc3ef8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 5 rows:\n",
      "    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \\\n",
      "0   63    1   3       145   233    1        0      150      0      2.3      0   \n",
      "1   37    1   2       130   250    0        1      187      0      3.5      0   \n",
      "2   41    0   1       130   204    0        0      172      0      1.4      2   \n",
      "3   56    1   1       120   236    0        1      178      0      0.8      2   \n",
      "4   57    0   0       120   354    0        1      163      1      0.6      2   \n",
      "\n",
      "   ca  thal  target  \n",
      "0   0     1       1  \n",
      "1   0     2       1  \n",
      "2   0     2       1  \n",
      "3   0     2       1  \n",
      "4   0     2       1  \n",
      "Last 5 rows:\n",
      "      age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \\\n",
      "298   57    0   0       140   241    0        1      123      1      0.2   \n",
      "299   45    1   3       110   264    0        1      132      0      1.2   \n",
      "300   68    1   0       144   193    1        1      141      0      3.4   \n",
      "301   57    1   0       130   131    0        1      115      1      1.2   \n",
      "302   57    0   1       130   236    0        0      174      0      0.0   \n",
      "\n",
      "     slope  ca  thal  target  \n",
      "298      1   0     3       0  \n",
      "299      1   0     3       0  \n",
      "300      1   2     3       0  \n",
      "301      1   1     3       0  \n",
      "302      1   1     2       0  \n",
      "Shape: (303, 14)\n",
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 303 entries, 0 to 302\n",
      "Data columns (total 14 columns):\n",
      " #   Column    Non-Null Count  Dtype  \n",
      "---  ------    --------------  -----  \n",
      " 0   age       303 non-null    int64  \n",
      " 1   sex       303 non-null    int64  \n",
      " 2   cp        303 non-null    int64  \n",
      " 3   trestbps  303 non-null    int64  \n",
      " 4   chol      303 non-null    int64  \n",
      " 5   fbs       303 non-null    int64  \n",
      " 6   restecg   303 non-null    int64  \n",
      " 7   thalach   303 non-null    int64  \n",
      " 8   exang     303 non-null    int64  \n",
      " 9   oldpeak   303 non-null    float64\n",
      " 10  slope     303 non-null    int64  \n",
      " 11  ca        303 non-null    int64  \n",
      " 12  thal      303 non-null    int64  \n",
      " 13  target    303 non-null    int64  \n",
      "dtypes: float64(1), int64(13)\n",
      "memory usage: 33.3 KB\n",
      "Info:\n",
      " None\n",
      "Missing values:\n",
      " age         0\n",
      "sex         0\n",
      "cp          0\n",
      "trestbps    0\n",
      "chol        0\n",
      "fbs         0\n",
      "restecg     0\n",
      "thalach     0\n",
      "exang       0\n",
      "oldpeak     0\n",
      "slope       0\n",
      "ca          0\n",
      "thal        0\n",
      "target      0\n",
      "dtype: int64\n",
      "Statistical measures:\n",
      "               age         sex          cp    trestbps        chol         fbs  \\\n",
      "count  303.000000  303.000000  303.000000  303.000000  303.000000  303.000000   \n",
      "mean    54.366337    0.683168    0.966997  131.623762  246.264026    0.148515   \n",
      "std      9.082101    0.466011    1.032052   17.538143   51.830751    0.356198   \n",
      "min     29.000000    0.000000    0.000000   94.000000  126.000000    0.000000   \n",
      "25%     47.500000    0.000000    0.000000  120.000000  211.000000    0.000000   \n",
      "50%     55.000000    1.000000    1.000000  130.000000  240.000000    0.000000   \n",
      "75%     61.000000    1.000000    2.000000  140.000000  274.500000    0.000000   \n",
      "max     77.000000    1.000000    3.000000  200.000000  564.000000    1.000000   \n",
      "\n",
      "          restecg     thalach       exang     oldpeak       slope          ca  \\\n",
      "count  303.000000  303.000000  303.000000  303.000000  303.000000  303.000000   \n",
      "mean     0.528053  149.646865    0.326733    1.039604    1.399340    0.729373   \n",
      "std      0.525860   22.905161    0.469794    1.161075    0.616226    1.022606   \n",
      "min      0.000000   71.000000    0.000000    0.000000    0.000000    0.000000   \n",
      "25%      0.000000  133.500000    0.000000    0.000000    1.000000    0.000000   \n",
      "50%      1.000000  153.000000    0.000000    0.800000    1.000000    0.000000   \n",
      "75%      1.000000  166.000000    1.000000    1.600000    2.000000    1.000000   \n",
      "max      2.000000  202.000000    1.000000    6.200000    2.000000    4.000000   \n",
      "\n",
      "             thal      target  \n",
      "count  303.000000  303.000000  \n",
      "mean     2.313531    0.544554  \n",
      "std      0.612277    0.498835  \n",
      "min      0.000000    0.000000  \n",
      "25%      2.000000    0.000000  \n",
      "50%      2.000000    1.000000  \n",
      "75%      3.000000    1.000000  \n",
      "max      3.000000    1.000000  \n",
      "Target distribution:\n",
      " target\n",
      "1    165\n",
      "0    138\n",
      "Name: count, dtype: int64\n",
      "Features:\n",
      "      age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \\\n",
      "0     63    1   3       145   233    1        0      150      0      2.3   \n",
      "1     37    1   2       130   250    0        1      187      0      3.5   \n",
      "2     41    0   1       130   204    0        0      172      0      1.4   \n",
      "3     56    1   1       120   236    0        1      178      0      0.8   \n",
      "4     57    0   0       120   354    0        1      163      1      0.6   \n",
      "..   ...  ...  ..       ...   ...  ...      ...      ...    ...      ...   \n",
      "298   57    0   0       140   241    0        1      123      1      0.2   \n",
      "299   45    1   3       110   264    0        1      132      0      1.2   \n",
      "300   68    1   0       144   193    1        1      141      0      3.4   \n",
      "301   57    1   0       130   131    0        1      115      1      1.2   \n",
      "302   57    0   1       130   236    0        0      174      0      0.0   \n",
      "\n",
      "     slope  ca  thal  \n",
      "0        0   0     1  \n",
      "1        0   0     2  \n",
      "2        2   0     2  \n",
      "3        2   0     2  \n",
      "4        2   0     2  \n",
      "..     ...  ..   ...  \n",
      "298      1   0     3  \n",
      "299      1   0     3  \n",
      "300      1   2     3  \n",
      "301      1   1     3  \n",
      "302      1   1     2  \n",
      "\n",
      "[303 rows x 13 columns]\n",
      "Target:\n",
      " 0      1\n",
      "1      1\n",
      "2      1\n",
      "3      1\n",
      "4      1\n",
      "      ..\n",
      "298    0\n",
      "299    0\n",
      "300    0\n",
      "301    0\n",
      "302    0\n",
      "Name: target, Length: 303, dtype: int64\n",
      "Shapes: (303, 13) (242, 13) (61, 13)\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "url = \"https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv\"\n",
    "try:\n",
    "    heart_data = pd.read_csv(url, engine='python', on_bad_lines='skip')\n",
    "except Exception as e:\n",
    "    print(f\"Error loading CSV: {e}\")\n",
    "    raise\n",
    "\n",
    "print(\"First 5 rows:\\n\", heart_data.head())\n",
    "print(\"Last 5 rows:\\n\", heart_data.tail())\n",
    "print(\"Shape:\", heart_data.shape)\n",
    "print(\"Info:\\n\", heart_data.info())\n",
    "print(\"Missing values:\\n\", heart_data.isnull().sum())\n",
    "print(\"Statistical measures:\\n\", heart_data.describe())\n",
    "print(\"Target distribution:\\n\", heart_data['target'].value_counts())\n",
    "\n",
    "X = heart_data.drop(columns='target', axis=1)\n",
    "Y = heart_data['target']\n",
    "print(\"Features:\\n\", X)\n",
    "print(\"Target:\\n\", Y)\n",
    "\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)\n",
    "print(\"Shapes:\", X.shape, X_train.shape, X_test.shape)\n",
    "\n",
    "X_train.to_csv(\"X_train.csv\", index=False)\n",
    "X_test.to_csv(\"X_test.csv\", index=False)\n",
    "Y_train.to_csv(\"Y_train.csv\", index=False)\n",
    "Y_test.to_csv(\"Y_test.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e39ba42f-2e67-4dc9-80a2-d76a46159df5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy on Training data: 0.8553719008264463\n",
      "Accuracy on Test data: 0.8032786885245902\n",
      "\n",
      "Prediction: [0]\n",
      "\n",
      "The Person does not have Heart Disease\n"
     ]
    }
   ],
   "source": [
    "# heart_disease_prediction_model.py\n",
    "\n",
    "url = \"https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv\"\n",
    "heart_data = pd.read_csv(url, engine='python', on_bad_lines='skip')\n",
    "X = heart_data.drop(columns='target', axis=1)\n",
    "Y = heart_data['target']\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)\n",
    "\n",
    "model = LogisticRegression(max_iter=1000)\n",
    "model.fit(X_train, Y_train)\n",
    "\n",
    "X_train_prediction = model.predict(X_train)\n",
    "training_data_accuracy = accuracy_score(X_train_prediction, Y_train)\n",
    "print('Accuracy on Training data:', training_data_accuracy)\n",
    "\n",
    "X_test_prediction = model.predict(X_test)\n",
    "test_data_accuracy = accuracy_score(X_test_prediction, Y_test)\n",
    "print('Accuracy on Test data:', test_data_accuracy)\n",
    "\n",
    "input_data = [62, 0, 0, 140, 268, 0, 0, 160, 0, 3.6, 0, 2, 2]\n",
    "input_data_df = pd.DataFrame([input_data], columns=X.columns)\n",
    "prediction = model.predict(input_data_df)\n",
    "print(\"\\nPrediction:\", prediction)\n",
    "\n",
    "if prediction[0] == 0:\n",
    "    print('\\nThe Person does not have Heart Disease')\n",
    "else:\n",
    "    print('\\nThe Person has Heart Disease')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ffa3ec7a-82eb-4774-9967-903c0aa15c2c",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}