278 lines (277 with data), 11.4 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "ab17373c-d5c2-41f8-9494-8e779bdc3ef8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"First 5 rows:\n",
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n",
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n",
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n",
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n",
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n",
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n",
"\n",
" ca thal target \n",
"0 0 1 1 \n",
"1 0 2 1 \n",
"2 0 2 1 \n",
"3 0 2 1 \n",
"4 0 2 1 \n",
"Last 5 rows:\n",
" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
"298 57 0 0 140 241 0 1 123 1 0.2 \n",
"299 45 1 3 110 264 0 1 132 0 1.2 \n",
"300 68 1 0 144 193 1 1 141 0 3.4 \n",
"301 57 1 0 130 131 0 1 115 1 1.2 \n",
"302 57 0 1 130 236 0 0 174 0 0.0 \n",
"\n",
" slope ca thal target \n",
"298 1 0 3 0 \n",
"299 1 0 3 0 \n",
"300 1 2 3 0 \n",
"301 1 1 3 0 \n",
"302 1 1 2 0 \n",
"Shape: (303, 14)\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 303 entries, 0 to 302\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 age 303 non-null int64 \n",
" 1 sex 303 non-null int64 \n",
" 2 cp 303 non-null int64 \n",
" 3 trestbps 303 non-null int64 \n",
" 4 chol 303 non-null int64 \n",
" 5 fbs 303 non-null int64 \n",
" 6 restecg 303 non-null int64 \n",
" 7 thalach 303 non-null int64 \n",
" 8 exang 303 non-null int64 \n",
" 9 oldpeak 303 non-null float64\n",
" 10 slope 303 non-null int64 \n",
" 11 ca 303 non-null int64 \n",
" 12 thal 303 non-null int64 \n",
" 13 target 303 non-null int64 \n",
"dtypes: float64(1), int64(13)\n",
"memory usage: 33.3 KB\n",
"Info:\n",
" None\n",
"Missing values:\n",
" age 0\n",
"sex 0\n",
"cp 0\n",
"trestbps 0\n",
"chol 0\n",
"fbs 0\n",
"restecg 0\n",
"thalach 0\n",
"exang 0\n",
"oldpeak 0\n",
"slope 0\n",
"ca 0\n",
"thal 0\n",
"target 0\n",
"dtype: int64\n",
"Statistical measures:\n",
" age sex cp trestbps chol fbs \\\n",
"count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n",
"mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n",
"std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n",
"min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n",
"25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n",
"50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n",
"75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n",
"max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n",
"\n",
" restecg thalach exang oldpeak slope ca \\\n",
"count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n",
"mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n",
"std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n",
"min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n",
"50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n",
"75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n",
"max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n",
"\n",
" thal target \n",
"count 303.000000 303.000000 \n",
"mean 2.313531 0.544554 \n",
"std 0.612277 0.498835 \n",
"min 0.000000 0.000000 \n",
"25% 2.000000 0.000000 \n",
"50% 2.000000 1.000000 \n",
"75% 3.000000 1.000000 \n",
"max 3.000000 1.000000 \n",
"Target distribution:\n",
" target\n",
"1 165\n",
"0 138\n",
"Name: count, dtype: int64\n",
"Features:\n",
" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n",
"0 63 1 3 145 233 1 0 150 0 2.3 \n",
"1 37 1 2 130 250 0 1 187 0 3.5 \n",
"2 41 0 1 130 204 0 0 172 0 1.4 \n",
"3 56 1 1 120 236 0 1 178 0 0.8 \n",
"4 57 0 0 120 354 0 1 163 1 0.6 \n",
".. ... ... .. ... ... ... ... ... ... ... \n",
"298 57 0 0 140 241 0 1 123 1 0.2 \n",
"299 45 1 3 110 264 0 1 132 0 1.2 \n",
"300 68 1 0 144 193 1 1 141 0 3.4 \n",
"301 57 1 0 130 131 0 1 115 1 1.2 \n",
"302 57 0 1 130 236 0 0 174 0 0.0 \n",
"\n",
" slope ca thal \n",
"0 0 0 1 \n",
"1 0 0 2 \n",
"2 2 0 2 \n",
"3 2 0 2 \n",
"4 2 0 2 \n",
".. ... .. ... \n",
"298 1 0 3 \n",
"299 1 0 3 \n",
"300 1 2 3 \n",
"301 1 1 3 \n",
"302 1 1 2 \n",
"\n",
"[303 rows x 13 columns]\n",
"Target:\n",
" 0 1\n",
"1 1\n",
"2 1\n",
"3 1\n",
"4 1\n",
" ..\n",
"298 0\n",
"299 0\n",
"300 0\n",
"301 0\n",
"302 0\n",
"Name: target, Length: 303, dtype: int64\n",
"Shapes: (303, 13) (242, 13) (61, 13)\n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"# Download the heart-disease CSV, print a quick exploratory summary, then create and\n",
"# persist a stratified 80/20 train/test split.\n",
"url = \"https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv\"\n",
"try:\n",
"    # NOTE(review): on_bad_lines='skip' silently drops malformed rows -- confirm this is intended.\n",
"    heart_data = pd.read_csv(url, engine='python', on_bad_lines='skip')\n",
"except Exception as e:\n",
"    # Report the failure, then re-raise so later statements never run without data.\n",
"    print(f\"Error loading CSV: {e}\")\n",
"    raise\n",
"\n",
"print(\"First 5 rows:\\n\", heart_data.head())\n",
"print(\"Last 5 rows:\\n\", heart_data.tail())\n",
"print(\"Shape:\", heart_data.shape)\n",
"# DataFrame.info() prints its report itself and returns None, so it must not be\n",
"# wrapped in print() (that would emit a stray \"None\").\n",
"print(\"Info:\")\n",
"heart_data.info()\n",
"print(\"Missing values:\\n\", heart_data.isnull().sum())\n",
"print(\"Statistical measures:\\n\", heart_data.describe())\n",
"print(\"Target distribution:\\n\", heart_data['target'].value_counts())\n",
"\n",
"# Features are every column except the label column 'target'.\n",
"X = heart_data.drop(columns='target')\n",
"Y = heart_data['target']\n",
"print(\"Features:\\n\", X)\n",
"print(\"Target:\\n\", Y)\n",
"\n",
"# stratify=Y keeps the class ratio equal in both splits; random_state pins the shuffle.\n",
"X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)\n",
"print(\"Shapes:\", X.shape, X_train.shape, X_test.shape)\n",
"\n",
"# Persist the split to the current working directory (row index is not written).\n",
"X_train.to_csv(\"X_train.csv\", index=False)\n",
"X_test.to_csv(\"X_test.csv\", index=False)\n",
"Y_train.to_csv(\"Y_train.csv\", index=False)\n",
"Y_test.to_csv(\"Y_test.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e39ba42f-2e67-4dc9-80a2-d76a46159df5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy on Training data: 0.8553719008264463\n",
"Accuracy on Test data: 0.8032786885245902\n",
"\n",
"Prediction: [0]\n",
"\n",
"The Person does not have Heart Disease\n"
]
}
],
"source": [
"# Train a logistic-regression classifier, report its accuracy, and classify one sample record.\n",
"# Uses X, X_train, X_test, Y_train and Y_test (and the imports) from the data-loading cell\n",
"# above, so the CSV is not downloaded a second time; run that cell first.\n",
"\n",
"model = LogisticRegression(max_iter=1000)\n",
"model.fit(X_train, Y_train)\n",
"\n",
"# accuracy_score expects the true labels first and the predictions second.\n",
"X_train_prediction = model.predict(X_train)\n",
"training_data_accuracy = accuracy_score(Y_train, X_train_prediction)\n",
"print('Accuracy on Training data:', training_data_accuracy)\n",
"\n",
"X_test_prediction = model.predict(X_test)\n",
"test_data_accuracy = accuracy_score(Y_test, X_test_prediction)\n",
"print('Accuracy on Test data:', test_data_accuracy)\n",
"\n",
"# One hand-written record; values must follow the column order of X.\n",
"input_data = [62, 0, 0, 140, 268, 0, 0, 160, 0, 3.6, 0, 2, 2]\n",
"# Wrap the record in a one-row DataFrame carrying the training column names.\n",
"input_data_df = pd.DataFrame([input_data], columns=X.columns)\n",
"prediction = model.predict(input_data_df)\n",
"print(\"\\nPrediction:\", prediction)\n",
"\n",
"# A predicted label of 0 is reported as no heart disease; any other label as heart disease.\n",
"if prediction[0] == 0:\n",
"    print('\\nThe Person does not have Heart Disease')\n",
"else:\n",
"    print('\\nThe Person has Heart Disease')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffa3ec7a-82eb-4774-9967-903c0aa15c2c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}