|
a |
|
b/Heart_Disease_Prediction.ipynb |
|
|
1 |
{ |
|
|
2 |
"cells": [ |
|
|
3 |
{ |
|
|
4 |
"cell_type": "code", |
|
|
5 |
"execution_count": 2, |
|
|
6 |
"id": "ab17373c-d5c2-41f8-9494-8e779bdc3ef8", |
|
|
7 |
"metadata": {}, |
|
|
8 |
"outputs": [ |
|
|
9 |
{ |
|
|
10 |
"name": "stdout", |
|
|
11 |
"output_type": "stream", |
|
|
12 |
"text": [ |
|
|
13 |
"First 5 rows:\n", |
|
|
14 |
" age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", |
|
|
15 |
"0 63 1 3 145 233 1 0 150 0 2.3 0 \n", |
|
|
16 |
"1 37 1 2 130 250 0 1 187 0 3.5 0 \n", |
|
|
17 |
"2 41 0 1 130 204 0 0 172 0 1.4 2 \n", |
|
|
18 |
"3 56 1 1 120 236 0 1 178 0 0.8 2 \n", |
|
|
19 |
"4 57 0 0 120 354 0 1 163 1 0.6 2 \n", |
|
|
20 |
"\n", |
|
|
21 |
" ca thal target \n", |
|
|
22 |
"0 0 1 1 \n", |
|
|
23 |
"1 0 2 1 \n", |
|
|
24 |
"2 0 2 1 \n", |
|
|
25 |
"3 0 2 1 \n", |
|
|
26 |
"4 0 2 1 \n", |
|
|
27 |
"Last 5 rows:\n", |
|
|
28 |
" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", |
|
|
29 |
"298 57 0 0 140 241 0 1 123 1 0.2 \n", |
|
|
30 |
"299 45 1 3 110 264 0 1 132 0 1.2 \n", |
|
|
31 |
"300 68 1 0 144 193 1 1 141 0 3.4 \n", |
|
|
32 |
"301 57 1 0 130 131 0 1 115 1 1.2 \n", |
|
|
33 |
"302 57 0 1 130 236 0 0 174 0 0.0 \n", |
|
|
34 |
"\n", |
|
|
35 |
" slope ca thal target \n", |
|
|
36 |
"298 1 0 3 0 \n", |
|
|
37 |
"299 1 0 3 0 \n", |
|
|
38 |
"300 1 2 3 0 \n", |
|
|
39 |
"301 1 1 3 0 \n", |
|
|
40 |
"302 1 1 2 0 \n", |
|
|
41 |
"Shape: (303, 14)\n", |
|
|
42 |
"<class 'pandas.core.frame.DataFrame'>\n", |
|
|
43 |
"RangeIndex: 303 entries, 0 to 302\n", |
|
|
44 |
"Data columns (total 14 columns):\n", |
|
|
45 |
" # Column Non-Null Count Dtype \n", |
|
|
46 |
"--- ------ -------------- ----- \n", |
|
|
47 |
" 0 age 303 non-null int64 \n", |
|
|
48 |
" 1 sex 303 non-null int64 \n", |
|
|
49 |
" 2 cp 303 non-null int64 \n", |
|
|
50 |
" 3 trestbps 303 non-null int64 \n", |
|
|
51 |
" 4 chol 303 non-null int64 \n", |
|
|
52 |
" 5 fbs 303 non-null int64 \n", |
|
|
53 |
" 6 restecg 303 non-null int64 \n", |
|
|
54 |
" 7 thalach 303 non-null int64 \n", |
|
|
55 |
" 8 exang 303 non-null int64 \n", |
|
|
56 |
" 9 oldpeak 303 non-null float64\n", |
|
|
57 |
" 10 slope 303 non-null int64 \n", |
|
|
58 |
" 11 ca 303 non-null int64 \n", |
|
|
59 |
" 12 thal 303 non-null int64 \n", |
|
|
60 |
" 13 target 303 non-null int64 \n", |
|
|
61 |
"dtypes: float64(1), int64(13)\n", |
|
|
62 |
"memory usage: 33.3 KB\n", |
|
|
63 |
"Info:\n", |
|
|
64 |
" None\n", |
|
|
65 |
"Missing values:\n", |
|
|
66 |
" age 0\n", |
|
|
67 |
"sex 0\n", |
|
|
68 |
"cp 0\n", |
|
|
69 |
"trestbps 0\n", |
|
|
70 |
"chol 0\n", |
|
|
71 |
"fbs 0\n", |
|
|
72 |
"restecg 0\n", |
|
|
73 |
"thalach 0\n", |
|
|
74 |
"exang 0\n", |
|
|
75 |
"oldpeak 0\n", |
|
|
76 |
"slope 0\n", |
|
|
77 |
"ca 0\n", |
|
|
78 |
"thal 0\n", |
|
|
79 |
"target 0\n", |
|
|
80 |
"dtype: int64\n", |
|
|
81 |
"Statistical measures:\n", |
|
|
82 |
" age sex cp trestbps chol fbs \\\n", |
|
|
83 |
"count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", |
|
|
84 |
"mean 54.366337 0.683168 0.966997 131.623762 246.264026 0.148515 \n", |
|
|
85 |
"std 9.082101 0.466011 1.032052 17.538143 51.830751 0.356198 \n", |
|
|
86 |
"min 29.000000 0.000000 0.000000 94.000000 126.000000 0.000000 \n", |
|
|
87 |
"25% 47.500000 0.000000 0.000000 120.000000 211.000000 0.000000 \n", |
|
|
88 |
"50% 55.000000 1.000000 1.000000 130.000000 240.000000 0.000000 \n", |
|
|
89 |
"75% 61.000000 1.000000 2.000000 140.000000 274.500000 0.000000 \n", |
|
|
90 |
"max 77.000000 1.000000 3.000000 200.000000 564.000000 1.000000 \n", |
|
|
91 |
"\n", |
|
|
92 |
" restecg thalach exang oldpeak slope ca \\\n", |
|
|
93 |
"count 303.000000 303.000000 303.000000 303.000000 303.000000 303.000000 \n", |
|
|
94 |
"mean 0.528053 149.646865 0.326733 1.039604 1.399340 0.729373 \n", |
|
|
95 |
"std 0.525860 22.905161 0.469794 1.161075 0.616226 1.022606 \n", |
|
|
96 |
"min 0.000000 71.000000 0.000000 0.000000 0.000000 0.000000 \n", |
|
|
97 |
"25% 0.000000 133.500000 0.000000 0.000000 1.000000 0.000000 \n", |
|
|
98 |
"50% 1.000000 153.000000 0.000000 0.800000 1.000000 0.000000 \n", |
|
|
99 |
"75% 1.000000 166.000000 1.000000 1.600000 2.000000 1.000000 \n", |
|
|
100 |
"max 2.000000 202.000000 1.000000 6.200000 2.000000 4.000000 \n", |
|
|
101 |
"\n", |
|
|
102 |
" thal target \n", |
|
|
103 |
"count 303.000000 303.000000 \n", |
|
|
104 |
"mean 2.313531 0.544554 \n", |
|
|
105 |
"std 0.612277 0.498835 \n", |
|
|
106 |
"min 0.000000 0.000000 \n", |
|
|
107 |
"25% 2.000000 0.000000 \n", |
|
|
108 |
"50% 2.000000 1.000000 \n", |
|
|
109 |
"75% 3.000000 1.000000 \n", |
|
|
110 |
"max 3.000000 1.000000 \n", |
|
|
111 |
"Target distribution:\n", |
|
|
112 |
" target\n", |
|
|
113 |
"1 165\n", |
|
|
114 |
"0 138\n", |
|
|
115 |
"Name: count, dtype: int64\n", |
|
|
116 |
"Features:\n", |
|
|
117 |
" age sex cp trestbps chol fbs restecg thalach exang oldpeak \\\n", |
|
|
118 |
"0 63 1 3 145 233 1 0 150 0 2.3 \n", |
|
|
119 |
"1 37 1 2 130 250 0 1 187 0 3.5 \n", |
|
|
120 |
"2 41 0 1 130 204 0 0 172 0 1.4 \n", |
|
|
121 |
"3 56 1 1 120 236 0 1 178 0 0.8 \n", |
|
|
122 |
"4 57 0 0 120 354 0 1 163 1 0.6 \n", |
|
|
123 |
".. ... ... .. ... ... ... ... ... ... ... \n", |
|
|
124 |
"298 57 0 0 140 241 0 1 123 1 0.2 \n", |
|
|
125 |
"299 45 1 3 110 264 0 1 132 0 1.2 \n", |
|
|
126 |
"300 68 1 0 144 193 1 1 141 0 3.4 \n", |
|
|
127 |
"301 57 1 0 130 131 0 1 115 1 1.2 \n", |
|
|
128 |
"302 57 0 1 130 236 0 0 174 0 0.0 \n", |
|
|
129 |
"\n", |
|
|
130 |
" slope ca thal \n", |
|
|
131 |
"0 0 0 1 \n", |
|
|
132 |
"1 0 0 2 \n", |
|
|
133 |
"2 2 0 2 \n", |
|
|
134 |
"3 2 0 2 \n", |
|
|
135 |
"4 2 0 2 \n", |
|
|
136 |
".. ... .. ... \n", |
|
|
137 |
"298 1 0 3 \n", |
|
|
138 |
"299 1 0 3 \n", |
|
|
139 |
"300 1 2 3 \n", |
|
|
140 |
"301 1 1 3 \n", |
|
|
141 |
"302 1 1 2 \n", |
|
|
142 |
"\n", |
|
|
143 |
"[303 rows x 13 columns]\n", |
|
|
144 |
"Target:\n", |
|
|
145 |
" 0 1\n", |
|
|
146 |
"1 1\n", |
|
|
147 |
"2 1\n", |
|
|
148 |
"3 1\n", |
|
|
149 |
"4 1\n", |
|
|
150 |
" ..\n", |
|
|
151 |
"298 0\n", |
|
|
152 |
"299 0\n", |
|
|
153 |
"300 0\n", |
|
|
154 |
"301 0\n", |
|
|
155 |
"302 0\n", |
|
|
156 |
"Name: target, Length: 303, dtype: int64\n", |
|
|
157 |
"Shapes: (303, 13) (242, 13) (61, 13)\n" |
|
|
158 |
] |
|
|
159 |
} |
|
|
160 |
], |
|
|
161 |
"source": [ |
|
|
162 |
"import numpy as np\n", |
|
|
163 |
"import pandas as pd\n", |
|
|
164 |
"from sklearn.model_selection import train_test_split\n", |
|
|
165 |
"from sklearn.linear_model import LogisticRegression\n", |
|
|
166 |
"from sklearn.metrics import accuracy_score\n", |
|
|
167 |
"\n", |
|
|
168 |
"# Load the heart-disease CSV, print a quick overview, and save a stratified 80/20 train/test split to disk.\n", |
|
|
169 |
"url = \"https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv\"\n", |
|
|
170 |
"try:\n", |
|
|
171 |
"    heart_data = pd.read_csv(url, engine='python', on_bad_lines='skip')\n", |
|
|
172 |
"except Exception as e:\n", |
|
|
173 |
"    print(f\"Error loading CSV: {e}\")\n", |
|
|
174 |
"    raise\n", |
|
|
175 |
"\n", |
|
|
176 |
"print(\"First 5 rows:\\n\", heart_data.head())\n", |
|
|
177 |
"print(\"Last 5 rows:\\n\", heart_data.tail())\n", |
|
|
178 |
"print(\"Shape:\", heart_data.shape)\n", |
|
|
179 |
"print(\"Info:\")\n", |
|
|
180 |
"heart_data.info()  # info() writes its report to stdout and returns None; do not wrap it in print()\n", |
|
|
181 |
"print(\"Missing values:\\n\", heart_data.isnull().sum())\n", |
|
|
182 |
"print(\"Statistical measures:\\n\", heart_data.describe())\n", |
|
|
183 |
"print(\"Target distribution:\\n\", heart_data['target'].value_counts())\n", |
|
|
184 |
"\n", |
|
|
185 |
"X = heart_data.drop(columns='target')\n", |
|
|
186 |
"Y = heart_data['target']\n", |
|
|
187 |
"print(\"Features:\\n\", X)\n", |
|
|
188 |
"print(\"Target:\\n\", Y)\n", |
|
|
189 |
"# Hold out 20% as the test set; stratify=Y keeps the class ratio the same in both parts, random_state pins the shuffle.\n", |
|
|
190 |
"X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)\n", |
|
|
191 |
"print(\"Shapes:\", X.shape, X_train.shape, X_test.shape)\n", |
|
|
192 |
"# Write each split to its own CSV file in the working directory, without the index column.\n", |
|
|
193 |
"for name, split in ((\"X_train\", X_train), (\"X_test\", X_test), (\"Y_train\", Y_train), (\"Y_test\", Y_test)):\n", |
|
|
194 |
"    split.to_csv(f\"{name}.csv\", index=False)" |
|
|
195 |
] |
|
|
196 |
}, |
|
|
197 |
{ |
|
|
198 |
"cell_type": "code", |
|
|
199 |
"execution_count": 4, |
|
|
200 |
"id": "e39ba42f-2e67-4dc9-80a2-d76a46159df5", |
|
|
201 |
"metadata": {}, |
|
|
202 |
"outputs": [ |
|
|
203 |
{ |
|
|
204 |
"name": "stdout", |
|
|
205 |
"output_type": "stream", |
|
|
206 |
"text": [ |
|
|
207 |
"Accuracy on Training data: 0.8553719008264463\n", |
|
|
208 |
"Accuracy on Test data: 0.8032786885245902\n", |
|
|
209 |
"\n", |
|
|
210 |
"Prediction: [0]\n", |
|
|
211 |
"\n", |
|
|
212 |
"The Person does not have Heart Disease\n" |
|
|
213 |
] |
|
|
214 |
} |
|
|
215 |
], |
|
|
216 |
"source": [ |
|
|
217 |
"# heart_disease_prediction_model.py\n", |
|
|
218 |
"# Train and evaluate a logistic-regression classifier, then classify one sample record; uses the imports from the first cell.\n", |
|
|
219 |
"url = \"https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv\"\n", |
|
|
220 |
"heart_data = pd.read_csv(url, engine='python', on_bad_lines='skip')\n", |
|
|
221 |
"X = heart_data.drop(columns='target')\n", |
|
|
222 |
"Y = heart_data['target']\n", |
|
|
223 |
"X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)\n", |
|
|
224 |
"\n", |
|
|
225 |
"model = LogisticRegression(max_iter=1000)\n", |
|
|
226 |
"model.fit(X_train, Y_train)\n", |
|
|
227 |
"\n", |
|
|
228 |
"# accuracy_score takes (y_true, y_pred): ground-truth labels first, model predictions second.\n", |
|
|
229 |
"X_train_prediction = model.predict(X_train)\n", |
|
|
230 |
"training_data_accuracy = accuracy_score(Y_train, X_train_prediction)\n", |
|
|
231 |
"print('Accuracy on Training data:', training_data_accuracy)\n", |
|
|
232 |
"\n", |
|
|
233 |
"X_test_prediction = model.predict(X_test)\n", |
|
|
234 |
"test_data_accuracy = accuracy_score(Y_test, X_test_prediction)\n", |
|
|
235 |
"print('Accuracy on Test data:', test_data_accuracy)\n", |
|
|
236 |
"\n", |
|
|
237 |
"input_data = [62, 0, 0, 140, 268, 0, 0, 160, 0, 3.6, 0, 2, 2]\n", |
|
|
238 |
"input_data_df = pd.DataFrame([input_data], columns=X.columns)  # one-row frame labelled with the training column names\n", |
|
|
239 |
"prediction = model.predict(input_data_df)\n", |
|
|
240 |
"print(\"\\nPrediction:\", prediction)\n", |
|
|
241 |
"if prediction[0] == 0:\n", |
|
|
242 |
"    print('\\nThe Person does not have Heart Disease')\n", |
|
|
243 |
"else:\n", |
|
|
244 |
"    print('\\nThe Person has Heart Disease')" |
|
|
245 |
] |
|
|
246 |
}, |
|
|
247 |
{ |
|
|
248 |
"cell_type": "code", |
|
|
249 |
"execution_count": null, |
|
|
250 |
"id": "ffa3ec7a-82eb-4774-9967-903c0aa15c2c", |
|
|
251 |
"metadata": {}, |
|
|
252 |
"outputs": [], |
|
|
253 |
"source": [] |
|
|
254 |
} |
|
|
255 |
], |
|
|
256 |
"metadata": { |
|
|
257 |
"kernelspec": { |
|
|
258 |
"display_name": "Python 3 (ipykernel)", |
|
|
259 |
"language": "python", |
|
|
260 |
"name": "python3" |
|
|
261 |
}, |
|
|
262 |
"language_info": { |
|
|
263 |
"codemirror_mode": { |
|
|
264 |
"name": "ipython", |
|
|
265 |
"version": 3 |
|
|
266 |
}, |
|
|
267 |
"file_extension": ".py", |
|
|
268 |
"mimetype": "text/x-python", |
|
|
269 |
"name": "python", |
|
|
270 |
"nbconvert_exporter": "python", |
|
|
271 |
"pygments_lexer": "ipython3", |
|
|
272 |
"version": "3.12.7" |
|
|
273 |
} |
|
|
274 |
}, |
|
|
275 |
"nbformat": 4, |
|
|
276 |
"nbformat_minor": 5 |
|
|
277 |
} |