In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Location of the heart-disease CSV; it is read over HTTPS each time this cell runs.
url = "https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv"
try:
    # 'warn' drops malformed rows exactly like 'skip' did, but reports every
    # dropped line so data loss is visible instead of silent.
    heart_data = pd.read_csv(url, engine='python', on_bad_lines='warn')
except Exception as e:
    # Print a short message for the reader, then re-raise so the failure is not swallowed.
    print(f"Error loading CSV: {e}")
    raise

# Quick exploratory overview of the loaded frame: head/tail, shape, schema,
# null counts, summary statistics and the class balance of the label column.
print("First 5 rows:\n", heart_data.head())
print("Last 5 rows:\n", heart_data.tail())
print("Shape:", heart_data.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be passed to print(): that emitted the report *before* the label and
# then printed a stray "None". Print the label first, then call info() directly.
print("Info:")
heart_data.info()
print("Missing values:\n", heart_data.isnull().sum())
print("Statistical measures:\n", heart_data.describe())
print("Target distribution:\n", heart_data['target'].value_counts())

# Separate the label column from the predictors.
# Y is the 'target' column; X is every remaining column of heart_data.
Y = heart_data['target']
X = heart_data.drop(columns=['target'])
print("Features:\n", X)
print("Target:\n", Y)

# Hold out 20% of the rows for testing. stratify=Y keeps the class proportions
# the same in both parts, and the fixed random_state makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)
# Shapes of the full feature frame, the training part and the test part, in that order.
print("Shapes:", *(part.shape for part in (X, X_train, X_test)))

# Persist each split to its own CSV in the working directory.
# index=False leaves the pandas row index out of the files.
split_outputs = {
    "X_train.csv": X_train,
    "X_test.csv": X_test,
    "Y_train.csv": Y_train,
    "Y_test.csv": Y_test,
}
for out_path, split_part in split_outputs.items():
    split_part.to_csv(out_path, index=False)

First 5 rows:
    age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  slope  \
0   63    1   3       145   233    1        0      150      0      2.3      0   
1   37    1   2       130   250    0        1      187      0      3.5      0   
2   41    0   1       130   204    0        0      172      0      1.4      2   
3   56    1   1       120   236    0        1      178      0      0.8      2   
4   57    0   0       120   354    0        1      163      1      0.6      2   

   ca  thal  target  
0   0     1       1  
1   0     2       1  
2   0     2       1  
3   0     2       1  
4   0     2       1  
Last 5 rows:
      age  sex  cp  trestbps  chol  fbs  restecg  thalach  exang  oldpeak  \
298   57    0   0       140   241    0        1      123      1      0.2   
299   45    1   3       110   264    0        1      132      0      1.2   
300   68    1   0       144   193    1        1      141      0      3.4   
301   57    1   0       130   131    0        1 

In [4]:
# heart_disease_prediction_model.py
#
# Re-reads the dataset and rebuilds the train/test split inside this cell, so the
# frames used for training below are created here. The cell has no imports of its
# own: pd and train_test_split come from the import lines at the top of the notebook.

url = "https://raw.githubusercontent.com/akilan0303/Heart-Disease-Prediction/main/heart_disease_data.csv"
heart_data = pd.read_csv(url, on_bad_lines='skip', engine='python')

# Label column vs. all remaining columns.
Y = heart_data['target']
X = heart_data.drop(columns=['target'])

# 80/20 split, stratified on the label, with a fixed seed for repeatability.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

# Build the logistic-regression classifier (solver capped at 1000 iterations)
# and fit it on the training split; fit() hands back the fitted estimator.
model = LogisticRegression(max_iter=1000).fit(X_train, Y_train)

# Accuracy on the rows the model was fitted on.
X_train_prediction = model.predict(X_train)
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth
# goes first. Accuracy itself is symmetric, so the printed number is unchanged,
# but the correct order keeps this cell right if another metric is swapped in.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)
print('Accuracy on Training data:', training_data_accuracy)

# Accuracy on the held-out test rows.
X_test_prediction = model.predict(X_test)
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)
print('Accuracy on Test data:', test_data_accuracy)

# One hand-entered record. The values are positional: they are paired with
# X.columns below, so they must be listed in the same order as X's columns.
input_data = [62, 0, 0, 140, 268, 0, 0, 160, 0, 3.6, 0, 2, 2]
# Wrap the record in a one-row frame carrying the training column names.
input_data_df = pd.DataFrame(data=[input_data], columns=X.columns)
prediction = model.predict(input_data_df)
print("\nPrediction:", prediction)

# A predicted label of 0 is reported as "no heart disease"; anything else as disease.
if prediction[0] != 0:
    print('\nThe Person has Heart Disease')
else:
    print('\nThe Person does not have Heart Disease')

Accuracy on Training data: 0.8553719008264463
Accuracy on Test data: 0.8032786885245902

Prediction: [0]

The Person does not have Heart Disease
