{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "e7fc773c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import scipy.stats as stats\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from sklearn import linear_model\n",
    "from sklearn import preprocessing\n",
    "\n",
    "# Load the dataset from heart_data.csv; later cells label-encode its\n",
    "# categorical columns in place on this frame.\n",
    "df = pd.read_csv('heart_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "aa8974a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label-encode 'Smoking'. The fitted encoder is kept under its own name so the\n",
    "# label <-> code mapping stays available (smoke_encoder.classes_), while\n",
    "# smoke_new only ever refers to the encoded values.\n",
    "smoke_encoder = preprocessing.LabelEncoder()\n",
    "smoke_new = smoke_encoder.fit_transform(df['Smoking'])\n",
    "df['Smoking'] = smoke_new"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "f0f1b529",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns to label-encode in place. Each column is listed exactly once so no\n",
    "# column is fitted and transformed twice.\n",
    "columns = [\n",
    "    'HeartDisease', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Diabetic',\n",
    "    'Sex', 'PhysicalActivity', 'Asthma', 'KidneyDisease', 'SkinCancer',\n",
    "    'Race', 'GenHealth', 'AgeCategory',\n",
    "]\n",
    "\n",
    "# NOTE(review): LabelEncoder numbers classes in sorted label order, so any\n",
    "# ordinal meaning of the original labels is not necessarily preserved --\n",
    "# confirm this is acceptable for the columns above.\n",
    "# One fitted encoder per column is retained so codes can be mapped back to labels.\n",
    "encoders = {}\n",
    "for column in columns:\n",
    "    encoders[column] = preprocessing.LabelEncoder()\n",
    "    df[column] = encoders[column].fit_transform(df[column])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "e4326dcd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the frame into the target column and all remaining columns as features.\n",
    "y_column = 'HeartDisease'\n",
    "feature_column = [x for x in df.columns if x != y_column]\n",
    "x_data = df[feature_column]\n",
    "y_data = df[y_column]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "id": "28aac296",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0 292422\n",
       "1 27373\n",
       "Name: HeartDisease, dtype: int64"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Class counts of the encoded target; the output shows the two classes are imbalanced.\n",
    "df['HeartDisease'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "65cea96c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | BMI | \n", "Smoking | \n", "AlcoholDrinking | \n", "Stroke | \n", "PhysicalHealth | \n", "MentalHealth | \n", "DiffWalking | \n", "Sex | \n", "AgeCategory | \n", "Race | \n", "Diabetic | \n", "PhysicalActivity | \n", "GenHealth | \n", "SleepTime | \n", "Asthma | \n", "KidneyDisease | \n", "SkinCancer | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-1.844750 | \n", "1.193474 | \n", "-0.27032 | \n", "-0.198040 | \n", "-0.046751 | \n", "3.281069 | \n", "-0.401578 | \n", "-0.951711 | \n", "0.136184 | \n", "0.497653 | \n", "2.372175 | \n", "0.538256 | \n", "1.159288 | \n", "-1.460354 | \n", "2.541515 | \n", "-0.195554 | \n", "3.118419 | \n", "
1 | \n", "-1.256338 | \n", "-0.837890 | \n", "-0.27032 | \n", "5.049478 | \n", "-0.424070 | \n", "-0.490039 | \n", "-0.401578 | \n", "-0.951711 | \n", "1.538806 | \n", "0.497653 | \n", "-0.419253 | \n", "0.538256 | \n", "1.159288 | \n", "-0.067601 | \n", "-0.393466 | \n", "-0.195554 | \n", "-0.320675 | \n", "
2 | \n", "-0.274603 | \n", "1.193474 | \n", "-0.27032 | \n", "-0.198040 | \n", "2.091388 | \n", "3.281069 | \n", "-0.401578 | \n", "1.050739 | \n", "0.697233 | \n", "0.497653 | \n", "2.372175 | \n", "0.538256 | \n", "-0.795561 | \n", "0.628776 | \n", "2.541515 | \n", "-0.195554 | \n", "-0.320675 | \n", "
3 | \n", "-0.647473 | \n", "-0.837890 | \n", "-0.27032 | \n", "-0.198040 | \n", "-0.424070 | \n", "-0.490039 | \n", "-0.401578 | \n", "-0.951711 | \n", "1.258282 | \n", "0.497653 | \n", "-0.419253 | \n", "-1.857852 | \n", "-0.143945 | \n", "-0.763977 | \n", "-0.393466 | \n", "-0.195554 | \n", "3.118419 | \n", "
4 | \n", "-0.726138 | \n", "-0.837890 | \n", "-0.27032 | \n", "-0.198040 | \n", "3.097572 | \n", "-0.490039 | \n", "2.490174 | \n", "-0.951711 | \n", "-0.705388 | \n", "0.497653 | \n", "-0.419253 | \n", "0.538256 | \n", "1.159288 | \n", "0.628776 | \n", "-0.393466 | \n", "-0.195554 | \n", "-0.320675 | \n", "