{ "cells": [
 { "cell_type": "code", "execution_count": 55, "id": "e7fc773c", "metadata": {}, "outputs": [], "source": [
  "import numpy as np\n",
  "import pandas as pd\n",
  "import matplotlib.pyplot as plt\n",
  "import scipy.stats as stats\n",
  "from sklearn.model_selection import train_test_split\n",
  "\n",
  "from sklearn import linear_model\n",
  "from sklearn import preprocessing\n",
  "df=pd.read_csv('heart_data.csv')\n",
  "\n",
  "#x_list=['BMI','PhysicalHealth','SleepTime']\n",
  "#x_data=df[x_list]\n",
  "\n",
  "\n" ] },
 { "cell_type": "code", "execution_count": null, "id": "aa8974a4", "metadata": {}, "outputs": [], "source": [
  "# Replace the Smoking labels with integer codes. The encoder keeps its own\n",
  "# name so that smoke_new only ever refers to the encoded values.\n",
  "smoke_encoder=preprocessing.LabelEncoder()\n",
  "smoke_new=smoke_encoder.fit_transform(df['Smoking'])\n",
  "df['Smoking']=smoke_new" ] },
 { "cell_type": "code", "execution_count": null, "id": "f0f1b529", "metadata": {}, "outputs": [], "source": [
  "# Remaining label columns to turn into integer codes (each listed once).\n",
  "# LabelEncoder numbers the labels in sorted order, so the codes are\n",
  "# identifiers rather than a meaningful ranking.\n",
  "columns=['HeartDisease','AlcoholDrinking','Stroke','DiffWalking','Diabetic','Sex','PhysicalActivity','Asthma','KidneyDisease','SkinCancer','Race','GenHealth','AgeCategory']\n",
  "for column in columns:\n",
  "    df[column]=preprocessing.LabelEncoder().fit_transform(df[column])" ] },
 { "cell_type": "code", "execution_count": null, "id": "e4326dcd", "metadata": {}, "outputs": [], "source": [
  "# Target column versus every other column as a feature.\n",
  "y_column='HeartDisease'\n",
  "feature_column=[name for name in df.columns if name != y_column]\n",
  "x_data=df[feature_column]\n",
  "y_data=df[y_column]" ] },
 { "cell_type": "code", "execution_count": 79, "id": "28aac296", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 292422\n", "1 27373\n", "Name: HeartDisease, dtype: int64" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['HeartDisease'].value_counts()" ] },
 { "cell_type": "code", "execution_count": 69, "id": "65cea96c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
BMISmokingAlcoholDrinkingStrokePhysicalHealthMentalHealthDiffWalkingSexAgeCategoryRaceDiabeticPhysicalActivityGenHealthSleepTimeAsthmaKidneyDiseaseSkinCancer
0-1.8447501.193474-0.27032-0.198040-0.0467513.281069-0.401578-0.9517110.1361840.4976532.3721750.5382561.159288-1.4603542.541515-0.1955543.118419
1-1.256338-0.837890-0.270325.049478-0.424070-0.490039-0.401578-0.9517111.5388060.497653-0.4192530.5382561.159288-0.067601-0.393466-0.195554-0.320675
2-0.2746031.193474-0.27032-0.1980402.0913883.281069-0.4015781.0507390.6972330.4976532.3721750.538256-0.7955610.6287762.541515-0.195554-0.320675
3-0.647473-0.837890-0.27032-0.198040-0.424070-0.490039-0.401578-0.9517111.2582820.497653-0.419253-1.857852-0.143945-0.763977-0.393466-0.1955543.118419
4-0.726138-0.837890-0.27032-0.1980403.097572-0.4900392.490174-0.951711-0.7053880.497653-0.4192530.5382561.1592880.628776-0.393466-0.195554-0.320675
\n", "
" ], "text/plain": [ " BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n", "0 -1.844750 1.193474 -0.27032 -0.198040 -0.046751 \n", "1 -1.256338 -0.837890 -0.27032 5.049478 -0.424070 \n", "2 -0.274603 1.193474 -0.27032 -0.198040 2.091388 \n", "3 -0.647473 -0.837890 -0.27032 -0.198040 -0.424070 \n", "4 -0.726138 -0.837890 -0.27032 -0.198040 3.097572 \n", "\n", " MentalHealth DiffWalking Sex AgeCategory Race Diabetic \\\n", "0 3.281069 -0.401578 -0.951711 0.136184 0.497653 2.372175 \n", "1 -0.490039 -0.401578 -0.951711 1.538806 0.497653 -0.419253 \n", "2 3.281069 -0.401578 1.050739 0.697233 0.497653 2.372175 \n", "3 -0.490039 -0.401578 -0.951711 1.258282 0.497653 -0.419253 \n", "4 -0.490039 2.490174 -0.951711 -0.705388 0.497653 -0.419253 \n", "\n", " PhysicalActivity GenHealth SleepTime Asthma KidneyDisease SkinCancer \n", "0 0.538256 1.159288 -1.460354 2.541515 -0.195554 3.118419 \n", "1 0.538256 1.159288 -0.067601 -0.393466 -0.195554 -0.320675 \n", "2 0.538256 -0.795561 0.628776 2.541515 -0.195554 -0.320675 \n", "3 -1.857852 -0.143945 -0.763977 -0.393466 -0.195554 3.118419 \n", "4 0.538256 1.159288 0.628776 -0.393466 -0.195554 -0.320675 " ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "from sklearn.preprocessing import StandardScaler\n",
  "\n",
  "# Standardized copy of the features, for inspection only: this scaler is\n",
  "# fitted on every row, so the model below does its own scaling in a pipeline.\n",
  "scalar=StandardScaler()\n",
  "x=pd.DataFrame(scalar.fit_transform(x_data),columns=feature_column,index=x_data.index)\n",
  "x.head()" ] },
 { "cell_type": "code", "execution_count": null, "id": "3e5694fa", "metadata": {}, "outputs": [], "source": [
  "# Target as a NumPy array of integer class codes. The encoder has its own\n",
  "# name so y_data never refers to the encoder object.\n",
  "target_encoder=preprocessing.LabelEncoder()\n",
  "y_data=target_encoder.fit_transform(df[y_column])" ] },
 { "cell_type": "code", "execution_count": null, "id": "ff77dc5f", "metadata": {}, "outputs": [], "source": [
  "# Single train/test split of the raw features. stratify keeps the class ratio\n",
  "# equal in both parts; random_state makes Restart & Run All reproducible.\n",
  "x_train,x_test,y_train,y_test=train_test_split(\n",
  "    x_data,y_data,test_size=0.3,stratify=y_data,random_state=42)" ] },
 { "cell_type": "code", "execution_count": null, "id": "86b76352", "metadata": {}, "outputs": [], "source": [
  "from sklearn.pipeline import make_pipeline\n",
  "\n",
  "# The scaler sits inside the pipeline, so it is fitted on the training rows\n",
  "# only and lbfgs always receives standardized features.\n",
  "log_model=make_pipeline(\n",
  "    preprocessing.StandardScaler(),\n",
  "    linear_model.LogisticRegression(solver='lbfgs',max_iter=1000),\n",
  ")" ] },
 { "cell_type": "code", "execution_count": null, "id": "44ad81f2", "metadata": {}, "outputs": [], "source": [
  "log_model.fit(x_train,y_train)" ] },
 { "cell_type": "code", "execution_count": 85, "id": "6582ca13", "metadata": {}, "outputs": [], "source": [
  "y_predict=log_model.predict(x_test)" ] },
 { "cell_type": "code", "execution_count": 86, "id": "3fcee867", "metadata": {}, "outputs": [], "source": [
  "from sklearn.metrics import precision_score\n",
  "from sklearn.metrics import recall_score\n",
  "from sklearn.metrics import f1_score" ] },
 { "cell_type": "code", "execution_count": null, "id": "bde935d9", "metadata": {}, "outputs": [], "source": [
  "# Scores are for the positive class (HeartDisease == 1, roughly 9% of rows)\n",
  "# and are shown exactly as computed.\n",
  "precision_score(y_true=y_test,y_pred=y_predict)" ] },
 { "cell_type": "code", "execution_count": null, "id": "194dcf2c", "metadata": {}, "outputs": [], "source": [
  "f1_score(y_true=y_test,y_pred=y_predict)" ] },
 { "cell_type": "code", "execution_count": null, "id": "8dc3d23a", "metadata": {}, "outputs": [], "source": [
  "recall_score(y_true=y_test,y_pred=y_predict)" ] },
 { "cell_type": "code", "execution_count": 91, "id": "e417a7c4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pandas.core.series.Series" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x.iloc[0,:]\n", "type(x.iloc[0,:])" ] },
 { "cell_type": "code", "execution_count": null, "id": "3fbafc9d", "metadata": {}, "outputs": [], "source": [
  "# TODO: score one hand-built person. Kept disabled because the dict below\n",
  "# fills only two of the feature columns. scikit-learn expects a 2D input, so\n",
  "# the sample must be a one-row DataFrame with every column in feature_column.\n",
  "# log_model scales its input itself; never call fit_transform on a single\n",
  "# sample, which would re-fit the scaler on that one row.\n",
  "#person={'BMI':16.6,\n",
  "# 'Smoking':1}\n",
  "#person=pd.DataFrame([person],columns=feature_column)\n",
  "#log_model.predict(person)" ] },
 { "cell_type": "code", "execution_count": null, "id": "432aed1e", "metadata": {}, "outputs": [], "source": [] }
 ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }