--- a +++ b/Model Buliding/first_notebook.ipynb @@ -0,0 +1,451 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Pregnancies</th>\n", + " <th>Glucose</th>\n", + " <th>BloodPressure</th>\n", + " <th>SkinThickness</th>\n", + " <th>Insulin</th>\n", + " <th>BMI</th>\n", + " <th>DiabetesPedigreeFunction</th>\n", + " <th>Age</th>\n", + " <th>Outcome</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>6</td>\n", + " <td>148</td>\n", + " <td>72</td>\n", + " <td>35</td>\n", + " <td>0</td>\n", + " <td>33.6</td>\n", + " <td>0.627</td>\n", + " <td>50</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>85</td>\n", + " <td>66</td>\n", + " <td>29</td>\n", + " <td>0</td>\n", + " <td>26.6</td>\n", + " <td>0.351</td>\n", + " <td>31</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>8</td>\n", + " <td>183</td>\n", + " <td>64</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>23.3</td>\n", + " <td>0.672</td>\n", + " <td>32</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>1</td>\n", + " <td>89</td>\n", + " <td>66</td>\n", + " <td>23</td>\n", + " <td>94</td>\n", + " <td>28.1</td>\n", + " <td>0.167</td>\n", + " <td>21</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0</td>\n", + " <td>137</td>\n", + " <td>40</td>\n", + " <td>35</td>\n", + " <td>168</td>\n", + " <td>43.1</td>\n", + " <td>2.288</td>\n", + " <td>33</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", + "0 6 148 72 35 0 33.6 \n", + "1 1 85 66 29 0 26.6 \n", + "2 8 183 64 0 0 23.3 \n", + "3 1 89 66 23 94 28.1 \n", + "4 0 137 40 35 168 43.1 \n", + "\n", + " DiabetesPedigreeFunction Age Outcome \n", + "0 0.627 50 1 \n", + "1 0.351 31 0 \n", + "2 0.672 32 1 \n", + "3 0.167 21 0 \n", + "4 2.288 33 1 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_diabetes = pd.read_csv('diabetes.csv')\n", + "df_diabetes.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>age</th>\n", + " <th>sex</th>\n", + " <th>cp</th>\n", + " <th>trestbps</th>\n", + " <th>chol</th>\n", + " <th>fbs</th>\n", + " <th>restecg</th>\n", + " <th>thalach</th>\n", + " <th>exang</th>\n", + " <th>oldpeak</th>\n", + " <th>slope</th>\n", + " <th>ca</th>\n", + " <th>thal</th>\n", + " <th>target</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>52</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>125</td>\n", + " <td>212</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>168</td>\n", + " <td>0</td>\n", + " <td>1.0</td>\n", + " <td>2</td>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>53</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>140</td>\n", + " <td>203</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>155</td>\n", + " <td>1</td>\n", + " <td>3.1</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>70</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>145</td>\n", + " <td>174</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>125</td>\n", + " <td>1</td>\n", + " <td>2.6</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>61</td>\n", + " <td>1</td>\n", + " <td>0</td>\n", + " <td>148</td>\n", + " <td>203</td>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>161</td>\n", + " <td>0</td>\n", + " <td>0.0</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>62</td>\n", + " <td>0</td>\n", + " <td>0</td>\n", + " <td>138</td>\n", + " <td>294</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>106</td>\n", + " <td>0</td>\n", + " <td>1.9</td>\n", + " <td>1</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " age sex cp trestbps chol fbs restecg thalach exang oldpeak slope \\\n", + "0 52 1 0 125 212 0 1 168 0 1.0 2 \n", + "1 53 1 0 140 203 1 0 155 1 3.1 0 \n", + "2 70 1 0 145 174 0 1 125 1 2.6 0 \n", + "3 61 1 0 148 203 0 1 161 0 0.0 2 \n", + "4 62 0 0 138 294 1 1 106 0 1.9 1 \n", + "\n", + " ca thal target \n", + "0 2 3 0 \n", + "1 0 3 0 \n", + "2 0 3 0 \n", + "3 1 3 0 \n", + "4 3 2 0 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_heart = pd.read_csv(\"heart_complete.csv\")\n", + "df_heart.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Age</th>\n", + " <th>Gender</th>\n", + " <th>Height</th>\n", + " <th>Weight</th>\n", + " <th>BMI</th>\n", + " <th>PhysicalActivityLevel</th>\n", + " <th>ObesityCategory</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>56</td>\n", + " <td>Male</td>\n", + " <td>173.575262</td>\n", + " <td>71.982051</td>\n", + " <td>23.891783</td>\n", + " <td>4</td>\n", + " <td>Normal weight</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>69</td>\n", + " <td>Male</td>\n", + " <td>164.127306</td>\n", + " <td>89.959256</td>\n", + " <td>33.395209</td>\n", + " <td>2</td>\n", + " <td>Obese</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>46</td>\n", + " <td>Female</td>\n", + " <td>168.072202</td>\n", + " <td>72.930629</td>\n", + " <td>25.817737</td>\n", + " <td>4</td>\n", + " <td>Overweight</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>32</td>\n", + " <td>Male</td>\n", + " <td>168.459633</td>\n", + " <td>84.886912</td>\n", + " <td>29.912247</td>\n", + " <td>3</td>\n", + " <td>Overweight</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>60</td>\n", + " <td>Male</td>\n", + " <td>183.568568</td>\n", + " <td>69.038945</td>\n", + " <td>20.487903</td>\n", + " <td>3</td>\n", + " <td>Normal weight</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Age Gender Height Weight BMI PhysicalActivityLevel \\\n", + "0 56 Male 173.575262 71.982051 23.891783 4 \n", + "1 69 Male 164.127306 89.959256 33.395209 2 \n", + "2 46 Female 168.072202 72.930629 25.817737 4 \n", + "3 32 Male 168.459633 84.886912 29.912247 3 \n", + "4 60 Male 183.568568 69.038945 20.487903 3 \n", + "\n", + " ObesityCategory \n", + "0 Normal weight \n", + "1 Obese \n", + "2 Overweight \n", + "3 Overweight \n", + "4 Normal weight " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_obesity = pd.read_csv(\"obesity_data.csv\")\n", + "df_obesity.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.18 ('ds_ml')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "bcc4fb5aa31885ae3822c808f45050c24798a2479b24a824a4f952e5682b37fd" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}