Switch to side-by-side view

--- a
+++ b/Lung_cancer_patient_decision_tree.ipynb
@@ -0,0 +1,5000 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Lung cancer: Decision Trees\n",
+    "\n",
+    "\n",
+    "What we'll be doing\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "----\n",
+    "Install additional programs\n",
+    "-----\n",
+    "\n",
+    "We need to have [graphviz](https://www.graphviz.org/) installed to display the tree structure later on."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Import packages"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import graphviz \n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "%matplotlib inline\n",
+    "\n",
+    "from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV\n",
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn import tree\n",
+    "from sklearn.metrics import mean_squared_error, r2_score\n",
+    "from sklearn.datasets import load_iris"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Load and format data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>patient_id</th>\n",
+       "      <th>age</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>air_pollution</th>\n",
+       "      <th>alcohol_use</th>\n",
+       "      <th>dust_allergy</th>\n",
+       "      <th>occupational_hazards</th>\n",
+       "      <th>genetic_risk</th>\n",
+       "      <th>chronic_lung_disease</th>\n",
+       "      <th>balanced_diet</th>\n",
+       "      <th>...</th>\n",
+       "      <th>fatigue</th>\n",
+       "      <th>weight_loss</th>\n",
+       "      <th>shortness_of_breath</th>\n",
+       "      <th>wheezing</th>\n",
+       "      <th>swallowing_difficulty</th>\n",
+       "      <th>clubbing_of_finger_nails</th>\n",
+       "      <th>frequent_cold</th>\n",
+       "      <th>dry_cough</th>\n",
+       "      <th>snoring</th>\n",
+       "      <th>level</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>P1</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>P10</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>P107</td>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>P189</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>P19</td>\n",
+       "      <td>38</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>P190</td>\n",
+       "      <td>49</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>P191</td>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>P192</td>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>P193</td>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>P194</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>P195</td>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>P196</td>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>P197</td>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>P108</td>\n",
+       "      <td>64</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>P198</td>\n",
+       "      <td>18</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>P199</td>\n",
+       "      <td>47</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>P2</td>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>P20</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>P200</td>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>P201</td>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>P202</td>\n",
+       "      <td>35</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>P203</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>P204</td>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>P205</td>\n",
+       "      <td>35</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>P109</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>P206</td>\n",
+       "      <td>27</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>P207</td>\n",
+       "      <td>48</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>P208</td>\n",
+       "      <td>64</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>P209</td>\n",
+       "      <td>39</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>P21</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>970</th>\n",
+       "      <td>P974</td>\n",
+       "      <td>31</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>971</th>\n",
+       "      <td>P975</td>\n",
+       "      <td>38</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>972</th>\n",
+       "      <td>P976</td>\n",
+       "      <td>35</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>973</th>\n",
+       "      <td>P977</td>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>974</th>\n",
+       "      <td>P978</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>975</th>\n",
+       "      <td>P979</td>\n",
+       "      <td>45</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>976</th>\n",
+       "      <td>P98</td>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>977</th>\n",
+       "      <td>P980</td>\n",
+       "      <td>53</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>Low</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>978</th>\n",
+       "      <td>P187</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>979</th>\n",
+       "      <td>P981</td>\n",
+       "      <td>35</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>980</th>\n",
+       "      <td>P982</td>\n",
+       "      <td>46</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>981</th>\n",
+       "      <td>P983</td>\n",
+       "      <td>27</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>982</th>\n",
+       "      <td>P984</td>\n",
+       "      <td>26</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>983</th>\n",
+       "      <td>P985</td>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>Medium</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>984</th>\n",
+       "      <td>P986</td>\n",
+       "      <td>28</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>985</th>\n",
+       "      <td>P987</td>\n",
+       "      <td>19</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>986</th>\n",
+       "      <td>P988</td>\n",
+       "      <td>29</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>987</th>\n",
+       "      <td>P989</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>988</th>\n",
+       "      <td>P99</td>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>989</th>\n",
+       "      <td>P188</td>\n",
+       "      <td>29</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>990</th>\n",
+       "      <td>P990</td>\n",
+       "      <td>49</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>991</th>\n",
+       "      <td>P991</td>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>992</th>\n",
+       "      <td>P992</td>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>993</th>\n",
+       "      <td>P993</td>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>994</th>\n",
+       "      <td>P994</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>995</th>\n",
+       "      <td>P995</td>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>996</th>\n",
+       "      <td>P996</td>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>997</th>\n",
+       "      <td>P997</td>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>998</th>\n",
+       "      <td>P998</td>\n",
+       "      <td>18</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>999</th>\n",
+       "      <td>P999</td>\n",
+       "      <td>47</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>High</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1000 rows × 25 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    patient_id  age  gender  air_pollution  alcohol_use  dust_allergy  \\\n",
+       "0           P1   33       1              2            4             5   \n",
+       "1          P10   17       1              3            1             5   \n",
+       "2         P107   44       1              6            7             7   \n",
+       "3         P189   39       2              6            8             7   \n",
+       "4          P19   38       2              2            1             5   \n",
+       "5         P190   49       1              6            5             6   \n",
+       "6         P191   37       1              8            8             7   \n",
+       "7         P192   26       2              7            7             7   \n",
+       "8         P193   37       2              7            7             7   \n",
+       "9         P194   33       1              6            7             7   \n",
+       "10        P195   44       1              6            7             7   \n",
+       "11        P196   37       2              6            8             7   \n",
+       "12        P197   25       2              4            5             6   \n",
+       "13        P108   64       2              6            8             7   \n",
+       "14        P198   18       2              6            8             7   \n",
+       "15        P199   47       1              6            5             6   \n",
+       "16          P2   25       2              3            1             4   \n",
+       "17         P20   19       1              3            2             4   \n",
+       "18        P200   26       2              8            8             7   \n",
+       "19        P201   37       1              7            7             7   \n",
+       "20        P202   35       2              4            5             6   \n",
+       "21        P203   33       1              2            4             5   \n",
+       "22        P204   25       2              3            1             4   \n",
+       "23        P205   35       2              4            5             6   \n",
+       "24        P109   39       2              4            5             6   \n",
+       "25        P206   27       2              2            3             4   \n",
+       "26        P207   48       1              6            7             7   \n",
+       "27        P208   64       1              6            8             7   \n",
+       "28        P209   39       1              4            5             6   \n",
+       "29         P21   33       1              6            7             7   \n",
+       "..         ...  ...     ...            ...          ...           ...   \n",
+       "970       P974   31       2              3            2             4   \n",
+       "971       P975   38       2              1            2             3   \n",
+       "972       P976   35       1              6            8             7   \n",
+       "973       P977   44       1              6            7             7   \n",
+       "974       P978   33       1              2            4             5   \n",
+       "975       P979   45       1              3            1             4   \n",
+       "976        P98   26       2              8            8             7   \n",
+       "977       P980   53       1              3            1             4   \n",
+       "978       P187   19       1              6            8             7   \n",
+       "979       P981   35       2              4            5             6   \n",
+       "980       P982   46       1              6            8             7   \n",
+       "981       P983   27       1              6            7             7   \n",
+       "982       P984   26       1              3            2             4   \n",
+       "983       P985   37       1              1            2             3   \n",
+       "984       P986   28       1              6            7             7   \n",
+       "985       P987   19       1              6            8             7   \n",
+       "986       P988   29       2              4            5             6   \n",
+       "987       P989   39       2              6            8             7   \n",
+       "988        P99   37       1              7            7             7   \n",
+       "989       P188   29       2              4            5             6   \n",
+       "990       P990   49       1              6            5             6   \n",
+       "991       P991   37       1              8            8             7   \n",
+       "992       P992   26       2              7            7             7   \n",
+       "993       P993   37       2              7            7             7   \n",
+       "994       P994   33       1              6            7             7   \n",
+       "995       P995   44       1              6            7             7   \n",
+       "996       P996   37       2              6            8             7   \n",
+       "997       P997   25       2              4            5             6   \n",
+       "998       P998   18       2              6            8             7   \n",
+       "999       P999   47       1              6            5             6   \n",
+       "\n",
+       "     occupational_hazards  genetic_risk  chronic_lung_disease  balanced_diet  \\\n",
+       "0                       4             3                     2              2   \n",
+       "1                       3             4                     2              2   \n",
+       "2                       7             7                     6              7   \n",
+       "3                       7             7                     6              7   \n",
+       "4                       3             2                     3              2   \n",
+       "5                       5             5                     4              6   \n",
+       "6                       7             7                     6              7   \n",
+       "7                       7             7                     6              7   \n",
+       "8                       7             6                     7              7   \n",
+       "9                       7             7                     7              6   \n",
+       "10                      7             7                     6              7   \n",
+       "11                      7             7                     6              7   \n",
+       "12                      5             5                     4              6   \n",
+       "13                      7             7                     6              7   \n",
+       "14                      7             7                     6              7   \n",
+       "15                      5             5                     4              6   \n",
+       "16                      3             2                     3              4   \n",
+       "17                      2             3                     2              3   \n",
+       "18                      7             7                     6              7   \n",
+       "19                      7             6                     7              7   \n",
+       "20                      5             5                     4              6   \n",
+       "21                      4             3                     2              2   \n",
+       "22                      3             2                     3              4   \n",
+       "23                      5             6                     5              5   \n",
+       "24                      6             5                     4              6   \n",
+       "25                      2             4                     3              3   \n",
+       "26                      7             7                     6              7   \n",
+       "27                      7             7                     6              7   \n",
+       "28                      6             5                     4              6   \n",
+       "29                      7             7                     6              7   \n",
+       "..                    ...           ...                   ...            ...   \n",
+       "970                     2             3                     2              3   \n",
+       "971                     4             2                     4              3   \n",
+       "972                     7             7                     6              2   \n",
+       "973                     7             7                     6              7   \n",
+       "974                     4             3                     2              2   \n",
+       "975                     3             2                     3              4   \n",
+       "976                     7             7                     6              7   \n",
+       "977                     2             3                     2              3   \n",
+       "978                     7             7                     6              7   \n",
+       "979                     5             5                     4              6   \n",
+       "980                     7             7                     6              7   \n",
+       "981                     7             7                     6              7   \n",
+       "982                     2             3                     2              3   \n",
+       "983                     4             2                     4              3   \n",
+       "984                     7             7                     6              7   \n",
+       "985                     7             7                     6              7   \n",
+       "986                     5             5                     4              6   \n",
+       "987                     7             7                     6              7   \n",
+       "988                     7             6                     7              7   \n",
+       "989                     5             5                     4              6   \n",
+       "990                     5             5                     4              6   \n",
+       "991                     7             7                     6              7   \n",
+       "992                     7             7                     6              7   \n",
+       "993                     7             6                     7              7   \n",
+       "994                     7             7                     7              6   \n",
+       "995                     7             7                     6              7   \n",
+       "996                     7             7                     6              7   \n",
+       "997                     5             5                     4              6   \n",
+       "998                     7             7                     6              7   \n",
+       "999                     5             5                     4              6   \n",
+       "\n",
+       "      ...    fatigue  weight_loss  shortness_of_breath  wheezing  \\\n",
+       "0     ...          3            4                    2         2   \n",
+       "1     ...          1            3                    7         8   \n",
+       "2     ...          5            3                    2         7   \n",
+       "3     ...          3            2                    4         1   \n",
+       "4     ...          6            7                    2         5   \n",
+       "5     ...          8            7                    9         2   \n",
+       "6     ...          3            2                    4         1   \n",
+       "7     ...          2            7                    6         7   \n",
+       "8     ...          4            2                    3         1   \n",
+       "9     ...          8            5                    7         6   \n",
+       "10    ...          5            3                    2         7   \n",
+       "11    ...          9            6                    5         7   \n",
+       "12    ...          8            7                    9         2   \n",
+       "13    ...          9            6                    5         7   \n",
+       "14    ...          3            2                    4         1   \n",
+       "15    ...          8            7                    9         2   \n",
+       "16    ...          3            2                    2         4   \n",
+       "17    ...          4            5                    6         5   \n",
+       "18    ...          3            2                    4         1   \n",
+       "19    ...          4            2                    3         1   \n",
+       "20    ...          8            7                    9         2   \n",
+       "21    ...          3            4                    2         2   \n",
+       "22    ...          3            2                    2         4   \n",
+       "23    ...          1            4                    3         2   \n",
+       "24    ...          5            3                    2         4   \n",
+       "25    ...          1            2                    4         6   \n",
+       "26    ...          5            3                    2         7   \n",
+       "27    ...          9            6                    5         7   \n",
+       "28    ...          5            3                    2         4   \n",
+       "29    ...          4            4                    5         6   \n",
+       "..    ...        ...          ...                  ...       ...   \n",
+       "970   ...          4            5                    6         5   \n",
+       "971   ...          4            1                    2         4   \n",
+       "972   ...          2            7                    6         5   \n",
+       "973   ...          5            3                    2         7   \n",
+       "974   ...          3            4                    2         2   \n",
+       "975   ...          3            2                    2         4   \n",
+       "976   ...          3            2                    4         1   \n",
+       "977   ...          2            2                    3         4   \n",
+       "978   ...          9            6                    5         7   \n",
+       "979   ...          8            7                    9         2   \n",
+       "980   ...          3            2                    4         1   \n",
+       "981   ...          2            7                    6         7   \n",
+       "982   ...          4            5                    6         5   \n",
+       "983   ...          4            1                    2         4   \n",
+       "984   ...          5            3                    2         7   \n",
+       "985   ...          9            6                    5         7   \n",
+       "986   ...          8            7                    9         2   \n",
+       "987   ...          3            2                    4         1   \n",
+       "988   ...          4            2                    3         1   \n",
+       "989   ...          8            7                    9         2   \n",
+       "990   ...          8            7                    9         2   \n",
+       "991   ...          3            2                    4         1   \n",
+       "992   ...          2            7                    6         7   \n",
+       "993   ...          4            2                    3         1   \n",
+       "994   ...          8            5                    7         6   \n",
+       "995   ...          5            3                    2         7   \n",
+       "996   ...          9            6                    5         7   \n",
+       "997   ...          8            7                    9         2   \n",
+       "998   ...          3            2                    4         1   \n",
+       "999   ...          8            7                    9         2   \n",
+       "\n",
+       "     swallowing_difficulty  clubbing_of_finger_nails  frequent_cold  \\\n",
+       "0                        3                         1              2   \n",
+       "1                        6                         2              1   \n",
+       "2                        8                         2              4   \n",
+       "3                        4                         2              4   \n",
+       "4                        8                         1              3   \n",
+       "5                        1                         4              6   \n",
+       "6                        4                         2              4   \n",
+       "7                        6                         7              2   \n",
+       "8                        4                         5              6   \n",
+       "9                        7                         8              7   \n",
+       "10                       8                         2              4   \n",
+       "11                       2                         4              3   \n",
+       "12                       1                         4              6   \n",
+       "13                       2                         4              3   \n",
+       "14                       4                         2              4   \n",
+       "15                       1                         4              6   \n",
+       "16                       2                         2              3   \n",
+       "17                       5                         4              6   \n",
+       "18                       4                         2              4   \n",
+       "19                       4                         5              6   \n",
+       "20                       1                         4              6   \n",
+       "21                       3                         1              2   \n",
+       "22                       2                         2              3   \n",
+       "23                       4                         6              2   \n",
+       "24                       3                         1              7   \n",
+       "25                       5                         4              2   \n",
+       "26                       8                         2              4   \n",
+       "27                       2                         4              3   \n",
+       "28                       3                         1              7   \n",
+       "29                       5                         5              4   \n",
+       "..                     ...                       ...            ...   \n",
+       "970                      5                         4              6   \n",
+       "971                      6                         5              4   \n",
+       "972                      1                         9              3   \n",
+       "973                      8                         2              4   \n",
+       "974                      3                         1              2   \n",
+       "975                      2                         2              3   \n",
+       "976                      4                         2              4   \n",
+       "977                      1                         5              2   \n",
+       "978                      2                         4              3   \n",
+       "979                      1                         4              6   \n",
+       "980                      4                         2              4   \n",
+       "981                      6                         7              2   \n",
+       "982                      5                         4              6   \n",
+       "983                      6                         5              4   \n",
+       "984                      8                         2              4   \n",
+       "985                      2                         4              3   \n",
+       "986                      1                         4              6   \n",
+       "987                      4                         2              4   \n",
+       "988                      4                         5              6   \n",
+       "989                      1                         4              6   \n",
+       "990                      1                         4              6   \n",
+       "991                      4                         2              4   \n",
+       "992                      6                         7              2   \n",
+       "993                      4                         5              6   \n",
+       "994                      7                         8              7   \n",
+       "995                      8                         2              4   \n",
+       "996                      2                         4              3   \n",
+       "997                      1                         4              6   \n",
+       "998                      4                         2              4   \n",
+       "999                      1                         4              6   \n",
+       "\n",
+       "     dry_cough  snoring   level  \n",
+       "0            3        4     Low  \n",
+       "1            7        2  Medium  \n",
+       "2            5        3    High  \n",
+       "3            2        3    High  \n",
+       "4            2        3  Medium  \n",
+       "5            7        2    High  \n",
+       "6            2        3    High  \n",
+       "7            3        1    High  \n",
+       "8            7        5    High  \n",
+       "9            6        2    High  \n",
+       "10           5        3    High  \n",
+       "11           1        4    High  \n",
+       "12           7        2    High  \n",
+       "13           1        4    High  \n",
+       "14           2        3    High  \n",
+       "15           7        2    High  \n",
+       "16           4        3     Low  \n",
+       "17           5        4  Medium  \n",
+       "18           2        3    High  \n",
+       "19           7        5    High  \n",
+       "20           7        2    High  \n",
+       "21           3        4     Low  \n",
+       "22           4        3     Low  \n",
+       "23           4        1  Medium  \n",
+       "24           5        6  Medium  \n",
+       "25           1        5  Medium  \n",
+       "26           5        3    High  \n",
+       "27           1        4    High  \n",
+       "28           5        6  Medium  \n",
+       "29           6        5    High  \n",
+       "..         ...      ...     ...  \n",
+       "970          5        4  Medium  \n",
+       "971          2        5  Medium  \n",
+       "972          4        2  Medium  \n",
+       "973          5        3    High  \n",
+       "974          3        4     Low  \n",
+       "975          4        3     Low  \n",
+       "976          2        3    High  \n",
+       "977          6        2     Low  \n",
+       "978          1        4    High  \n",
+       "979          7        2    High  \n",
+       "980          2        3    High  \n",
+       "981          3        1    High  \n",
+       "982          5        4  Medium  \n",
+       "983          2        5  Medium  \n",
+       "984          5        3    High  \n",
+       "985          1        4    High  \n",
+       "986          7        2    High  \n",
+       "987          2        3    High  \n",
+       "988          7        5    High  \n",
+       "989          7        2    High  \n",
+       "990          7        2    High  \n",
+       "991          2        3    High  \n",
+       "992          3        1    High  \n",
+       "993          7        5    High  \n",
+       "994          6        2    High  \n",
+       "995          5        3    High  \n",
+       "996          1        4    High  \n",
+       "997          7        2    High  \n",
+       "998          2        3    High  \n",
+       "999          7        2    High  \n",
+       "\n",
+       "[1000 rows x 25 columns]"
+      ]
+     },
+     "execution_count": 52,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load data\n",
+    "filename = 'cancer_patient.csv'\n",
+    "df = pd.read_csv(filename)\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We are going to build a classification model that predicts the lung cancer level (Low, Medium or High), using a decision tree that considers many of the features in the dataset.\n",
+    "\n",
+    "Here we use a limited subset of the data to keep the training time short; in practice, we should use more features. The features are a mix of numeric and one-hot encoded categorical variables. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [],
+   "source": [
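+    "# Drop everything that is not numeric (left disabled: `level` still holds strings at this point, so enabling it would also drop the target column)\n",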
+    "#df = df.select_dtypes(exclude=['object'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>patient_id</th>\n",
+       "      <th>age</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>air_pollution</th>\n",
+       "      <th>alcohol_use</th>\n",
+       "      <th>dust_allergy</th>\n",
+       "      <th>occupational_hazards</th>\n",
+       "      <th>genetic_risk</th>\n",
+       "      <th>chronic_lung_disease</th>\n",
+       "      <th>balanced_diet</th>\n",
+       "      <th>...</th>\n",
+       "      <th>fatigue</th>\n",
+       "      <th>weight_loss</th>\n",
+       "      <th>shortness_of_breath</th>\n",
+       "      <th>wheezing</th>\n",
+       "      <th>swallowing_difficulty</th>\n",
+       "      <th>clubbing_of_finger_nails</th>\n",
+       "      <th>frequent_cold</th>\n",
+       "      <th>dry_cough</th>\n",
+       "      <th>snoring</th>\n",
+       "      <th>level</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>P1</td>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>P10</td>\n",
+       "      <td>17</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>P107</td>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>P189</td>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>P19</td>\n",
+       "      <td>38</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 25 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  patient_id  age  gender  air_pollution  alcohol_use  dust_allergy  \\\n",
+       "0         P1   33       1              2            4             5   \n",
+       "1        P10   17       1              3            1             5   \n",
+       "2       P107   44       1              6            7             7   \n",
+       "3       P189   39       2              6            8             7   \n",
+       "4        P19   38       2              2            1             5   \n",
+       "\n",
+       "   occupational_hazards  genetic_risk  chronic_lung_disease  balanced_diet  \\\n",
+       "0                     4             3                     2              2   \n",
+       "1                     3             4                     2              2   \n",
+       "2                     7             7                     6              7   \n",
+       "3                     7             7                     6              7   \n",
+       "4                     3             2                     3              2   \n",
+       "\n",
+       "   ...    fatigue  weight_loss  shortness_of_breath  wheezing  \\\n",
+       "0  ...          3            4                    2         2   \n",
+       "1  ...          1            3                    7         8   \n",
+       "2  ...          5            3                    2         7   \n",
+       "3  ...          3            2                    4         1   \n",
+       "4  ...          6            7                    2         5   \n",
+       "\n",
+       "   swallowing_difficulty  clubbing_of_finger_nails  frequent_cold  dry_cough  \\\n",
+       "0                      3                         1              2          3   \n",
+       "1                      6                         2              1          7   \n",
+       "2                      8                         2              4          5   \n",
+       "3                      4                         2              4          2   \n",
+       "4                      8                         1              3          2   \n",
+       "\n",
+       "   snoring  level  \n",
+       "0        4      1  \n",
+       "1        2      2  \n",
+       "2        3      3  \n",
+       "3        3      3  \n",
+       "4        3      2  \n",
+       "\n",
+       "[5 rows x 25 columns]"
+      ]
+     },
+     "execution_count": 54,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def data_cleaning(data):\n",
+    "    \"\"\"Return a cleaned copy of the patient table.\n",
+    "\n",
+    "    Missing ``age`` values are filled with the median of the ``age``\n",
+    "    column, and the ``level`` labels Low / Medium / High are recoded as\n",
+    "    1 / 2 / 3. Any other ``level`` value is kept as it is, so cleaning\n",
+    "    an already-cleaned frame changes nothing.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    data : pandas.DataFrame\n",
+    "        Table with at least the columns ``age`` and ``level``.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    pandas.DataFrame\n",
+    "        A new frame; ``data`` itself and any global frame are left untouched.\n",
+    "    \"\"\"\n",
+    "    level_codes = {\"Low\": 1, \"Medium\": 2, \"High\": 3}\n",
+    "\n",
+    "    # Work on a copy so the function depends only on its argument.\n",
+    "    cleaned = data.copy()\n",
+    "    cleaned[\"age\"] = cleaned[\"age\"].fillna(cleaned[\"age\"].median())\n",
+    "\n",
+    "    # Element-wise lookup that falls back to the value itself, so unknown\n",
+    "    # or already-numeric entries pass through unchanged.\n",
+    "    cleaned[\"level\"] = cleaned[\"level\"].map(lambda label: level_codes.get(label, label))\n",
+    "\n",
+    "    return cleaned\n",
+    "\n",
+    "# Run the cleaning step on the loaded table and preview the first rows.\n",
+    "df = data_cleaning(df)\n",
+    "df.head()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We have label-encoded the target column `level` (Low → 1, Medium → 2, High → 3) because it is categorical and needs to be converted to numeric values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Separate the target column from the feature matrix.\n",
+    "y_column = 'level'\n",
+    "y = df[y_column]\n",
+    "\n",
+    "# patient_id is an identifier and will not be helpful, so it is left out of\n",
+    "# the features together with the target. drop() returns a new DataFrame.\n",
+    "X = df.drop(columns=[y_column, \"patient_id\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>gender</th>\n",
+       "      <th>air_pollution</th>\n",
+       "      <th>alcohol_use</th>\n",
+       "      <th>dust_allergy</th>\n",
+       "      <th>occupational_hazards</th>\n",
+       "      <th>genetic_risk</th>\n",
+       "      <th>chronic_lung_disease</th>\n",
+       "      <th>balanced_diet</th>\n",
+       "      <th>obesity</th>\n",
+       "      <th>...</th>\n",
+       "      <th>coughing_of_blood</th>\n",
+       "      <th>fatigue</th>\n",
+       "      <th>weight_loss</th>\n",
+       "      <th>shortness_of_breath</th>\n",
+       "      <th>wheezing</th>\n",
+       "      <th>swallowing_difficulty</th>\n",
+       "      <th>clubbing_of_finger_nails</th>\n",
+       "      <th>frequent_cold</th>\n",
+       "      <th>dry_cough</th>\n",
+       "      <th>snoring</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>17</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>38</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "      <td>8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>49</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>13</th>\n",
+       "      <td>64</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>14</th>\n",
+       "      <td>18</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>15</th>\n",
+       "      <td>47</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>16</th>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>17</th>\n",
+       "      <td>19</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>18</th>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>19</th>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>20</th>\n",
+       "      <td>35</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>21</th>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>35</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>27</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>48</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>27</th>\n",
+       "      <td>64</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>28</th>\n",
+       "      <td>39</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>6</td>\n",
+       "      <td>...</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>29</th>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>970</th>\n",
+       "      <td>31</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>971</th>\n",
+       "      <td>38</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>972</th>\n",
+       "      <td>35</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>1</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>973</th>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>974</th>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>975</th>\n",
+       "      <td>45</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>976</th>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>977</th>\n",
+       "      <td>53</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>5</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>978</th>\n",
+       "      <td>19</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>979</th>\n",
+       "      <td>35</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>980</th>\n",
+       "      <td>46</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>981</th>\n",
+       "      <td>27</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>982</th>\n",
+       "      <td>26</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>983</th>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>4</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>984</th>\n",
+       "      <td>28</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>985</th>\n",
+       "      <td>19</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>986</th>\n",
+       "      <td>29</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>987</th>\n",
+       "      <td>39</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>988</th>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>989</th>\n",
+       "      <td>29</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>990</th>\n",
+       "      <td>49</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>991</th>\n",
+       "      <td>37</td>\n",
+       "      <td>1</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>992</th>\n",
+       "      <td>26</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>993</th>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>994</th>\n",
+       "      <td>33</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>995</th>\n",
+       "      <td>44</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>7</td>\n",
+       "      <td>8</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>996</th>\n",
+       "      <td>37</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>997</th>\n",
+       "      <td>25</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>998</th>\n",
+       "      <td>18</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>9</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>4</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>999</th>\n",
+       "      <td>47</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>6</td>\n",
+       "      <td>5</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>...</td>\n",
+       "      <td>8</td>\n",
+       "      <td>8</td>\n",
+       "      <td>7</td>\n",
+       "      <td>9</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>7</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1000 rows × 23 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "     age  gender  air_pollution  alcohol_use  dust_allergy  \\\n",
+       "0     33       1              2            4             5   \n",
+       "1     17       1              3            1             5   \n",
+       "2     44       1              6            7             7   \n",
+       "3     39       2              6            8             7   \n",
+       "4     38       2              2            1             5   \n",
+       "5     49       1              6            5             6   \n",
+       "6     37       1              8            8             7   \n",
+       "7     26       2              7            7             7   \n",
+       "8     37       2              7            7             7   \n",
+       "9     33       1              6            7             7   \n",
+       "10    44       1              6            7             7   \n",
+       "11    37       2              6            8             7   \n",
+       "12    25       2              4            5             6   \n",
+       "13    64       2              6            8             7   \n",
+       "14    18       2              6            8             7   \n",
+       "15    47       1              6            5             6   \n",
+       "16    25       2              3            1             4   \n",
+       "17    19       1              3            2             4   \n",
+       "18    26       2              8            8             7   \n",
+       "19    37       1              7            7             7   \n",
+       "20    35       2              4            5             6   \n",
+       "21    33       1              2            4             5   \n",
+       "22    25       2              3            1             4   \n",
+       "23    35       2              4            5             6   \n",
+       "24    39       2              4            5             6   \n",
+       "25    27       2              2            3             4   \n",
+       "26    48       1              6            7             7   \n",
+       "27    64       1              6            8             7   \n",
+       "28    39       1              4            5             6   \n",
+       "29    33       1              6            7             7   \n",
+       "..   ...     ...            ...          ...           ...   \n",
+       "970   31       2              3            2             4   \n",
+       "971   38       2              1            2             3   \n",
+       "972   35       1              6            8             7   \n",
+       "973   44       1              6            7             7   \n",
+       "974   33       1              2            4             5   \n",
+       "975   45       1              3            1             4   \n",
+       "976   26       2              8            8             7   \n",
+       "977   53       1              3            1             4   \n",
+       "978   19       1              6            8             7   \n",
+       "979   35       2              4            5             6   \n",
+       "980   46       1              6            8             7   \n",
+       "981   27       1              6            7             7   \n",
+       "982   26       1              3            2             4   \n",
+       "983   37       1              1            2             3   \n",
+       "984   28       1              6            7             7   \n",
+       "985   19       1              6            8             7   \n",
+       "986   29       2              4            5             6   \n",
+       "987   39       2              6            8             7   \n",
+       "988   37       1              7            7             7   \n",
+       "989   29       2              4            5             6   \n",
+       "990   49       1              6            5             6   \n",
+       "991   37       1              8            8             7   \n",
+       "992   26       2              7            7             7   \n",
+       "993   37       2              7            7             7   \n",
+       "994   33       1              6            7             7   \n",
+       "995   44       1              6            7             7   \n",
+       "996   37       2              6            8             7   \n",
+       "997   25       2              4            5             6   \n",
+       "998   18       2              6            8             7   \n",
+       "999   47       1              6            5             6   \n",
+       "\n",
+       "     occupational_hazards  genetic_risk  chronic_lung_disease  balanced_diet  \\\n",
+       "0                       4             3                     2              2   \n",
+       "1                       3             4                     2              2   \n",
+       "2                       7             7                     6              7   \n",
+       "3                       7             7                     6              7   \n",
+       "4                       3             2                     3              2   \n",
+       "5                       5             5                     4              6   \n",
+       "6                       7             7                     6              7   \n",
+       "7                       7             7                     6              7   \n",
+       "8                       7             6                     7              7   \n",
+       "9                       7             7                     7              6   \n",
+       "10                      7             7                     6              7   \n",
+       "11                      7             7                     6              7   \n",
+       "12                      5             5                     4              6   \n",
+       "13                      7             7                     6              7   \n",
+       "14                      7             7                     6              7   \n",
+       "15                      5             5                     4              6   \n",
+       "16                      3             2                     3              4   \n",
+       "17                      2             3                     2              3   \n",
+       "18                      7             7                     6              7   \n",
+       "19                      7             6                     7              7   \n",
+       "20                      5             5                     4              6   \n",
+       "21                      4             3                     2              2   \n",
+       "22                      3             2                     3              4   \n",
+       "23                      5             6                     5              5   \n",
+       "24                      6             5                     4              6   \n",
+       "25                      2             4                     3              3   \n",
+       "26                      7             7                     6              7   \n",
+       "27                      7             7                     6              7   \n",
+       "28                      6             5                     4              6   \n",
+       "29                      7             7                     6              7   \n",
+       "..                    ...           ...                   ...            ...   \n",
+       "970                     2             3                     2              3   \n",
+       "971                     4             2                     4              3   \n",
+       "972                     7             7                     6              2   \n",
+       "973                     7             7                     6              7   \n",
+       "974                     4             3                     2              2   \n",
+       "975                     3             2                     3              4   \n",
+       "976                     7             7                     6              7   \n",
+       "977                     2             3                     2              3   \n",
+       "978                     7             7                     6              7   \n",
+       "979                     5             5                     4              6   \n",
+       "980                     7             7                     6              7   \n",
+       "981                     7             7                     6              7   \n",
+       "982                     2             3                     2              3   \n",
+       "983                     4             2                     4              3   \n",
+       "984                     7             7                     6              7   \n",
+       "985                     7             7                     6              7   \n",
+       "986                     5             5                     4              6   \n",
+       "987                     7             7                     6              7   \n",
+       "988                     7             6                     7              7   \n",
+       "989                     5             5                     4              6   \n",
+       "990                     5             5                     4              6   \n",
+       "991                     7             7                     6              7   \n",
+       "992                     7             7                     6              7   \n",
+       "993                     7             6                     7              7   \n",
+       "994                     7             7                     7              6   \n",
+       "995                     7             7                     6              7   \n",
+       "996                     7             7                     6              7   \n",
+       "997                     5             5                     4              6   \n",
+       "998                     7             7                     6              7   \n",
+       "999                     5             5                     4              6   \n",
+       "\n",
+       "     obesity   ...     coughing_of_blood  fatigue  weight_loss  \\\n",
+       "0          4   ...                     4        3            4   \n",
+       "1          2   ...                     3        1            3   \n",
+       "2          7   ...                     7        5            3   \n",
+       "3          7   ...                     9        3            2   \n",
+       "4          4   ...                     4        6            7   \n",
+       "5          7   ...                     8        8            7   \n",
+       "6          7   ...                     9        3            2   \n",
+       "7          7   ...                     7        2            7   \n",
+       "8          7   ...                     8        4            2   \n",
+       "9          7   ...                     7        8            5   \n",
+       "10         7   ...                     7        5            3   \n",
+       "11         7   ...                     7        9            6   \n",
+       "12         7   ...                     8        8            7   \n",
+       "13         7   ...                     7        9            6   \n",
+       "14         7   ...                     9        3            2   \n",
+       "15         7   ...                     8        8            7   \n",
+       "16         3   ...                     1        3            2   \n",
+       "17         3   ...                     3        4            5   \n",
+       "18         7   ...                     9        3            2   \n",
+       "19         7   ...                     8        4            2   \n",
+       "20         7   ...                     8        8            7   \n",
+       "21         4   ...                     4        3            4   \n",
+       "22         3   ...                     1        3            2   \n",
+       "23         5   ...                     5        1            4   \n",
+       "24         6   ...                     6        5            3   \n",
+       "25         3   ...                     4        1            2   \n",
+       "26         7   ...                     7        5            3   \n",
+       "27         7   ...                     7        9            6   \n",
+       "28         6   ...                     6        5            3   \n",
+       "29         7   ...                     7        4            4   \n",
+       "..       ...   ...                   ...      ...          ...   \n",
+       "970        3   ...                     3        4            5   \n",
+       "971        3   ...                     4        4            1   \n",
+       "972        4   ...                     3        2            7   \n",
+       "973        7   ...                     7        5            3   \n",
+       "974        4   ...                     4        3            4   \n",
+       "975        3   ...                     1        3            2   \n",
+       "976        7   ...                     9        3            2   \n",
+       "977        3   ...                     2        2            2   \n",
+       "978        7   ...                     7        9            6   \n",
+       "979        7   ...                     8        8            7   \n",
+       "980        7   ...                     9        3            2   \n",
+       "981        7   ...                     7        2            7   \n",
+       "982        3   ...                     3        4            5   \n",
+       "983        3   ...                     4        4            1   \n",
+       "984        7   ...                     7        5            3   \n",
+       "985        7   ...                     7        9            6   \n",
+       "986        7   ...                     8        8            7   \n",
+       "987        7   ...                     9        3            2   \n",
+       "988        7   ...                     8        4            2   \n",
+       "989        7   ...                     8        8            7   \n",
+       "990        7   ...                     8        8            7   \n",
+       "991        7   ...                     9        3            2   \n",
+       "992        7   ...                     7        2            7   \n",
+       "993        7   ...                     8        4            2   \n",
+       "994        7   ...                     7        8            5   \n",
+       "995        7   ...                     7        5            3   \n",
+       "996        7   ...                     7        9            6   \n",
+       "997        7   ...                     8        8            7   \n",
+       "998        7   ...                     9        3            2   \n",
+       "999        7   ...                     8        8            7   \n",
+       "\n",
+       "     shortness_of_breath  wheezing  swallowing_difficulty  \\\n",
+       "0                      2         2                      3   \n",
+       "1                      7         8                      6   \n",
+       "2                      2         7                      8   \n",
+       "3                      4         1                      4   \n",
+       "4                      2         5                      8   \n",
+       "5                      9         2                      1   \n",
+       "6                      4         1                      4   \n",
+       "7                      6         7                      6   \n",
+       "8                      3         1                      4   \n",
+       "9                      7         6                      7   \n",
+       "10                     2         7                      8   \n",
+       "11                     5         7                      2   \n",
+       "12                     9         2                      1   \n",
+       "13                     5         7                      2   \n",
+       "14                     4         1                      4   \n",
+       "15                     9         2                      1   \n",
+       "16                     2         4                      2   \n",
+       "17                     6         5                      5   \n",
+       "18                     4         1                      4   \n",
+       "19                     3         1                      4   \n",
+       "20                     9         2                      1   \n",
+       "21                     2         2                      3   \n",
+       "22                     2         4                      2   \n",
+       "23                     3         2                      4   \n",
+       "24                     2         4                      3   \n",
+       "25                     4         6                      5   \n",
+       "26                     2         7                      8   \n",
+       "27                     5         7                      2   \n",
+       "28                     2         4                      3   \n",
+       "29                     5         6                      5   \n",
+       "..                   ...       ...                    ...   \n",
+       "970                    6         5                      5   \n",
+       "971                    2         4                      6   \n",
+       "972                    6         5                      1   \n",
+       "973                    2         7                      8   \n",
+       "974                    2         2                      3   \n",
+       "975                    2         4                      2   \n",
+       "976                    4         1                      4   \n",
+       "977                    3         4                      1   \n",
+       "978                    5         7                      2   \n",
+       "979                    9         2                      1   \n",
+       "980                    4         1                      4   \n",
+       "981                    6         7                      6   \n",
+       "982                    6         5                      5   \n",
+       "983                    2         4                      6   \n",
+       "984                    2         7                      8   \n",
+       "985                    5         7                      2   \n",
+       "986                    9         2                      1   \n",
+       "987                    4         1                      4   \n",
+       "988                    3         1                      4   \n",
+       "989                    9         2                      1   \n",
+       "990                    9         2                      1   \n",
+       "991                    4         1                      4   \n",
+       "992                    6         7                      6   \n",
+       "993                    3         1                      4   \n",
+       "994                    7         6                      7   \n",
+       "995                    2         7                      8   \n",
+       "996                    5         7                      2   \n",
+       "997                    9         2                      1   \n",
+       "998                    4         1                      4   \n",
+       "999                    9         2                      1   \n",
+       "\n",
+       "     clubbing_of_finger_nails  frequent_cold  dry_cough  snoring  \n",
+       "0                           1              2          3        4  \n",
+       "1                           2              1          7        2  \n",
+       "2                           2              4          5        3  \n",
+       "3                           2              4          2        3  \n",
+       "4                           1              3          2        3  \n",
+       "5                           4              6          7        2  \n",
+       "6                           2              4          2        3  \n",
+       "7                           7              2          3        1  \n",
+       "8                           5              6          7        5  \n",
+       "9                           8              7          6        2  \n",
+       "10                          2              4          5        3  \n",
+       "11                          4              3          1        4  \n",
+       "12                          4              6          7        2  \n",
+       "13                          4              3          1        4  \n",
+       "14                          2              4          2        3  \n",
+       "15                          4              6          7        2  \n",
+       "16                          2              3          4        3  \n",
+       "17                          4              6          5        4  \n",
+       "18                          2              4          2        3  \n",
+       "19                          5              6          7        5  \n",
+       "20                          4              6          7        2  \n",
+       "21                          1              2          3        4  \n",
+       "22                          2              3          4        3  \n",
+       "23                          6              2          4        1  \n",
+       "24                          1              7          5        6  \n",
+       "25                          4              2          1        5  \n",
+       "26                          2              4          5        3  \n",
+       "27                          4              3          1        4  \n",
+       "28                          1              7          5        6  \n",
+       "29                          5              4          6        5  \n",
+       "..                        ...            ...        ...      ...  \n",
+       "970                         4              6          5        4  \n",
+       "971                         5              4          2        5  \n",
+       "972                         9              3          4        2  \n",
+       "973                         2              4          5        3  \n",
+       "974                         1              2          3        4  \n",
+       "975                         2              3          4        3  \n",
+       "976                         2              4          2        3  \n",
+       "977                         5              2          6        2  \n",
+       "978                         4              3          1        4  \n",
+       "979                         4              6          7        2  \n",
+       "980                         2              4          2        3  \n",
+       "981                         7              2          3        1  \n",
+       "982                         4              6          5        4  \n",
+       "983                         5              4          2        5  \n",
+       "984                         2              4          5        3  \n",
+       "985                         4              3          1        4  \n",
+       "986                         4              6          7        2  \n",
+       "987                         2              4          2        3  \n",
+       "988                         5              6          7        5  \n",
+       "989                         4              6          7        2  \n",
+       "990                         4              6          7        2  \n",
+       "991                         2              4          2        3  \n",
+       "992                         7              2          3        1  \n",
+       "993                         5              6          7        5  \n",
+       "994                         8              7          6        2  \n",
+       "995                         2              4          5        3  \n",
+       "996                         4              3          1        4  \n",
+       "997                         4              6          7        2  \n",
+       "998                         2              4          2        3  \n",
+       "999                         4              6          7        2  \n",
+       "\n",
+       "[1000 rows x 23 columns]"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible\n",
+    "X_train, X_test, y_train, y_test = train_test_split(\n",
+    "    X, y, test_size=0.2, random_state=15\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "800 200\n",
+      "800 200\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sanity check: number of rows in the train/test parts of the features, then of the target\n",
+    "print(*map(len, (X_train, X_test)))\n",
+    "print(*map(len, (y_train, y_test)))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Build a Decision Tree\n",
+    "We will use sklearn's implementation of a Decision Tree classifier."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "sklearn.tree.tree.DecisionTreeClassifier"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "DecisionTreeClassifier"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Many of the sklearn algorithms are implemented using the same standard steps: \n",
+    "- **Step 1: Initiate the algorithm** Define the parameters (and hyperparameters) of the algorithm.\n",
+    "\n",
+    "- **Step 2: Train the algorithm** Train the algorithm by fitting it to the X_train and y_train datasets.\n",
+    "\n",
+    "- **Step 3: Evaluating the algorithm** Evaluate the predictive power of the algorithm by comparing the predicted Level values to the true values. We can do this for both the training and the testing dataset."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Random Forest"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell below carries out the 3 model implementation steps (Initialize, Train, Evaluate) for our Random Forest Classifier."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.86625\n",
+      "0.825\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 1: initialise the forest (fixed seed, min_samples_leaf of 150)\n",
+    "my_rf = RandomForestClassifier(min_samples_leaf=150, random_state=0)\n",
+    "\n",
+    "# Step 2: train on the training split\n",
+    "my_rf.fit(X_train, y_train)\n",
+    "\n",
+    "# Step 3: score the fitted forest on the training split, then on the test split\n",
+    "score_train_rf, score_test_rf = (\n",
+    "    my_rf.score(features, labels)\n",
+    "    for features, labels in ((X_train, y_train), (X_test, y_test))\n",
+    ")\n",
+    "print(score_train_rf)\n",
+    "print(score_test_rf)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We observe that the training score is almost the same as the test score, which suggests that the model is not overfitting."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The cell below carries out the 3 model implementation steps (Initialize, Train, Evaluate) for our Decision Tree Classifier."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.91125\n",
+      "0.825\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Step 1: initialise the tree (fixed seed, min_samples_leaf of 50)\n",
+    "my_tree = DecisionTreeClassifier(random_state = 0, min_samples_leaf = 50)\n",
+    "# Step 2: train on the training split\n",
+    "my_tree.fit(X_train, y_train)\n",
+    "# Step 3: evaluate the tree on the training split\n",
+    "score_train = my_tree.score(X_train, y_train)\n",
+    "print(score_train)\n",
+    "# Score and print the decision tree itself on the test split\n",
+    "# (not the random forest fitted in the previous section)\n",
+    "score_test_dt = my_tree.score(X_test, y_test)\n",
+    "print(score_test_dt)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We observe the same thing as with the random forest. We ran these two models to check that the training score and the test score are similar."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.87724551 0.89820359 0.87951807]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 3-fold cross-validation of the decision tree on the full dataset.\n",
+    "# cross_val_score is already imported in the imports cell at the top of the notebook.\n",
+    "print(cross_val_score(my_tree, X, y, cv=3))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 63,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_score_Classifier(sklearn_Classifier, X_train, y_train, X_test, y_test, model_parameters, print_oob_score=False):\n",
+    "    \"\"\"Train a classifier, print its train/test scores and return the fitted model.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    sklearn_Classifier : callable\n",
+    "        Estimator class (e.g. DecisionTreeClassifier); it is called as\n",
+    "        sklearn_Classifier(**model_parameters) to build the model.\n",
+    "    X_train, y_train\n",
+    "        Training features and target, passed to fit() and score().\n",
+    "    X_test, y_test\n",
+    "        Held-out features and target, passed to score().\n",
+    "    model_parameters : dict\n",
+    "        Keyword arguments forwarded to the estimator constructor.\n",
+    "    print_oob_score : bool, default False\n",
+    "        If True, also print the fitted model's oob_score_ attribute, so the\n",
+    "        estimator must expose that attribute after fitting.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    The fitted estimator instance.\n",
+    "    \"\"\"\n",
+    "    # Step 1: Initializing the sklearn classifier\n",
+    "    classifier = sklearn_Classifier(**model_parameters)\n",
+    "\n",
+    "    # Step 2: Training the algorithm using the X_train dataset of features and y_train, the associated target features\n",
+    "    classifier.fit(X_train, y_train)\n",
+    "\n",
+    "    # Step 3: Calculating the score of the predictive power on the training and testing dataset.\n",
+    "    training_score = classifier.score(X_train, y_train)\n",
+    "    testing_score = classifier.score(X_test, y_test)\n",
+    "\n",
+    "    # Print the results!\n",
+    "    print(f\"Train score: {training_score:>5.4f}\")\n",
+    "    print(f\"Test score: {testing_score:>7.4f}\")\n",
+    "    if print_oob_score:\n",
+    "        # Read the attribute from the model fitted above; the local name must match\n",
+    "        # the one assigned in step 1, otherwise this branch raises NameError.\n",
+    "        print(f\"OOB score: {classifier.oob_score_:>8.4f}\")\n",
+    "\n",
+    "    return classifier"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "With all tree algorithms the major challenge is using the parameters to balance the bias vs variance trade-off.  \n",
+    "\n",
+    "To start, check how the model performs when using different parameters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train score: 0.9900\n",
+      "Test score:  0.9900\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fit a decision tree with max_depth 4 and min_samples_leaf 5, and print its scores\n",
+    "trained_Classifier = train_score_Classifier(\n",
+    "    DecisionTreeClassifier,\n",
+    "    X_train,\n",
+    "    y_train,\n",
+    "    X_test,\n",
+    "    y_test,\n",
+    "    {'min_samples_leaf': 5, 'max_depth': 4, 'random_state': 20},\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000000000DFBBC18>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000DFDB710>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000DFB1DA0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E009470>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E024B00>],\n",
+       "       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000000E024B38>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E075860>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E09FEF0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000DF40860>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E0D6EF0>],\n",
+       "       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000000E1075C0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E12EC50>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E161320>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E1889B0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E1BA080>],\n",
+       "       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000000E1E1710>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E209DA0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E23D470>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E265B00>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E2981D0>],\n",
+       "       [<matplotlib.axes._subplots.AxesSubplot object at 0x000000000E2BF860>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E2E8EF0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E3185C0>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E340C50>,\n",
+       "        <matplotlib.axes._subplots.AxesSubplot object at 0x000000000E373320>]],\n",
+       "      dtype=object)"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 25 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Histogram of every column. The 5x5 grid of axes needs a larger figure to be legible,\n",
+    "# and the trailing ';' suppresses the array-of-Axes repr in the cell output.\n",
+    "df.hist(color='blue', alpha=0.5, bins=16, figsize=(20, 20));"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train score: 0.9825\n",
+      "Test score:  0.9750\n",
+      "\n",
+      "\n",
+      "Train score: 0.9800\n",
+      "Test score:  0.9800\n",
+      "\n",
+      "\n",
+      "Train score: 0.9250\n",
+      "Test score:  0.9050\n",
+      "\n",
+      "\n",
+      "Train score: 0.9250\n",
+      "Test score:  0.9050\n",
+      "\n",
+      "\n",
+      "Train score: 0.9113\n",
+      "Test score:  0.9100\n",
+      "\n",
+      "\n",
+      "Train score: 0.9012\n",
+      "Test score:  0.8950\n",
+      "\n",
+      "\n",
+      "Train score: 0.9012\n",
+      "Test score:  0.8950\n",
+      "\n",
+      "\n",
+      "Train score: 0.8650\n",
+      "Test score:  0.8350\n",
+      "\n",
+      "\n",
+      "Train score: 0.8650\n",
+      "Test score:  0.8350\n",
+      "\n",
+      "\n",
+      "Train score: 0.8650\n",
+      "Test score:  0.8350\n",
+      "\n",
+      "\n",
+      "Train score: 0.8650\n",
+      "Test score:  0.8350\n",
+      "\n",
+      "\n",
+      "Train score: 0.8650\n",
+      "Test score:  0.8350\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Base model parameters shared by every run of the sweep below.\n",
+    "# We are fixing the random state so that the results are reproducible and consistent.\n",
+    "parameters = {\"max_depth\": 6, 'min_samples_leaf': 50, 'random_state': 42}\n",
+    "\n",
+    "# Sweep min_samples_leaf from 10 to 120 to see how the minimum leaf size affects the scores.\n",
+    "for min_samples_leaf in range(10, 121, 10):\n",
+    "    # Label each result so the printed scores can be matched to the value that produced them\n",
+    "    print(\"min_samples_leaf = %d\" % min_samples_leaf)\n",
+    "    # Train and evaluate the model, overriding only the parameter being swept\n",
+    "    trained_Classifier = train_score_Classifier(sklearn_Classifier=DecisionTreeClassifier,\n",
+    "                                              X_train=X_train, \n",
+    "                                              y_train=y_train, \n",
+    "                                              X_test=X_test, \n",
+    "                                              y_test=y_test, \n",
+    "                                              model_parameters={**parameters, 'min_samples_leaf': min_samples_leaf})\n",
+    "    print(\"\\n\")\n",
+    "\n",
+    "# NOTE: after the loop, trained_Classifier is the model from the last iteration (min_samples_leaf=120)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "can only concatenate str (not \"numpy.int64\") to str",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[1;32m<ipython-input-67-c5dafc314c6f>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      4\u001b[0m                          \u001b[0mclass_names\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0my_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m                          \u001b[0mfilled\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrounded\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m                          special_characters=True) \n\u001b[0m\u001b[0;32m      7\u001b[0m \u001b[1;31m# use graphviz to render the image\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m \u001b[0mgraph\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgraphviz\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSource\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdot_data\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\tree\\export.py\u001b[0m in \u001b[0;36mexport_graphviz\u001b[1;34m(decision_tree, out_file, max_depth, feature_names, class_names, label, filled, leaves_parallel, impurity, node_ids, proportion, rotate, rounded, special_characters, precision)\u001b[0m\n\u001b[0;32m    462\u001b[0m             \u001b[0mrecurse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdecision_tree\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"impurity\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    463\u001b[0m         \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 464\u001b[1;33m             \u001b[0mrecurse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdecision_tree\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdecision_tree\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcriterion\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    465\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    466\u001b[0m         \u001b[1;31m# If required, draw leaf nodes at same depth as each other\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\tree\\export.py\u001b[0m in \u001b[0;36mrecurse\u001b[1;34m(tree, node_id, criterion, parent, depth)\u001b[0m\n\u001b[0;32m    330\u001b[0m             out_file.write('%d [label=%s'\n\u001b[0;32m    331\u001b[0m                            % (node_id,\n\u001b[1;32m--> 332\u001b[1;33m                               node_to_str(tree, node_id, criterion)))\n\u001b[0m\u001b[0;32m    333\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    334\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mfilled\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\sklearn\\tree\\export.py\u001b[0m in \u001b[0;36mnode_to_str\u001b[1;34m(tree, node_id, criterion)\u001b[0m\n\u001b[0;32m    300\u001b[0m                                           \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    301\u001b[0m                                           characters[2])\n\u001b[1;32m--> 302\u001b[1;33m             \u001b[0mnode_string\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mclass_name\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    303\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    304\u001b[0m         \u001b[1;31m# Clean up any trailing newlines\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+      "\u001b[1;31mTypeError\u001b[0m: can only concatenate str (not \"numpy.int64\") to str"
+     ]
+    }
+   ],
+   "source": [
+    "# from the sklearn tree library, create a DOT description of the trained decision tree.\n",
+    "# out_file=None makes export_graphviz return the DOT source as a string; with a file name\n",
+    "# it writes the file and returns None, which leaves graphviz.Source nothing to render.\n",
+    "# class_names must hold one string label per class (in ascending class order); passing the\n",
+    "# raw integer y_train values is what raised the 'can only concatenate str' TypeError.\n",
+    "dot_data = tree.export_graphviz(trained_Classifier, out_file=None, \n",
+    "                         feature_names=list(X_train.columns),  \n",
+    "                         class_names=[str(label) for label in trained_Classifier.classes_],  \n",
+    "                         filled=True, rounded=True,  \n",
+    "                         special_characters=True) \n",
+    "# use graphviz to render the image\n",
+    "graph = graphviz.Source(dot_data)\n",
+    "graph"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<span style=\"color:red\"> In a final report, try not to include code that isn't being used.</span>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# The attached tree.dot file lets us render our decision tree; a screenshot of the rendered tree is displayed below"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<img src=\"./images/decisionTree.png\" alt=\"Drawing\" style=\"width: 1000px;height=500\"/>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<span style=\"color:red\"> Great! Make sure you include text explaining the tree to prove you understand what's happening. Basically the value = [x, y, z] list represents the number of leafs in each class. The tree takes the highest voted classification as the answer. So everyone in this dataset has lung cancer? And the ones that have the highest degree are those who have coughing of blood, don't have occupational hazards, are obese, and have clubbing of finger nails. Is that right?</span>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# trained_classifier"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write the trained tree to tree.dot in Graphviz DOT format, labelling splits with the column names\n",
+    "tree.export_graphviz(\n",
+    "    trained_Classifier,\n",
+    "    out_file='tree.dot',\n",
+    "    feature_names=X_train.columns,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "GridSearchCV(cv=None, error_score='raise',\n",
+       "       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,\n",
+       "            max_features=None, max_leaf_nodes=None,\n",
+       "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
+       "            min_samples_leaf=1, min_samples_split=2,\n",
+       "            min_weight_fraction_leaf=0.0, presort=False, random_state=42,\n",
+       "            splitter='best'),\n",
+       "       fit_params=None, iid=True, n_jobs=1,\n",
+       "       param_grid={'max_depth': [8, 10, 14], 'min_impurity_decrease': [0.1, 0.01, 0.0], 'min_samples_split': [10, 50, 2]},\n",
+       "       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n",
+       "       scoring=None, verbose=0)"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Parameter grid: the search tries every combination of the values listed here\n",
+    "parameters = dict(\n",
+    "    max_depth=[8, 10, 14],\n",
+    "    min_impurity_decrease=[.1, .01, 0.0],\n",
+    "    min_samples_split=[10, 50, 2],\n",
+    ")\n",
+    "# Base estimator with a fixed random_state\n",
+    "decision_Classifier = DecisionTreeClassifier(random_state=42)\n",
+    "\n",
+    "# Wrap the base estimator in a grid search, then fit it on the training split\n",
+    "Classifier = GridSearchCV(estimator=decision_Classifier, param_grid=parameters)\n",
+    "Classifier.fit(X_train, y_train)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'class_weight': None,\n",
+       " 'criterion': 'gini',\n",
+       " 'max_depth': 8,\n",
+       " 'max_features': None,\n",
+       " 'max_leaf_nodes': None,\n",
+       " 'min_impurity_decrease': 0.0,\n",
+       " 'min_impurity_split': None,\n",
+       " 'min_samples_leaf': 1,\n",
+       " 'min_samples_split': 10,\n",
+       " 'min_weight_fraction_leaf': 0.0,\n",
+       " 'presort': False,\n",
+       " 'random_state': 42,\n",
+       " 'splitter': 'best'}"
+      ]
+     },
+     "execution_count": 70,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# print out what GridSearchCV found to be the best parameters \n",
+    "Classifier.best_estimator_.get_params()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 71,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Train score: 1.0000\n",
+      "Test score:  1.0000\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Re-train with the parameter set of the best grid-search estimator and score the result\n",
+    "trained_Classifier = train_score_Classifier(\n",
+    "    sklearn_Classifier=DecisionTreeClassifier,\n",
+    "    X_train=X_train,\n",
+    "    y_train=y_train,\n",
+    "    X_test=X_test,\n",
+    "    y_test=y_test,\n",
+    "    model_parameters=Classifier.best_estimator_.get_params(),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The performance on our train data and test data is similar - that's great.\n",
+    "\n",
+    "We will visualise how these look in a scatter plot."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<span style=\"color:red\">In the case of having 100% on both train and test data, bootstrapping and k-fold cross validation should usually provide more reliable performance numbers. I sent code on k-folds earlier.</span>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Scatter plot of actual vs. predicted target values, used for both the training and test splits\n",
+    "def plot_y_yhat_scatter(y_actual,y_predicted,train_test):\n",
+    "    \"\"\"Draw a scatter of true values (x axis) against predicted values (y axis).\n",
+    "\n",
+    "    y_actual    -- true target values\n",
+    "    y_predicted -- model predictions for the same rows\n",
+    "    train_test  -- string naming the split; it is inserted into the plot title\n",
+    "\n",
+    "    Returns None; the plot is drawn on the Axes returned by seaborn.\n",
+    "    \"\"\"\n",
+    "    # fit_reg=False: plot the points only, without a fitted regression line\n",
+    "    axes = sns.regplot(x=y_actual, y=y_predicted, fit_reg=False)\n",
+    "    axes.set(\n",
+    "        xlabel='true values',\n",
+    "        ylabel='predicted values',\n",
+    "        title='Relationship between true and predicted Level of Lung cancer: '+train_test+' results',\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# True vs. predicted values on the training split\n",
+    "plot_y_yhat_scatter(\n",
+    "    y_train,\n",
+    "    trained_Classifier.predict(X_train),\n",
+    "    train_test=\"training\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 74,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# True vs. predicted values on the held-out test split\n",
+    "plot_y_yhat_scatter(\n",
+    "    y_test,\n",
+    "    trained_Classifier.predict(X_test),\n",
+    "    train_test=\"test\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The agreement between our predicted values and the true values is high (the model fits our data well)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "##  Feature Importance\n",
+    "\n",
+    "We can look at which features are driving our model's predictions by examining the feature importance."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 75,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0.        , 0.        , 0.06980933, 0.        , 0.        ,\n",
+       "       0.        , 0.        , 0.        , 0.        , 0.11364043,\n",
+       "       0.        , 0.        , 0.        , 0.41225448, 0.        ,\n",
+       "       0.        , 0.        , 0.26339052, 0.        , 0.02929909,\n",
+       "       0.        , 0.        , 0.11160615])"
+      ]
+     },
+     "execution_count": 75,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "importances = trained_Classifier.feature_importances_\n",
+    "importances"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<pre>\n",
+    "0.06980933--->air_pollution \n",
+    "0.11364043--->obesity\n",
+    "0.41225448--->coughing_of_blood \n",
+    "0.26339052--->wheezing\n",
+    "0.02929909--->clubbing_of_finger_nails \n",
+    "0.11160615--->snoring \n",
+    "These are the most important features in predicting lung cancer\n",
+    "</pre>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['age', 'gender', 'air_pollution', 'alcohol_use', 'dust_allergy',\n",
+       "       'occupational_hazards', 'genetic_risk', 'chronic_lung_disease',\n",
+       "       'balanced_diet', 'obesity', 'smoking', 'passive_smoker', 'chest_pain',\n",
+       "       'coughing_of_blood', 'fatigue', 'weight_loss', 'shortness_of_breath',\n",
+       "       'wheezing', 'swallowing_difficulty', 'clubbing_of_finger_nails',\n",
+       "       'frequent_cold', 'dry_cough', 'snoring'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 864x504 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Get the feature importances from our final trained model...\n",
+    "importances = trained_Classifier.feature_importances_\n",
+    "\n",
+    "# Find the indices of the feature importances in descending order\n",
+    "indices = np.argsort(importances)[::-1]\n",
+    "\n",
+    "# Plotting a bar chart of feature importances in descending order.\n",
+    "# An explicit Axes lets us add the title and axis labels the chart was missing.\n",
+    "fig, ax = plt.subplots(figsize=(12,7))\n",
+    "sns.barplot(y=X_train.columns[indices],x=importances[indices], ax=ax)\n",
+    "ax.set_title('Decision tree feature importances')\n",
+    "ax.set_xlabel('importance')\n",
+    "ax.set_ylabel('feature');"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<span style=\"color:red\"> Interesting that air pollution makes it to the list this time, as opposed to the tree from earlier. What are the parameters that you changed to make this happen? Why did you change the parameters?</span>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Feature ranking:\n",
+      "1. feature 9 (0.104505)\n",
+      "2. feature 13 (0.092689)\n",
+      "3. feature 11 (0.064121)\n",
+      "4. feature 8 (0.061259)\n",
+      "5. feature 17 (0.053987)\n",
+      "6. feature 3 (0.053947)\n",
+      "7. feature 14 (0.051066)\n",
+      "8. feature 18 (0.046122)\n",
+      "9. feature 4 (0.045952)\n",
+      "10. feature 10 (0.042849)\n",
+      "11. feature 6 (0.042549)\n",
+      "12. feature 2 (0.040003)\n",
+      "13. feature 16 (0.038493)\n",
+      "14. feature 22 (0.037226)\n",
+      "15. feature 12 (0.034225)\n",
+      "16. feature 19 (0.033471)\n",
+      "17. feature 5 (0.032807)\n",
+      "18. feature 7 (0.032128)\n",
+      "19. feature 20 (0.031667)\n",
+      "20. feature 15 (0.030936)\n",
+      "21. feature 21 (0.024520)\n",
+      "22. feature 0 (0.004499)\n",
+      "23. feature 1 (0.000979)\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# ExtraTreesClassifier is not part of the import cell at the top of the notebook, so it is imported here.\n",
+    "from sklearn.ensemble import ExtraTreesClassifier\n",
+    "\n",
+    "# Build a forest and compute the feature importances\n",
+    "forest = ExtraTreesClassifier(n_estimators=250,\n",
+    "                              random_state=0)\n",
+    "\n",
+    "forest.fit(X, y)\n",
+    "importances = forest.feature_importances_\n",
+    "# Spread of each feature's importance across the individual trees (drawn as error bars below).\n",
+    "# The loop variable is deliberately not called `tree`, to avoid confusion with the sklearn `tree` module.\n",
+    "std = np.std([estimator.feature_importances_ for estimator in forest.estimators_],\n",
+    "             axis=0)\n",
+    "indices = np.argsort(importances)[::-1]\n",
+    "feature_names = X.columns[indices]\n",
+    "n_features = X.shape[1]\n",
+    "\n",
+    "# Print the feature ranking by column name rather than by positional index\n",
+    "print(\"Feature ranking:\")\n",
+    "\n",
+    "for rank in range(n_features):\n",
+    "    print(\"%d. %s (%f)\" % (rank + 1, feature_names[rank], importances[indices[rank]]))\n",
+    "\n",
+    "# Plot the feature importances of the forest, with the column names on the x axis\n",
+    "plt.figure(figsize=(12, 7))\n",
+    "plt.title(\"Feature importances\")\n",
+    "plt.bar(range(n_features), importances[indices],\n",
+    "       color=\"r\", yerr=std[indices], align=\"center\")\n",
+    "plt.xticks(range(n_features), feature_names, rotation=90)\n",
+    "plt.xlim([-1, n_features])\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Advanced Material: Optimising the algorithm\n",
+    "<a id='AdvancedCV'></a>"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### K-folds example for finding optimal parameters "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "K-folds is a method of evaluating and tuning a model on the given dataset without overfitting to either the training dataset or the testing dataset. It finds the optimal balance between bias and variance in the model. \n",
+    "\n",
+    "Below we show how the model performs on the training and test datasets while varying the max tree depth. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Text(0.5,0,'Max depth of the tree')"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# define max depth range\n",
+    "depth_range = np.asarray(range(2,22,2))\n",
+    "\n",
+    "# initialize empty arrays to store the results\n",
+    "scores_train = np.zeros(len(depth_range))\n",
+    "scores_test = np.zeros(len(depth_range))\n",
+    "\n",
+    "for i in range(len(depth_range)):\n",
+    "    # train a decision tree classifier with the given max depth\n",
+    "    dt_Classifier = DecisionTreeClassifier(max_depth=depth_range[i], random_state=42)\n",
+    "    model = dt_Classifier.fit(X_train, y_train)\n",
+    "    # evaluate on both training and test datasets; for a classifier, score() is the mean accuracy\n",
+    "    scores_train[i] = model.score(X_train, y_train)\n",
+    "    scores_test[i] = model.score(X_test, y_test)\n",
+    "\n",
+    "# plot the results on the same graph\n",
+    "ax = sns.regplot(x=depth_range, y=scores_train, order=3, ci=None,label='train')\n",
+    "sns.regplot(x=depth_range, y=scores_test,order=3, ci=None, label='test', ax=ax)\n",
+    "ax.legend(loc='best')\n",
+    "# the plotted values are accuracies, not R2, so label the axis accordingly\n",
+    "ax.set_ylabel('Accuracy (fraction of correctly classified samples)')\n",
+    "ax.set_xlabel('Max depth of the tree');"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As the depth increases:\n",
+    "Both the training score and the test score increase similarly, so we are not overfitting.\n",
+    "\n",
+    "K-folds cross-validation does the following:\n",
+    "- Splits the dataset into K equal random subsets\n",
+    "- Trains the model on K-1 subsets\n",
+    "- Evaluates performance on the Kth left-out subset\n",
+    "- Stores the evaluation metric\n",
+    "- Repeats K times, once with each subset left out\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# One slot per candidate depth for the mean cross-validated score\n",
+    "scores_cv = np.empty(len(depth_range))\n",
+    "for i, depth in enumerate(depth_range):\n",
+    "    # fresh, unfitted tree limited to the current depth\n",
+    "    dt_Classifier = DecisionTreeClassifier(random_state=42, max_depth=depth)\n",
+    "    # 5-fold cross validation on the training split; one score per fold comes back as an array\n",
+    "    cv_scores = cross_val_score(dt_Classifier, X_train, y_train, n_jobs=-1, cv=5)\n",
+    "    # keep the average over the folds\n",
+    "    scores_cv[i] = cv_scores.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<Figure size 432x288 with 1 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# plot the mean cross-validated score against tree depth\n",
+    "ax = sns.regplot(x=depth_range, y=scores_cv, ci=None, order=3);\n",
+    "ax.set_xlabel('Max depth of the tree');\n",
+    "# cross_val_score falls back to the classifier's own score(), i.e. accuracy, not R2\n",
+    "ax.set_ylabel('Average cross validated accuracy');"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}