[c9d045]: / notebooks / exploratory_data_analysis.ipynb

Download this file

1315 lines (1314 with data), 76.5 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Exploratory data analysis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from matplotlib import pyplot as plt\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "from tabulate import tabulate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loading data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ptnum</th>\n",
       "      <th>label</th>\n",
       "      <th>scc</th>\n",
       "      <th>C-103579009</th>\n",
       "      <th>C-125680007</th>\n",
       "      <th>C-186034007</th>\n",
       "      <th>C-263495000</th>\n",
       "      <th>C-398070004</th>\n",
       "      <th>C-424144002</th>\n",
       "      <th>C-72514-3</th>\n",
       "      <th>...</th>\n",
       "      <th>C-92140-3</th>\n",
       "      <th>C-92141-1</th>\n",
       "      <th>C-92142-9</th>\n",
       "      <th>C-94040-3</th>\n",
       "      <th>C-94531-1</th>\n",
       "      <th>C-979092</th>\n",
       "      <th>C-993452</th>\n",
       "      <th>C-997501</th>\n",
       "      <th>C-999998</th>\n",
       "      <th>C-999999</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>p17767.2</td>\n",
       "      <td>0</td>\n",
       "      <td>101</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>p3832.1</td>\n",
       "      <td>0</td>\n",
       "      <td>110</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>normal</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>p10784.2</td>\n",
       "      <td>0</td>\n",
       "      <td>127</td>\n",
       "      <td>black</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>p17630</td>\n",
       "      <td>0</td>\n",
       "      <td>129</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>p17983</td>\n",
       "      <td>1</td>\n",
       "      <td>69</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 785 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      ptnum  label  scc C-103579009 C-125680007  C-186034007 C-263495000  \\\n",
       "0  p17767.2      0  101       white           m  nonhispanic           m   \n",
       "1   p3832.1      0  110       white           m  nonhispanic           m   \n",
       "2  p10784.2      0  127       black           m  nonhispanic           m   \n",
       "3    p17630      0  129       white           m  nonhispanic           m   \n",
       "4    p17983      1   69       white           m  nonhispanic           m   \n",
       "\n",
       "     C-398070004 C-424144002 C-72514-3  ... C-92140-3 C-92141-1 C-92142-9  \\\n",
       "0  massachusetts       50t70  abnormal  ...       NaN       NaN       NaN   \n",
       "1  massachusetts       50t70    normal  ...       NaN       NaN       NaN   \n",
       "2  massachusetts       50t70  abnormal  ...       NaN       NaN       NaN   \n",
       "3  massachusetts       50t70  abnormal  ...       NaN       NaN       NaN   \n",
       "4  massachusetts       50t70  abnormal  ...       NaN       NaN       NaN   \n",
       "\n",
       "  C-94040-3 C-94531-1 C-979092 C-993452 C-997501 C-999998 C-999999  \n",
       "0       NaN       NaN      NaN      NaN      NaN      NaN      NaN  \n",
       "1       NaN       NaN      NaN      NaN      NaN      NaN      NaN  \n",
       "2       NaN       NaN      NaN      NaN      NaN      NaN      NaN  \n",
       "3       NaN       NaN      NaN      NaN      NaN      NaN      NaN  \n",
       "4       NaN       NaN      NaN      NaN      NaN      NaN      NaN  \n",
       "\n",
       "[5 rows x 785 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('../data/learning_data.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**We will predict the value of the 'label' column (1 - patient diagnosed with lung cancer, 0 - patient not diagnosed) using the other columns as features.**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 9048 entries, 0 to 9047\n",
      "Columns: 785 entries, ptnum to C-999999\n",
      "dtypes: float64(59), int64(2), object(724)\n",
      "memory usage: 54.2+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>scc</th>\n",
       "      <th>C-75443-2</th>\n",
       "      <th>C-84215-3</th>\n",
       "      <th>C-26453-1</th>\n",
       "      <th>C-26464-8</th>\n",
       "      <th>C-26515-7</th>\n",
       "      <th>C-30385-9</th>\n",
       "      <th>C-30428-7</th>\n",
       "      <th>C-33037-3</th>\n",
       "      <th>...</th>\n",
       "      <th>C-713-8</th>\n",
       "      <th>C-727711</th>\n",
       "      <th>C-731-0</th>\n",
       "      <th>C-736-9</th>\n",
       "      <th>C-742-7</th>\n",
       "      <th>C-751-8</th>\n",
       "      <th>C-770-8</th>\n",
       "      <th>C-82078001</th>\n",
       "      <th>C-86849004</th>\n",
       "      <th>C-86964003</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>9048.000000</td>\n",
       "      <td>9048.000000</td>\n",
       "      <td>16.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>151.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>256.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>256.000000</td>\n",
       "      <td>256.00000</td>\n",
       "      <td>256.000000</td>\n",
       "      <td>256.000000</td>\n",
       "      <td>256.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.252874</td>\n",
       "      <td>103.895999</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.125166</td>\n",
       "      <td>13.175497</td>\n",
       "      <td>308.200662</td>\n",
       "      <td>13.006623</td>\n",
       "      <td>87.692715</td>\n",
       "      <td>8.339073</td>\n",
       "      <td>...</td>\n",
       "      <td>4.513672</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.945664</td>\n",
       "      <td>15.17957</td>\n",
       "      <td>0.948047</td>\n",
       "      <td>2.679688</td>\n",
       "      <td>27.573555</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.434683</td>\n",
       "      <td>21.988655</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.379863</td>\n",
       "      <td>1.223817</td>\n",
       "      <td>88.495507</td>\n",
       "      <td>0.899605</td>\n",
       "      <td>4.636940</td>\n",
       "      <td>3.508351</td>\n",
       "      <td>...</td>\n",
       "      <td>0.255184</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.154276</td>\n",
       "      <td>1.67533</td>\n",
       "      <td>0.075522</td>\n",
       "      <td>0.225392</td>\n",
       "      <td>3.014690</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.500000</td>\n",
       "      <td>11.100000</td>\n",
       "      <td>155.300000</td>\n",
       "      <td>11.600000</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>3.740000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>10.32000</td>\n",
       "      <td>0.750000</td>\n",
       "      <td>2.080000</td>\n",
       "      <td>18.610000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.800000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>232.950000</td>\n",
       "      <td>12.150000</td>\n",
       "      <td>83.600000</td>\n",
       "      <td>5.500000</td>\n",
       "      <td>...</td>\n",
       "      <td>4.370000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.980000</td>\n",
       "      <td>14.08250</td>\n",
       "      <td>0.900000</td>\n",
       "      <td>2.540000</td>\n",
       "      <td>25.457500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>107.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.100000</td>\n",
       "      <td>13.400000</td>\n",
       "      <td>304.100000</td>\n",
       "      <td>13.100000</td>\n",
       "      <td>88.100000</td>\n",
       "      <td>8.200000</td>\n",
       "      <td>...</td>\n",
       "      <td>4.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.19000</td>\n",
       "      <td>0.950000</td>\n",
       "      <td>2.690000</td>\n",
       "      <td>27.840000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>116.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.400000</td>\n",
       "      <td>14.300000</td>\n",
       "      <td>386.550000</td>\n",
       "      <td>13.800000</td>\n",
       "      <td>91.400000</td>\n",
       "      <td>11.350000</td>\n",
       "      <td>...</td>\n",
       "      <td>4.680000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.020000</td>\n",
       "      <td>16.38500</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.830000</td>\n",
       "      <td>29.502500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>190.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.900000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>449.900000</td>\n",
       "      <td>14.600000</td>\n",
       "      <td>96.000000</td>\n",
       "      <td>14.900000</td>\n",
       "      <td>...</td>\n",
       "      <td>5.240000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.090000</td>\n",
       "      <td>20.15000</td>\n",
       "      <td>1.160000</td>\n",
       "      <td>3.260000</td>\n",
       "      <td>34.850000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 61 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "             label          scc  C-75443-2  C-84215-3   C-26453-1   C-26464-8  \\\n",
       "count  9048.000000  9048.000000       16.0       16.0  151.000000  151.000000   \n",
       "mean      0.252874   103.895999        1.0        1.0    5.125166   13.175497   \n",
       "std       0.434683    21.988655        0.0        0.0    0.379863    1.223817   \n",
       "min       0.000000     9.000000        1.0        1.0    4.500000   11.100000   \n",
       "25%       0.000000   100.000000        1.0        1.0    4.800000   12.000000   \n",
       "50%       0.000000   107.000000        1.0        1.0    5.100000   13.400000   \n",
       "75%       1.000000   116.000000        1.0        1.0    5.400000   14.300000   \n",
       "max       1.000000   190.000000        1.0        1.0    5.900000   15.000000   \n",
       "\n",
       "        C-26515-7   C-30385-9   C-30428-7   C-33037-3  ...     C-713-8  \\\n",
       "count  151.000000  151.000000  151.000000  151.000000  ...  256.000000   \n",
       "mean   308.200662   13.006623   87.692715    8.339073  ...    4.513672   \n",
       "std     88.495507    0.899605    4.636940    3.508351  ...    0.255184   \n",
       "min    155.300000   11.600000   80.000000    2.000000  ...    3.740000   \n",
       "25%    232.950000   12.150000   83.600000    5.500000  ...    4.370000   \n",
       "50%    304.100000   13.100000   88.100000    8.200000  ...    4.500000   \n",
       "75%    386.550000   13.800000   91.400000   11.350000  ...    4.680000   \n",
       "max    449.900000   14.600000   96.000000   14.900000  ...    5.240000   \n",
       "\n",
       "       C-727711     C-731-0    C-736-9     C-742-7     C-751-8     C-770-8  \\\n",
       "count       0.0  256.000000  256.00000  256.000000  256.000000  256.000000   \n",
       "mean        NaN    0.945664   15.17957    0.948047    2.679688   27.573555   \n",
       "std         NaN    0.154276    1.67533    0.075522    0.225392    3.014690   \n",
       "min         NaN    0.500000   10.32000    0.750000    2.080000   18.610000   \n",
       "25%         NaN    0.980000   14.08250    0.900000    2.540000   25.457500   \n",
       "50%         NaN    1.000000   15.19000    0.950000    2.690000   27.840000   \n",
       "75%         NaN    1.020000   16.38500    1.000000    2.830000   29.502500   \n",
       "max         NaN    1.090000   20.15000    1.160000    3.260000   34.850000   \n",
       "\n",
       "       C-82078001  C-86849004  C-86964003  \n",
       "count         0.0         0.0         0.0  \n",
       "mean          NaN         NaN         NaN  \n",
       "std           NaN         NaN         NaN  \n",
       "min           NaN         NaN         NaN  \n",
       "25%           NaN         NaN         NaN  \n",
       "50%           NaN         NaN         NaN  \n",
       "75%           NaN         NaN         NaN  \n",
       "max           NaN         NaN         NaN  \n",
       "\n",
       "[8 rows x 61 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Nulls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "First 10 columns with highest percentage of nulls:\n",
      "     Column         Nulls\n",
      "---  -----------  -------\n",
      "715  C-48387007         1\n",
      "733  C-698423002        1\n",
      "753  C-82078001         1\n",
      "613  C-13569004         1\n",
      "615  C-1373463          1\n",
      "742  C-727711           1\n",
      "616  C-14152002         1\n",
      "689  C-313572           1\n",
      "620  C-161621004        1\n",
      "610  C-113076002        1\n",
      "738  C-707418001        1\n",
      "765  C-86849004         1\n",
      "766  C-86964003         1\n",
      "699  C-406602003        1\n",
      "681  C-288328004        1\n",
      "700  C-408512008        1\n",
      "702  C-427089005        1\n",
      "652  C-232657004        1\n",
      "707  C-444260001        1\n",
      "708  C-448417001        1\n",
      "647  C-205532           1\n",
      "709  C-448813005        1\n",
      "717  C-52734007         1\n",
      "640  C-198767           1\n",
      "633  C-190905008        1\n"
     ]
    }
   ],
   "source": [
    "null_percentages = df.isnull().mean().reset_index()\n",
    "null_percentages.sort_values(by=0, ascending=False, inplace=True)\n",
    "null_percentages.columns = ['Column', 'Nulls']\n",
    "print('First 10 columns with highest percentage of nulls:')\n",
    "print(tabulate(null_percentages.head(25), headers='keys', tablefmt='simple'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In feature engineering we will drop the columns with only null values. Other columns with null values will be either imputed or dropped based on the percentage of null values."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Another unnecessary column is ptnum, which is just a patient number."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# errors='ignore' keeps this cell re-runnable: without it a second run raises KeyError\n",
    "# because 'ptnum' has already been removed from df.\n",
    "df = df.drop(columns='ptnum', errors='ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Decoding column names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>scc</th>\n",
       "      <th>race</th>\n",
       "      <th>marital</th>\n",
       "      <th>ethnic</th>\n",
       "      <th>gender</th>\n",
       "      <th>state</th>\n",
       "      <th>age</th>\n",
       "      <th>Pain severity - 0-10 verbal numeric rating [Score] - Reported</th>\n",
       "      <th>Influenza  seasonal  injectable  preservative free</th>\n",
       "      <th>...</th>\n",
       "      <th>Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n",
       "      <th>Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n",
       "      <th>Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n",
       "      <th>Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n",
       "      <th>SARS-CoV-2 RNA Pnl Resp NAA+probe</th>\n",
       "      <th>Hydroxychloroquine Sulfate 200 MG Oral Tablet</th>\n",
       "      <th>1 ML denosumab 60 MG/ML Prefilled Syringe</th>\n",
       "      <th>Fexofenadine hydrochloride 60 MG Oral Tablet</th>\n",
       "      <th>Leronlimab 700 MG Injection</th>\n",
       "      <th>Lenzilumab 200 MG IV</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>101</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>110</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>normal</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>127</td>\n",
       "      <td>black</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>129</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>69</td>\n",
       "      <td>white</td>\n",
       "      <td>m</td>\n",
       "      <td>nonhispanic</td>\n",
       "      <td>m</td>\n",
       "      <td>massachusetts</td>\n",
       "      <td>50t70</td>\n",
       "      <td>abnormal</td>\n",
       "      <td>True</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 784 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   label  scc   race marital       ethnic gender          state    age  \\\n",
       "0      0  101  white       m  nonhispanic      m  massachusetts  50t70   \n",
       "1      0  110  white       m  nonhispanic      m  massachusetts  50t70   \n",
       "2      0  127  black       m  nonhispanic      m  massachusetts  50t70   \n",
       "3      0  129  white       m  nonhispanic      m  massachusetts  50t70   \n",
       "4      1   69  white       m  nonhispanic      m  massachusetts  50t70   \n",
       "\n",
       "  Pain severity - 0-10 verbal numeric rating [Score] - Reported  \\\n",
       "0                                           abnormal              \n",
       "1                                             normal              \n",
       "2                                           abnormal              \n",
       "3                                           abnormal              \n",
       "4                                           abnormal              \n",
       "\n",
       "  Influenza  seasonal  injectable  preservative free  ...  \\\n",
       "0                                               True  ...   \n",
       "1                                               True  ...   \n",
       "2                                               True  ...   \n",
       "3                                               True  ...   \n",
       "4                                               True  ...   \n",
       "\n",
       "  Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection  \\\n",
       "0                                                NaN                                         \n",
       "1                                                NaN                                         \n",
       "2                                                NaN                                         \n",
       "3                                                NaN                                         \n",
       "4                                                NaN                                         \n",
       "\n",
       "  Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection  \\\n",
       "0                                                NaN                                     \n",
       "1                                                NaN                                     \n",
       "2                                                NaN                                     \n",
       "3                                                NaN                                     \n",
       "4                                                NaN                                     \n",
       "\n",
       "  Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection  \\\n",
       "0                                                NaN                                     \n",
       "1                                                NaN                                     \n",
       "2                                                NaN                                     \n",
       "3                                                NaN                                     \n",
       "4                                                NaN                                     \n",
       "\n",
       "  Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection  \\\n",
       "0                                                NaN                                        \n",
       "1                                                NaN                                        \n",
       "2                                                NaN                                        \n",
       "3                                                NaN                                        \n",
       "4                                                NaN                                        \n",
       "\n",
       "  SARS-CoV-2 RNA Pnl Resp NAA+probe  \\\n",
       "0                               NaN   \n",
       "1                               NaN   \n",
       "2                               NaN   \n",
       "3                               NaN   \n",
       "4                               NaN   \n",
       "\n",
       "  Hydroxychloroquine Sulfate 200 MG Oral Tablet  \\\n",
       "0                                           NaN   \n",
       "1                                           NaN   \n",
       "2                                           NaN   \n",
       "3                                           NaN   \n",
       "4                                           NaN   \n",
       "\n",
       "  1 ML denosumab 60 MG/ML Prefilled Syringe  \\\n",
       "0                                       NaN   \n",
       "1                                       NaN   \n",
       "2                                       NaN   \n",
       "3                                       NaN   \n",
       "4                                       NaN   \n",
       "\n",
       "  Fexofenadine hydrochloride 60 MG Oral Tablet Leronlimab 700 MG Injection  \\\n",
       "0                                          NaN                         NaN   \n",
       "1                                          NaN                         NaN   \n",
       "2                                          NaN                         NaN   \n",
       "3                                          NaN                         NaN   \n",
       "4                                          NaN                         NaN   \n",
       "\n",
       "  Lenzilumab 200 MG IV  \n",
       "0                  NaN  \n",
       "1                  NaN  \n",
       "2                  NaN  \n",
       "3                  NaN  \n",
       "4                  NaN  \n",
       "\n",
       "[5 rows x 784 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_codes = pd.read_csv('../data/codes.csv')\n",
    "code_to_name = df_codes.set_index('code')['name'].to_dict()\n",
    "df = df.rename(columns=code_to_name)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['label', 'scc', 'race', 'marital', 'ethnic', 'gender', 'state', 'age',\n",
      "       'Pain severity - 0-10 verbal numeric rating [Score] - Reported',\n",
      "       'Influenza  seasonal  injectable  preservative free',\n",
      "       ...\n",
      "       'Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection',\n",
      "       'Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection',\n",
      "       'Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection',\n",
      "       'Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection',\n",
      "       'SARS-CoV-2 RNA Pnl Resp NAA+probe',\n",
      "       'Hydroxychloroquine Sulfate 200 MG Oral Tablet',\n",
      "       '1 ML denosumab 60 MG/ML Prefilled Syringe',\n",
      "       'Fexofenadine hydrochloride 60 MG Oral Tablet',\n",
      "       'Leronlimab 700 MG Injection', 'Lenzilumab 200 MG IV'],\n",
      "      dtype='object', length=784)\n"
     ]
    }
   ],
   "source": [
    "column_names = df.columns\n",
    "print(column_names)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Distribution of categorical values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "categorical_columns = df.select_dtypes(include = ['object'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(9048, 784)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "DataFrame after combining columns with the same name:\n",
      "     Glucose Throat culture (procedure)  \\\n",
      "0     normal                        NaN   \n",
      "1     normal                        NaN   \n",
      "2     normal                        NaN   \n",
      "3     normal                        NaN   \n",
      "4     normal                        NaN   \n",
      "...      ...                        ...   \n",
      "9043  normal                        NaN   \n",
      "9044  normal                       True   \n",
      "9045  normal                        NaN   \n",
      "9046  normal                        NaN   \n",
      "9047  normal                       True   \n",
      "\n",
      "     Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma  \\\n",
      "0                                                   NaN                        \n",
      "1                                              abnormal                        \n",
      "2                                              abnormal                        \n",
      "3                                              abnormal                        \n",
      "4                                                   NaN                        \n",
      "...                                                 ...                        \n",
      "9043                                           abnormal                        \n",
      "9044                                           abnormal                        \n",
      "9045                                           abnormal                        \n",
      "9046                                                NaN                        \n",
      "9047                                           abnormal                        \n",
      "\n",
      "     lisinopril 10 MG Oral Tablet History of appendectomy  \\\n",
      "0                            True                     NaN   \n",
      "1                             NaN                     NaN   \n",
      "2                             NaN                     NaN   \n",
      "3                            True                     NaN   \n",
      "4                            True                     NaN   \n",
      "...                           ...                     ...   \n",
      "9043                          NaN                     NaN   \n",
      "9044                          NaN                     NaN   \n",
      "9045                          NaN                     NaN   \n",
      "9046                         True                     NaN   \n",
      "9047                          NaN                     NaN   \n",
      "\n",
      "     clonazePAM 0.25 MG Oral Tablet  \\\n",
      "0                               NaN   \n",
      "1                               NaN   \n",
      "2                               NaN   \n",
      "3                               NaN   \n",
      "4                               NaN   \n",
      "...                             ...   \n",
      "9043                            NaN   \n",
      "9044                            NaN   \n",
      "9045                            NaN   \n",
      "9046                            NaN   \n",
      "9047                            NaN   \n",
      "\n",
      "     Speech and language therapy regime (regime/therapy  \\\n",
      "0                                                  True   \n",
      "1                                                   NaN   \n",
      "2                                                  True   \n",
      "3                                                  True   \n",
      "4                                                   NaN   \n",
      "...                                                 ...   \n",
      "9043                                                NaN   \n",
      "9044                                                NaN   \n",
      "9045                                                NaN   \n",
      "9046                                                NaN   \n",
      "9047                                                NaN   \n",
      "\n",
      "     remdesivir 100 MG Injection losartan potassium 25 MG Oral Tablet  \\\n",
      "0                            NaN                                  NaN   \n",
      "1                            NaN                                  NaN   \n",
      "2                            NaN                                  NaN   \n",
      "3                            NaN                                  NaN   \n",
      "4                            NaN                                  NaN   \n",
      "...                          ...                                  ...   \n",
      "9043                         NaN                                  NaN   \n",
      "9044                         NaN                                  NaN   \n",
      "9045                         NaN                                  NaN   \n",
      "9046                         NaN                                  NaN   \n",
      "9047                         NaN                                  NaN   \n",
      "\n",
      "     Headache (finding)  ... Specific gravity of Urine by Test strip  \\\n",
      "0                   NaN  ...                                     NaN   \n",
      "1                   NaN  ...                                     NaN   \n",
      "2                   NaN  ...                                     NaN   \n",
      "3                   NaN  ...                                     NaN   \n",
      "4                   NaN  ...                                     NaN   \n",
      "...                 ...  ...                                     ...   \n",
      "9043                NaN  ...                                     NaN   \n",
      "9044                NaN  ...                                     NaN   \n",
      "9045                NaN  ...                                     NaN   \n",
      "9046                NaN  ...                                     NaN   \n",
      "9047                NaN  ...                                     NaN   \n",
      "\n",
      "     Iron binding capacity [Mass/volume] in Serum or Plasma  \\\n",
      "0                                                   NaN       \n",
      "1                                                normal       \n",
      "2                                                normal       \n",
      "3                                                normal       \n",
      "4                                                   NaN       \n",
      "...                                                 ...       \n",
      "9043                                                NaN       \n",
      "9044                                             normal       \n",
      "9045                                             normal       \n",
      "9046                                                NaN       \n",
      "9047                                             normal       \n",
      "\n",
      "     sacubitril 97 MG / valsartan 103 MG Oral Tablet  \\\n",
      "0                                                NaN   \n",
      "1                                                NaN   \n",
      "2                                               True   \n",
      "3                                                NaN   \n",
      "4                                                NaN   \n",
      "...                                              ...   \n",
      "9043                                             NaN   \n",
      "9044                                             NaN   \n",
      "9045                                            True   \n",
      "9046                                             NaN   \n",
      "9047                                             NaN   \n",
      "\n",
      "     Catheter ablation of tissue of heart Bilateral tubal ligation  \\\n",
      "0                                     NaN                      NaN   \n",
      "1                                     NaN                      NaN   \n",
      "2                                     NaN                      NaN   \n",
      "3                                     NaN                      NaN   \n",
      "4                                     NaN                      NaN   \n",
      "...                                   ...                      ...   \n",
      "9043                                  NaN                      NaN   \n",
      "9044                                  NaN                      NaN   \n",
      "9045                                  NaN                      NaN   \n",
      "9046                                  NaN                      NaN   \n",
      "9047                                  NaN                      NaN   \n",
      "\n",
      "      History of amputation of foot (situation)  \\\n",
      "0                                           NaN   \n",
      "1                                           NaN   \n",
      "2                                           NaN   \n",
      "3                                           NaN   \n",
      "4                                           NaN   \n",
      "...                                         ...   \n",
      "9043                                        NaN   \n",
      "9044                                        NaN   \n",
      "9045                                        NaN   \n",
      "9046                                        NaN   \n",
      "9047                                        NaN   \n",
      "\n",
      "     Are you covered by health insurance or some other kind of health care plan [PhenX]  \\\n",
      "0                                                   NaN                                   \n",
      "1                                                   NaN                                   \n",
      "2                                                   NaN                                   \n",
      "3                                                   NaN                                   \n",
      "4                                                   NaN                                   \n",
      "...                                                 ...                                   \n",
      "9043                                                NaN                                   \n",
      "9044                                                NaN                                   \n",
      "9045                                                NaN                                   \n",
      "9046                                                NaN                                   \n",
      "9047                                                NaN                                   \n",
      "\n",
      "     White oak IgE Ab in Serum Ferritin [Mass/volume] in Serum or Plasma  \\\n",
      "0                          NaN                                       NaN   \n",
      "1                          NaN                                    normal   \n",
      "2                          NaN                                    normal   \n",
      "3                          NaN                                    normal   \n",
      "4                          NaN                                       NaN   \n",
      "...                        ...                                       ...   \n",
      "9043                       NaN                                       NaN   \n",
      "9044                       NaN                                    normal   \n",
      "9045                       NaN                                    normal   \n",
      "9046                       NaN                                       NaN   \n",
      "9047                       NaN                                    normal   \n",
      "\n",
      "      Brain damage - traumatic  \n",
      "0                          NaN  \n",
      "1                          NaN  \n",
      "2                          NaN  \n",
      "3                          NaN  \n",
      "4                          NaN  \n",
      "...                        ...  \n",
      "9043                       NaN  \n",
      "9044                       NaN  \n",
      "9045                       NaN  \n",
      "9046                       NaN  \n",
      "9047                       NaN  \n",
      "\n",
      "[9048 rows x 779 columns]\n"
     ]
    }
   ],
   "source": [
    "columns = df.columns\n",
    "unique_columns = list(set(columns))  # Get unique column names\n",
    "\n",
    "# Create a new DataFrame with combined columns\n",
    "df_combined = pd.DataFrame()\n",
    "\n",
    "# Iterate over the unique columns to combine and merge appropriately\n",
    "for col in unique_columns:\n",
    "    if list(columns).count(col) > 1:  # If the column name is duplicated\n",
    "        # Find all columns with this name and combine them, e.g., with first non-null\n",
    "        combined_series = df[[col]].bfill(axis=1).iloc[:, 0]  # Backfill to combine\n",
    "        df_combined[col] = combined_series\n",
    "    else:\n",
    "        df_combined[col] = df[col]  # If it's unique, just copy the column\n",
    "\n",
    "print(\"\\nDataFrame after combining columns with the same name:\")\n",
    "print(df_combined)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df_combined.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebind df instead of dropping in place so this cell can be re-run:\n",
    "# an in-place drop raises KeyError on the second run because the columns\n",
    "# are already gone. errors='ignore' makes a repeat run a no-op.\n",
    "df = df.drop(columns=['race', 'ethnic'], errors='ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Distribution of label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjMAAAHACAYAAABXvOnoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAABHDklEQVR4nO3deVxU9f7H8fcoMKAihsoqKiJuoeaeWO7gflNLr5K5ZT+9uC/XMrtXMoOiMitTs8wlNdvUunZLMZVyyXDLNZdSxIVwBdxA4Pz+6MHcJlwYAoax1/PxmMej8z3f+c5nBmjefs/3nGMyDMMQAACAgypl7wIAAAD+DMIMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDO4ZixYtkslkkqurqxITE/Psb9u2rUJCQuxQmbRp0yaZTCZ9+umndnl9W504cULdunWTp6enTCaTxo0bd9u+1atXV/fu3Yusljlz5mjRokVFNr4ju3jxovr16ycvLy+ZTCb17Nnztn0L8/c/929tx44dhTLe78c8ceJEoY2Jvw4nexcAFLaMjAw999xz+uCDD+xdisMaP368tm/frvfff18+Pj7y9fW1Wy1z5sxRpUqVNHjwYLvVUFK98MILWrVqld5//30FBQXJ09PT3iUBdkGYwT2nc+fOWr58uSZNmqSGDRvau5xidf36dbm6uspkMv2pcfbv36/mzZvf8V/6sL/9+/crKChIjz/+uL1LAeyKw0y450yePFkVK1bU008/fcd+J06ckMlkuuUhDJPJpKioKMt2VFSUTCaT9u7dqz59+sjDw0Oenp6aMGGCsrKydPjwYXXu3Fnu7u6qXr26YmNjb/maN27c0IQJE+Tj4yM3Nze1adNGu3fvztNvx44d+tvf/iZPT0+5urqqUaNG+vjjj6365E7Lr1u3TkOHDlXlypVVpkwZZWRk3PY9nzx5UgMGDJCXl5fMZrPq1q2r1157TTk5OZL+dzjs2LFj+uqrr2QymWye+s/9XF999VXNnDlTgYGBKleunFq2bKnvv//equ8vv/yifv36yc/PT2azWd7e3urQoYP27Nkj6bdDWAcOHFB8fLyllurVq1s+y4kTJ+qBBx6w/Dxatmypzz//PE9NJpNJo0aN0gcffKC6deuqTJkyatiwodasWZOn708//aT+/fvL29tbZrNZVatW1cCBA60+1+TkZA0fPlxVqlSRi4uLAgMD9fzzzysrK8tqrLlz56phw4YqV66c3N3dVadOHT377LN3/QwvXryoyMhI+fv7y8XFRTVq1NDUqVMtNeR+xuvXr9ehQ4csn82mTZvuOvad7NixQ/369VP16tXl5uam6tWrq3///rc8bCtJly5d0pAhQ+Tp6amyZcuqR48e+uWXX/L0W79+vTp06KDy5curTJkyatWqlb755pu71rN79251797d8vvq5+enbt266dSpU3/qfeLew8wM7jnu7u567rnnNHbsWG3YsEHt27cvtLH79u2rAQMGaPjw4YqLi1NsbKxu3ryp9evXKzIyUpMmTdLy5cv19NNPq2bNmurdu7fV85999lk1btxY7733nlJTUxUVFaW2bdtq9+7dqlGjhiRp48aN6ty5s1q0aKF58+bJw8NDK1as0N///nddu3Ytz+GWoUOHqlu3bvrggw909epVOTs737L2c+fOKTQ0VJmZmXrhhRdUvXp1rVmzRpMmTdLPP/+sOXPmqHHjxtq2bZt69eqloKAgvfrqq5JUoMNMb7/9turUqaNZs2ZJkv71r3+pa9euOn78uDw8PCRJXbt2VXZ2tmJjY1W1alWdP39eW7du1eXLlyVJq1at0mOPPSYPDw/NmTNHkmQ2myX9djjx4sWLmjRpkvz9/ZWZman169erd+/eWrhwoQYOHGhVz5dffqmEhARNnz5d5cqVU2xsrHr16qXDhw9bPvsff/xRDz30kCpV
qqTp06crODhYZ8+e1RdffKHMzEyZzWYlJyerefPmKlWqlP79738rKChI27Zt04wZM3TixAktXLhQkrRixQpFRkZq9OjRevXVV1WqVCkdO3ZMBw8evOPnduPGDbVr104///yznn/+eTVo0EDfffedYmJitGfPHn355Zfy9fXVtm3bFBkZqdTUVC1btkySVK9ePZt/Tr934sQJ1a5dW/369ZOnp6fOnj2ruXPnqlmzZjp48KAqVapk1f/JJ59UWFiYli9frqSkJD333HNq27at9u7dqwoVKkiSli5dqoEDB+qRRx7R4sWL5ezsrHfeeUedOnXS2rVr1aFDh1vWcvXqVYWFhSkwMFBvv/22vL29lZycrI0bNyo9Pf1PvU/cgwzgHrFw4UJDkpGQkGBkZGQYNWrUMJo2bWrk5OQYhmEYbdq0Me6//35L/+PHjxuSjIULF+YZS5Ixbdo0y/a0adMMScZrr71m1e+BBx4wJBkrV660tN28edOoXLmy0bt3b0vbxo0bDUlG48aNLfUYhmGcOHHCcHZ2NoYNG2Zpq1OnjtGoUSPj5s2bVq/VvXt3w9fX18jOzrZ6vwMHDszX5/PMM88Ykozt27dbtf/jH/8wTCaTcfjwYUtbtWrVjG7duuVr3D/2zf1c69evb2RlZVnaf/jhB0OS8eGHHxqGYRjnz583JBmzZs264/j333+/0aZNm7vWkZWVZdy8edN48sknjUaNGlntk2R4e3sbaWlplrbk5GSjVKlSRkxMjKWtffv2RoUKFYyUlJTbvs7w4cONcuXKGYmJiVbtr776qiHJOHDggGEYhjFq1CijQoUKd637j+bNm2dIMj7++GOr9pdfftmQZKxbt87S9sff6TuxpW+urKws48qVK0bZsmWNN954w9Ke+7vXq1cvq/5btmwxJBkzZswwDMMwrl69anh6eho9evSw6pednW00bNjQaN68eZ4xjx8/bhiGYezYscOQZKxevdqmmvHXxGEm3JNcXFw0Y8YM7dixI8/hmT/jj2ft1K1bVyaTSV26dLG0OTk5qWbNmrecmo+IiLBaz1KtWjWFhoZq48aNkqRjx47pp59+sqyByMrKsjy6du2qs2fP6vDhw1ZjPvroo/mqfcOGDapXr56aN29u1T548GAZhqENGzbka5z86tatm0qXLm3ZbtCggSRZPhdPT08FBQXplVde0cyZM7V7927L4a78+uSTT9SqVSuVK1dOTk5OcnZ21oIFC3To0KE8fdu1ayd3d3fLtre3t7y8vCz1XLt2TfHx8erbt68qV65829dcs2aN2rVrJz8/P6ufT+7vQHx8vCSpefPmunz5svr376/PP/9c58+fz9d72rBhg8qWLavHHnvMqj13Ri4/h2cK6sqVK5ZZRScnJzk5OalcuXK6evXqLT/TP67VCQ0NVbVq1Sy/z1u3btXFixc1aNAgq88qJydHnTt3VkJCgq5evXrLWmrWrKn77rtPTz/9tObNm3fXGS38tRFmcM/q16+fGjdurKlTp+rmzZuFMuYfzxZxcXFRmTJl5Orqmqf9xo0beZ7v4+Nzy7YLFy5Ikn799VdJ0qRJk+Ts7Gz1iIyMlKQ8X4r5PQR04cKFW/b18/Oz7C9MFStWtNrOPTx0/fp1Sb+tY/nmm2/UqVMnxcbGqnHjxqpcubLGjBmTr8MIK1euVN++feXv76+lS5dq27ZtSkhI0NChQ2/52f+xntyacuu5dOmSsrOzVaVKlTu+7q+//qr//Oc/eX4+999/v6T//XyeeOIJvf/++0pMTNSjjz4qLy8vtWjRQnFxcXcc/8KFC/Lx8cmziNvLy0tOTk6F/nP6vYiICM2ePVvDhg3T2rVr9cMPPyghIUGVK1e2fE6/l9/f58ceeyzP5/Xyyy/LMAxdvHjxlrV4eHgoPj5eDzzwgJ599lndf//98vPz07Rp0wrt7xn3DtbM4J5lMpn08ssvKywsTPPnz8+zPzeA/HHBbFF+WSQnJ9+yLfeLNndN
wpQpU/Kst8lVu3Ztq+38nrlUsWJFnT17Nk/7mTNnrF67OFWrVk0LFiyQJB05ckQff/yxoqKilJmZqXnz5t3xuUuXLlVgYKA++ugjq8/gTgug78TT01OlS5e+6+LSSpUqqUGDBnrxxRdvuT83HErSkCFDNGTIEF29elXffvutpk2bpu7du+vIkSOqVq3aLZ9fsWJFbd++XYZhWL2vlJQUZWVlFdnPKTU1VWvWrNG0adP0zDPPWNpz1ybdyu1+n2vWrCnpf79Tb731lh588MFbjuHt7X3bmurXr68VK1bIMAzt3btXixYt0vTp0+Xm5mZVI8DMDO5pHTt2VFhYmKZPn64rV65Y7fP29parq6v27t1r1X6rs2EKy4cffijDMCzbiYmJ2rp1q9q2bSvpt6ASHBysH3/8UU2bNr3l4/eHSmzRoUMHHTx4ULt27bJqX7JkiUwmk9q1a1fg91UYatWqpeeee07169e3qvH3sye/ZzKZ5OLiYvWFn5ycXOCfX+7ZZZ988skdDwl1797dckr0rX4+vw8zucqWLasuXbpo6tSpyszM1IEDB247focOHXTlyhWtXr3aqn3JkiWW/UXBZDLJMAzLDFqu9957T9nZ2bd8Tu7C41xbt25VYmKi5fe5VatWqlChgg4ePHjb32cXF5d81dawYUO9/vrrqlChQp7fYYCZGdzzXn75ZTVp0kQpKSmWQwHSb/+DHDBggOWCYw0bNtQPP/yg5cuXF1ktKSkp6tWrl5566imlpqZq2rRpcnV11ZQpUyx93nnnHXXp0kWdOnXS4MGD5e/vr4sXL+rQoUPatWuXPvnkkwK99vjx47VkyRJ169ZN06dPV7Vq1fTll19qzpw5+sc//qFatWoV1tvMl71792rUqFHq06ePgoOD5eLiog0bNmjv3r1W/+rO/df5Rx99pBo1asjV1VX169dX9+7dtXLlSkVGRuqxxx5TUlKSXnjhBfn6+uro0aMFqmnmzJl66KGH1KJFCz3zzDOqWbOmfv31V33xxRd655135O7urunTpysuLk6hoaEaM2aMateurRs3bujEiRP673//q3nz5qlKlSp66qmn5ObmplatWsnX11fJycmKiYmRh4eHmjVrdtsaBg4cqLfffluDBg3SiRMnVL9+fW3evFnR0dHq2rWrOnbsWKD3JklpaWm3vAp15cqV1aZNG7Vu3VqvvPKKKlWqpOrVqys+Pl4LFiywnJn0Rzt27NCwYcPUp08fJSUlaerUqfL397ccEi1XrpzeeustDRo0SBcvXtRjjz0mLy8vnTt3Tj/++KPOnTunuXPn3nLsNWvWaM6cOerZs6dq1KghwzC0cuVKXb58WWFhYQX+DHCPsuPiY6BQ/f5spj+KiIgwJOU5myM1NdUYNmyY4e3tbZQtW9bo0aOHceLEiduezXTu3Dmr5w8aNMgoW7Zsntf745kjuWczffDBB8aYMWOMypUrG2az2Xj44YeNHTt25Hn+jz/+aPTt29fw8vIynJ2dDR8fH6N9+/bGvHnz8vV+bycxMdGIiIgwKlasaDg7Oxu1a9c2XnnlFcsZUrkK42ymV155JU/f33+uv/76qzF48GCjTp06RtmyZY1y5coZDRo0MF5//XWrs6BOnDhhhIeHG+7u7oYko1q1apZ9L730klG9enXDbDYbdevWNd59913Lz+qPrzty5Mhb1j5o0CCrtoMHDxp9+vQxKlasaLi4uBhVq1Y1Bg8ebNy4ccPS59y5c8aYMWOMwMBAw9nZ2fD09DSaNGliTJ061bhy5YphGIaxePFio127doa3t7fh4uJi+Pn5GX379jX27t1718/0woULxogRIwxfX1/DycnJqFatmjFlyhSrGgzD9rOZJN3ykXu22KlTp4xHH33UuO+++wx3d3ejc+fOxv79+/N8Trm/e+vWrTOeeOIJo0KFCoabm5vRtWtX4+jRo3leOz4+3ujWrZvh6elpODs7G/7+/ka3bt2MTz75JM+YuWcz/fTT
T0b//v2NoKAgw83NzfDw8DCaN29uLFq0KF/vF38tJsP43Zw3AACAg2HNDAAAcGiEGQAA4NAIMwAAwKERZgAAgEMjzAAAAIdGmAEAAA7tnr9oXk5Ojs6cOSN3d/d8X/YdAADYl2EYSk9Pl5+fn0qVuvPcyz0fZs6cOaOAgAB7lwEAAAogKSnprjeAvefDTO59bJKSklS+fHk7V4P69evr5MmTedqHDRum1157zapt7NixWrRokWJiYiyXR7+Vbt26afPmzXnaw8PDLZf+37Jli958803t2bNHycnJWrZsmbp3727V/80339Sbb74p6bdL/48cOdKyb8eOHZowYYI2btyo0qVL5/8NAwAKJC0tTQEBAfm6H909H2ZyDy2VL1+eMFMC7Nixw+qmdfv371dYWJgef/xxq5/P6tWrtXv3bvn5+cnV1fWOP7vPP/9cmZmZlu0LFy6oYcOG6t+/v9XzmjRpoqeeekqPPvqoypQpY7Vv3759io6O1po1a2QYhrp3764ePXooJCREN2/e1MSJE/Xuu+/qvvvuK6yPAgCQD/lZInLPhxmULJUrV7bafumllxQUFKQ2bdpY2k6fPq1Ro0Zp7dq16tat213H9PT0tNpesWKFypQpoz59+ljaunTpoi5dutx2jEOHDqlBgwZq3769JKlBgwY6dOiQQkJC9Morr6h169Z3vDkgAMB+CDOwm8zMTC1dulQTJkywJO+cnBw98cQT+uc//2l1h2tbLFiwQP369VPZsmXz/Zz69evryJEjOnnypAzD0JEjRxQSEqJjx45p0aJF2rlzZ4FqAQAUPU7Nht2sXr1aly9f1uDBgy1tL7/8spycnDRmzJgCjfnDDz9o//79GjZsmE3Pq1u3rqKjoxUWFqbw8HDFxMSobt26GjFihGJjY7V27VqFhISoUaNG+vbbbwtUGwCgaDAzA7tZsGCBunTpIj8/P0nSzp079cYbb2jXrl0FPo1+wYIFCgkJUfPmzW1+7ogRIzRixAjL9qJFi+Tu7q6WLVuqdu3aSkhI0KlTp9SvXz8dP35cZrO5QDUCAAoXMzOwi8TERK1fv95qBuW7775TSkqKqlatKicnJzk5OSkxMVETJ05U9erV7zrmtWvXtGLFCptnZW7l/Pnzmj59ut566y1t375dtWrVUnBwsNq1a6ebN2/qyJEjf/o1AACFg5kZ2MXChQvl5eVltcD3iSeeUMeOHa36derUSU888YSGDBly1zE//vhjZWRkaMCAAX+6vnHjxmn8+PGqUqWKEhISdPPmTcu+rKwsqzOyAAD2RZhBscvJydHChQs1aNAgOTn971ewYsWKqlixolVfZ2dn+fj4qHbt2pa2gQMHyt/fXzExMVZ9FyxYoJ49e+YZQ5KuXLmiY8eOWbaPHz+uPXv2yNPTU1WrVrXqGxcXp6NHj2rJkiWSpObNm+unn37SV199paSkJJUuXdqqHgCAfRFmUOzWr1+vkydPaujQoQV6/smTJ/Nc2vrIkSPavHmz1q1bd8vn7NixQ+3atbNsT5gwQZI0aNAgLVq0yNJ+/fp1jRo1Sh999JHlNfz9/fXWW29pyJAhMpvNWrx4sdzc3ApUOwCg8JkMwzDsXURRSktLk4eHh1JTU7loHgAADsKW728WAAMAAIdGmAEAAA6NMAMAABwaYQYAADg0wgwAAHBohBkAAODQuM7MPeyTn1PtXQKKUZ8gD3uXAAB2wcwMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAAcGiEGQAA4NAIMwAAwKERZgAAgEMjzAAAAIdGmAEAAA6NMAMAABwaYQYAADg0wgwAAHBodg0z1atXl8lkyvMYOXKkJMkwDEVFRcnPz09ubm5q27atDhw4YM+SAQBACWPXMJOQkKCzZ89aHnFxcZKkPn36SJJiY2M1c+ZMzZ49WwkJCfLx8VFYWJjS09PtWTYAAChB7BpmKleuLB8fH8tj
zZo1CgoKUps2bWQYhmbNmqWpU6eqd+/eCgkJ0eLFi3Xt2jUtX77cnmUDAIASpMSsmcnMzNTSpUs1dOhQmUwmHT9+XMnJyQoPD7f0MZvNatOmjbZu3XrbcTIyMpSWlmb1AAAA964SE2ZWr16ty5cva/DgwZKk5ORkSZK3t7dVP29vb8u+W4mJiZGHh4flERAQUGQ1AwAA+ysxYWbBggXq0qWL/Pz8rNpNJpPVtmEYedp+b8qUKUpNTbU8kpKSiqReAABQMjjZuwBJSkxM1Pr167Vy5UpLm4+Pj6TfZmh8fX0t7SkpKXlma37PbDbLbDYXXbEAAKBEKREzMwsXLpSXl5e6detmaQsMDJSPj4/lDCfpt3U18fHxCg0NtUeZAACgBLL7zExOTo4WLlyoQYMGycnpf+WYTCaNGzdO0dHRCg4OVnBwsKKjo1WmTBlFRETYsWIAAFCS2D3MrF+/XidPntTQoUPz7Js8ebKuX7+uyMhIXbp0SS1atNC6devk7u5uh0oBAEBJZDIMw7B3EUUpLS1NHh4eSk1NVfny5e1dTrH65OdUe5eAYtQnyMPeJQBAobHl+7tErJkBAAAoKMIMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAAcGiEGQAA4NAIMwAAwKERZgAAgEMjzAAAAIdGmAEAAA6NMAMAABwaYQYAADg0wgwAAHBohBkAAODQCDMAAMChEWYAAIBDI8wAAACHRpgBAAAOjTADAAAcGmEGAAA4NMIMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAAcGiEGQAA4NAIMwAAwKHZPcycPn1aAwYMUMWKFVWmTBk98MAD2rlzp2W/YRiKioqSn5+f3Nzc1LZtWx04cMCOFQMAgJLErmHm0qVLatWqlZydnfXVV1/p4MGDeu2111ShQgVLn9jYWM2cOVOzZ89WQkKCfHx8FBYWpvT0dPsVDgAASgwne774yy+/rICAAC1cuNDSVr16dct/G4ahWbNmaerUqerdu7ckafHixfL29tby5cs1fPjw4i4ZAACUMHadmfniiy/UtGlT9enTR15eXmrUqJHeffddy/7jx48rOTlZ4eHhljaz2aw2bdpo69attxwzIyNDaWlpVg8AAHDvsmuY+eWXXzR37lwFBwdr7dq1GjFihMaMGaMlS5ZIkpKTkyVJ3t7eVs/z9va27PujmJgYeXh4WB4BAQFF+yYAAIBd2TXM5OTkqHHjxoqOjlajRo00fPhwPfXUU5o7d65VP5PJZLVtGEaetlxTpkxRamqq5ZGUlFRk9QMAAPuza5jx9fVVvXr1rNrq1q2rkydPSpJ8fHwkKc8sTEpKSp7Zmlxms1nly5e3egAAgHuXXcNMq1atdPjwYau2I0eOqFq1apKkwMBA+fj4KC4uzrI/MzNT8fHxCg0NLdZaAQBAyWTXs5nGjx+v0NBQRUdHq2/fvvrhhx80f/58zZ8/X9Jvh5fGjRun6OhoBQcHKzg4WNHR0SpTpowiIiLsWToAACgh7BpmmjVrplWrVmnKlCmaPn26AgMDNWvWLD3++OOWPpMnT9b169cVGRmpS5cuqUWLFlq3bp3c3d3tWDkAACgpTIZhGPYuoiilpaXJw8NDqampf7n1M5/8nGrvElCM+gR52LsEACg0tnx/2/12BgAAAH8GYQYAADg0wgwAAHBohBkAAODQCDMAAMChEWYAAIBDI8wAAACHRpgBAAAOjTADAAAcms1hZvHixfryyy8t25MnT1aFChUUGhqqxMTEQi0OAADgbmwOM9HR0XJzc5Mkbdu2TbNnz1ZsbKwqVaqk8ePHF3qBAAAAd2LzjSaTkpJUs2ZNSdLq1av12GOP6f/+7//UqlUrtW3btrDrAwAAuCObZ2bKlSunCxcuSJLWrVunjh07SpJcXV11/fr1wq0OAADgLmyemQkL
C9OwYcPUqFEjHTlyRN26dZMkHThwQNWrVy/s+gAAAO7I5pmZt99+Wy1bttS5c+f02WefqWLFipKknTt3qn///oVeIAAAwJ2YDMMw7F1EUUpLS5OHh4dSU1NVvnx5e5dTrD75OdXeJaAY9QnysHcJAFBobPn+LtB1Zr777jsNGDBAoaGhOn36tCTpgw8+0ObNmwsyHAAAQIHZHGY+++wzderUSW5ubtq1a5cyMjIkSenp6YqOji70AgEAAO7E5jAzY8YMzZs3T++++66cnZ0t7aGhodq1a1ehFgcAAHA3NoeZw4cPq3Xr1nnay5cvr8uXLxdGTQAAAPlmc5jx9fXVsWPH8rRv3rxZNWrUKJSiAAAA8svmMDN8+HCNHTtW27dvl8lk0pkzZ7Rs2TJNmjRJkZGRRVEjAADAbdl80bzJkycrNTVV7dq1040bN9S6dWuZzWZNmjRJo0aNKooaAQAAbqvA15m5du2aDh48qJycHNWrV0/lypUr7NoKBdeZwV8F15kBcC+x5fvb5pmZ1NRUZWdny9PTU02bNrW0X7x4UU5OTn+5wAAAAOzL5jUz/fr104oVK/K0f/zxx+rXr1+hFAUAAJBfNoeZ7du3q127dnna27Ztq+3btxdKUQAAAPllc5jJyMhQVlZWnvabN2/q+vXrhVIUAABAftkcZpo1a6b58+fnaZ83b56aNGlSKEUBAADkl80LgF988UV17NhRP/74ozp06CBJ+uabb5SQkKB169YVeoEAAAB3YvPMTKtWrbRt2zYFBATo448/1n/+8x/VrFlTe/fu1cMPP1wUNQIAANyWzTMzkvTAAw9o2bJlhV0LAACAzQoUZnJycnTs2DGlpKQoJyfHat+tbkIJAABQVGwOM99//70iIiKUmJioP1482GQyKTs7u9CKAwAAuBubw8yIESPUtGlTffnll/L19ZXJZCqKugAAAPLF5jBz9OhRffrpp6pZs2ZR1AMAAGATm89matGihY4dO1YoLx4VFSWTyWT18PHxsew3DENRUVHy8/OTm5ub2rZtqwMHDhTKawMAgHuDzTMzo0eP1sSJE5WcnKz69evL2dnZan+DBg1sGu/+++/X+vXrLdulS5e2/HdsbKxmzpypRYsWqVatWpoxY4bCwsJ0+PBhubu721o6AAC4B9kcZh599FFJ0tChQy1tJpNJhmEUaAGwk5OT1WxMLsMwNGvWLE2dOlW9e/eWJC1evFje3t5avny5hg8fbmvpAADgHmRzmDl+/HihFnD06FH5+fnJbDarRYsWio6OVo0aNXT8+HElJycrPDzc0tdsNqtNmzbaunXrbcNMRkaGMjIyLNtpaWmFWi8AAChZbA4z1apVK7QXb9GihZYsWaJatWrp119/1YwZMxQaGqoDBw4oOTlZkuTt7W31HG9vbyUmJt52zJiYGD3//POFViMAACjZCnTRPEk6ePCgTp48qczMTKv2v/3tb/keo0uXLpb/rl+/vlq2bKmgoCAtXrxYDz74oCTlOfU793DW7UyZMkUTJkywbKelpSkgICDfNQEAAMdic5j55Zdf1KtXL+3bt8+yVkb6X+j4MxfNK1u2rOrXr6+jR4+qZ8+ekqTk5GT5+vpa+qSkpOSZrfk9s9kss9lc4BoAAIBjsfnU7LFjxyowMFC//vqrypQpowMHDujbb79V06ZNtWnTpj9VTEZGhg4dOiRfX18FBgbKx8dHcXFxlv2ZmZmKj49XaGjon3odAABw77B5Zmbbtm3asGGDKleurFKlSqlUqVJ66KGHFBMTozFjxmj37t35HmvSpEnq0aOHqlatqpSUFM2YMUNpaWkaNGiQTCaTxo0bp+joaAUHBys4OFjR0dEqU6aMIiIibC0bAADco2wOM9nZ2SpXrpwkqVKlSjpz5oxq166tatWq6fDhwzaNderUKfXv31/nz59X5cqV9eCDD+r777+3LDKePHmyrl+/rsjISF26dEktWrTQunXruMYMAACwsDnMhISEaO/e
vapRo4ZatGih2NhYubi4aP78+apRo4ZNY61YseKO+00mk6KiohQVFWVrmQAA4C/C5jDz3HPP6erVq5KkGTNmqHv37nr44YdVsWLFu4YTAACAwmZzmOnUqZPlv2vUqKGDBw/q4sWLuu+++7iDNgAAKHY2n800dOhQpaenW7V5enrq2rVrVrc4AAAAKA42h5nFixfr+vXredqvX7+uJUuWFEpRAAAA+ZXvw0xpaWkyDEOGYSg9PV2urq6WfdnZ2frvf/8rLy+vIikSAADgdvIdZipUqCCTySSTyaRatWrl2W8ymbgnEgAAKHb5DjMbN26UYRhq3769PvvsM3l6elr2ubi4qFq1avLz8yuSIgEAAG4n32GmTZs2kqTjx4+ratWqnLkEAABKBJsXAB86dEhbtmyxbL/99tt64IEHFBERoUuXLhVqcQAAAHdjc5j55z//qbS0NEnSvn37NGHCBHXt2lW//PKLJkyYUOgFAgAA3InNF807fvy46tWrJ0n67LPP1KNHD0VHR2vXrl3q2rVroRcIAABwJzbPzLi4uOjatWuSpPXr1ys8PFzSbxfOy52xAQAAKC42z8w89NBDmjBhglq1aqUffvhBH330kSTpyJEjqlKlSqEXCAAAcCc2z8zMnj1bTk5O+vTTTzV37lz5+/tLkr766it17ty50AsEAAC4E5NhGIa9iyhKaWlp8vDwUGpqqsqXL2/vcorVJz+n2rsEFKM+QR72LgEACo0t3982H2aSpJycHB07dkwpKSnKycmx2te6deuCDAkAAFAgNoeZ77//XhEREUpMTNQfJ3VMJpOys7MLrTgAAIC7sTnMjBgxQk2bNtWXX34pX19frgQMAADsyuYwc/ToUX366aeqWbNmUdQDAABgE5vPZmrRooWOHTtWFLUAAADYzOaZmdGjR2vixIlKTk5W/fr15ezsbLW/QYMGhVYcAADA3dgcZh599FFJ0tChQy1tJpNJhmGwABgAABS7At2bCQAAoKSwOcxUq1atKOoAAAAokHyHmS+++CJf/f72t78VuBgAAABb5TvM9OzZ8659WDMDAACKW77DzB9vWwAAAFAS2HydGQAAgJKEMAMAABwaYQYAADg0wgwAAHBo+Qozb775pm7cuCFJOnnypAzDKNKiAAAA8itfYWbChAlKS0uTJAUGBurcuXNFWhQAAEB+5evUbD8/P3322Wfq2rWrDMPQqVOnLDM1f1S1atVCLRAAAOBO8hVmnnvuOY0ePVqjRo2SyWRSs2bN8vThRpMAAMAe8hVm/u///k/9+/dXYmKiGjRooPXr16tixYpFXRsAAMBd5ftsJnd3d4WEhGjhwoVq1aqVGjZseMtHQcXExMhkMmncuHGWNsMwFBUVJT8/P7m5ualt27Y6cOBAgV8DAADce2w+NXvQoEEym83auXOnli5dqmXLlmnXrl1/qoiEhATNnz9fDRo0sGqPjY3VzJkzNXv2bCUkJMjHx0dhYWFKT0//U68HAADuHTaHmZSUFLVv317NmjXTmDFjNGrUKDVt2lQdOnQo0FlOV65c0eOPP653331X9913n6XdMAzNmjVLU6dOVe/evRUSEqLFixfr2rVrWr58uc2vAwAA7k02h5nRo0crLS1NBw4c0MWLF3Xp0iXt379faWlpGjNmjM0FjBw5Ut26dVPHjh2t2o8fP67k5GSFh4db2sxms9q0aaOtW7fedryMjAylpaVZPQAAwL0r33fNzvX1119r/fr1qlu3rqWtXr16evvtt62CR36sWLFCu3btUkJCQp59ycnJkiRvb2+rdm9vbyUmJt52zJiYGD3//PM21QEAAByXzTMzOTk5cnZ2ztPu7OysnJycfI+TlJSksWPHaunSpXJ1db1tP5PJZLWdewr47UyZMkWpqamWR1JSUr5rAgAAjsfmMNO+fXuNHTtWZ86csbSdPn1a48ePV4cOHfI9zs6dO5WSkqImTZrIyclJTk5Oio+P15tvviknJyfLjEzuDE2u
lJSUPLM1v2c2m1W+fHmrBwAAuHfZHGZmz56t9PR0Va9eXUFBQapZs6YCAwOVnp6ut956K9/jdOjQQfv27dOePXssj6ZNm+rxxx/Xnj17VKNGDfn4+CguLs7ynMzMTMXHxys0NNTWsgEAwD3K5jUzAQEB2rVrl+Li4vTTTz/JMAzVq1cvzwLeu8m9bs3vlS1bVhUrVrS0jxs3TtHR0QoODlZwcLCio6NVpkwZRURE2Fo2AAC4R9kcZnKFhYUpLCysMGvJY/Lkybp+/boiIyN16dIltWjRQuvWrZO7u3uRvi4AAHAcJsMwDHsXUZTS0tLk4eGh1NTUv9z6mU9+TrV3CShGfYI87F0CABQaW76/bV4zAwAAUJIQZgAAgEMjzAAAAIdWoDDz888/67nnnlP//v2VkpIi6bcrA3NHawAAUNxsDjPx8fGqX7++tm/frpUrV+rKlSuSpL1792ratGmFXiAAAMCd2BxmnnnmGc2YMUNxcXFycXGxtLdr107btm0r1OIAAADuxuYws2/fPvXq1StPe+XKlXXhwoVCKQoAACC/bA4zFSpU0NmzZ/O07969W/7+/oVSFAAAQH7ZHGYiIiL09NNPKzk5WSaTSTk5OdqyZYsmTZqkgQMHFkWNAAAAt2VzmHnxxRdVtWpV+fv768qVK6pXr55at26t0NBQPffcc0VRIwAAwG3ZfG8mZ2dnLVu2TNOnT9fu3buVk5OjRo0aKTg4uCjqAwAAuKMC32gyKChIQUFBhVkLAACAzWwOMxMmTLhlu8lkkqurq2rWrKlHHnlEnp6ef7o4AACAu7E5zOzevVu7du1Sdna2ateuLcMwdPToUZUuXVp16tTRnDlzNHHiRG3evFn16tUripoBAAAsbF4A/Mgjj6hjx446c+aMdu7cqV27dun06dMKCwtT//79dfr0abVu3Vrjx48vinoBAACsmAzDMGx5gr+/v+Li4vLMuhw4cEDh4eE6ffq0du3apfDwcJ0/f75Qiy2ItLQ0eXh4KDU1VeXLl7d3OcXqk59T7V0CilGfIA97lwAAhcaW72+bZ2ZSU1MtN5f8vXPnziktLU3SbxfWy8zMtHVoAAAAmxXoMNPQoUO1atUqnTp1SqdPn9aqVav05JNPqmfPnpKkH374QbVq1SrsWgEAAPKweQHwO++8o/Hjx6tfv37Kysr6bRAnJw0aNEivv/66JKlOnTp67733CrdSAACAW7B5zUyuK1eu6JdffpFhGAoKClK5cuUKu7ZCwZoZ/FWwZgbAvcSW7+8CXzSvXLlyatCgQUGfDgAAUCgKFGYSEhL0ySef6OTJk3kW+q5cubJQCgMAAMgPmxcAr1ixQq1atdLBgwe1atUq3bx5UwcPHtSGDRvk4cE0NwAAKF42h5no6Gi9/vrrWrNmjVxcXPTGG2/o0KFD6tu3r6pWrVoUNQIAANyWzWHm559/Vrdu3SRJZrNZV69elclk0vjx4zV//vxCLxAAAOBObA4znp6eSk9Pl/Tb1YD3798vSbp8+bKuXbtWuNUBAADchc0LgB9++GHFxcWpfv366tu3r8aOHasNGzYoLi5OHTp0KIoaAQAAbsvmMDN79mzduHFDkjRlyhQ5Oztr8+bN6t27t/71r38VeoEAAAB3UuCL5jkKLpqHvwoumgfgXlKkN5osXbr0LW80eeHCBZUuXdrW4QAAAP4Um8PM7SZyMjIy5OLi8qcLAgAAsEW+18y8+eabkiSTyaT33nvP6l5M2dnZ+vbbb1WnTp3CrxAAAOAO8h1mcu+IbRiG5s2bZ3VIycXFRdWrV9e8efMKv0IAAIA7yHeYOX78uCSpXbt2Wrlype67774iKwoAACC/bD41e+PGjUVRBwAAQIHYHGays7O1aNEiffPNN0pJSVFOTo7V/g0bNhRacQAAAHdj89lMY8eO1dixY5Wdna2QkBA1bNjQ6mGLuXPnqkGDBipfvrzKly+vli1b6quvvrLsNwxDUVFR8vPzk5ubm9q2basD
Bw7YWjIAALiH2Twzs2LFCn388cfq2rXrn37xKlWq6KWXXlLNmjUlSYsXL9Yjjzyi3bt36/7771dsbKxmzpypRYsWqVatWpoxY4bCwsJ0+PBhubu7/+nXBwAAjs/mmRkXFxdL+PizevTooa5du6pWrVqqVauWXnzxRZUrV07ff/+9DMPQrFmzNHXqVPXu3VshISFavHixrl27puXLlxfK6wMAAMdnc5iZOHGi3njjjdtePK+gsrOztWLFCl29elUtW7bU8ePHlZycrPDwcEsfs9msNm3aaOvWrYX62gAAwHHZfJhp8+bN2rhxo7766ivdf//9cnZ2ttq/cuVKm8bbt2+fWrZsqRs3bqhcuXJatWqV6tWrZwks3t7eVv29vb2VmJh42/EyMjKUkZFh2U5LS7OpHgAA4FhsDjMVKlRQr169Cq2A2rVra8+ePbp8+bI+++wzDRo0SPHx8Zb9JpPJqr9hGHnafi8mJkbPP/98odUHAABKthJ31+yOHTsqKChITz/9tIKCgrRr1y41atTIsv+RRx5RhQoVtHjx4ls+/1YzMwEBAdw1G/c87poN4F5SpHfNlqSsrCytX79e77zzjtLT0yVJZ86c0ZUrVwoynBXDMJSRkaHAwED5+PgoLi7Osi8zM1Px8fEKDQ297fPNZrPlVO/cBwAAuHfZfJgpMTFRnTt31smTJ5WRkaGwsDC5u7srNjZWN27csOn+TM8++6y6dOmigIAApaena8WKFdq0aZO+/vprmUwmjRs3TtHR0QoODlZwcLCio6NVpkwZRURE2Fo2AAC4R9kcZsaOHaumTZvqxx9/VMWKFS3tvXr10rBhw2wa69dff9UTTzyhs2fPysPDQw0aNNDXX3+tsLAwSdLkyZN1/fp1RUZG6tKlS2rRooXWrVvHNWYAAICFzWtmKlWqpC1btqh27dpyd3fXjz/+qBo1aujEiROqV6+erl27VlS1Fogtx9zuNayZ+WthzQyAe0mRrpnJyclRdnZ2nvZTp04xYwIAAIqdzWEmLCxMs2bNsmybTCZduXJF06ZNK5RbHAAAANjC5jUzr7/+utq1a6d69erpxo0bioiI0NGjR1WpUiV9+OGHRVEjAADAbdk8M+Pn56c9e/bon//8p4YPH65GjRrppZde0u7du+Xl5VUUNQIAHEBMTIyaNWsmd3d3eXl5qWfPnjp8+LBVn8GDB8tkMlk9HnzwwTuO++677+rhhx/Wfffdp/vuu08dO3bUDz/8YNUnPT1d48aNU7Vq1eTm5qbQ0FAlJCRY9Xn11Vfl7e0tb29vvf7661b7tm/friZNmtxyGQVKPptnZiTJzc1NQ4YM0ZAhQwq7HgCAg4qPj9fIkSPVrFkzZWVlaerUqQoPD9fBgwdVtmxZS7/OnTtr4cKFlm0XF5c7jrtp0yb1799foaGhcnV1VWxsrMLDw3XgwAH5+/tLkoYNG6b9+/frgw8+kJ+fn5YuXaqOHTvq4MGD8vf31759+/Tvf/9ba9askWEY6t69u8LCwhQSEqKbN29qxIgRmj9/vkqXLl00Hw6KlM1hJiYmRt7e3ho6dKhV+/vvv69z587p6aefLrTiAACO4+uvv7baXrhwoby8vLRz5061bt3a0m42m+Xj45PvcZctW2a1/e677+rTTz/VN998o4EDB+r69ev67LPP9Pnnn1teJyoqSqtXr9bcuXM1Y8YMHTp0SA0aNFD79u0lSQ0aNNChQ4cUEhKiV155Ra1bt1azZs0K+tZhZzYfZnrnnXdUp06dPO3333+/TRfMAwDc21JTf7s8hKenp1X7pk2b5OXlpVq1aumpp55SSkqKTeNeu3ZNN2/etIyblZWl7Oxsubq6WvVzc3PT5s2bJUn169fXkSNHdPLkSSUmJurIkSMKCQnRsWPHtGjRIs2YMaOgbxMlgM1hJjk5Wb6+vnnaK1eurLNnzxZKUQAAx2YYhiZMmKCHHnpIISEhlvYuXbpo2bJl2rBhg1577TUlJCSoffv2
VvfUu5tnnnlG/v7+6tixoyTJ3d1dLVu21AsvvKAzZ84oOztbS5cu1fbt2y3fS3Xr1lV0dLTCwsIUHh6umJgY1a1bVyNGjFBsbKzWrl2rkJAQNWrUSN9++23hfhgocjYfZgoICNCWLVsUGBho1b5lyxb5+fkVWmEAAMc1atQo7d271zIzkuvvf/+75b9DQkLUtGlTVatWTV9++aV69+5913FjY2P14YcfatOmTVYzMR988IGGDh0qf39/lS5dWo0bN1ZERIR27dpl6TNixAiNGDHCsr1o0SJLEKpdu7YSEhJ06tQp9evXT8ePH5fZbP4zHwGKkc1hZtiwYRo3bpxu3rxpOfb4zTffaPLkyZo4cWKhFwgAcCyjR4/WF198oW+//VZVqlS5Y19fX19Vq1ZNR48eveu4r776qqKjo7V+/Xo1aNDAal9QUJDi4+N19epVpaWlydfXV3//+9/z/MM71/nz5zV9+nR9++232r59u2rVqmW5D+DNmzd15MgR1a9fP/9vGnZlc5iZPHmyLl68qMjISGVmZkqSXF1d9fTTT2vKlCmFXiAAwDEYhqHRo0dr1apV2rRp022DxO9duHBBSUlJt1y+8HuvvPKKZsyYobVr16pp06a37Ve2bFmVLVtWly5d0tq1axUbG3vLfuPGjdP48eNVpUoVJSQk6ObNm5Z9uWtw4DhsCjPZ2dnavHmznn76af3rX//SoUOH5ObmpuDgYKbjAOAvbuTIkVq+fLk+//xzubu7Kzk5WZLk4eEhNzc3XblyRVFRUXr00Ufl6+urEydO6Nlnn1WlSpXUq1cvyzgDBw6Uv7+/YmJiJP12aOlf//qXli9frurVq1vGLVeunMqVKydJWrt2rQzDUO3atXXs2DH985//VO3atW95CZG4uDgdPXpUS5YskSQ1b95cP/30k7766islJSWpdOnSql27dpF+VihcNoWZ0qVLq1OnTjp06JACAwM5jQ0AYDF37lxJUtu2ba3aFy5cqMGDB6t06dLat2+flixZosuXL8vX11ft2rXTRx99ZHVvv5MnT6pUqf+dnzJnzhxlZmbqsccesxp32rRpioqKkvTbmVNTpkzRqVOn5OnpqUcffVQvvviinJ2drZ5z/fp1jRo1Sh999JHlNfz9/fXWW29pyJAhMpvNWrx4sdzc3ArrY0ExsPmu2c2aNdNLL72kDh06FFVNhYq7ZuOvgrtmA7iXFOlds1988UVNmjRJa9as0dmzZ5WWlmb1AAAAKE42LwDu3LmzJOlvf/ubTCaTpd0wDJlMJhZNAQCAYmVzmNm4cWNR1AEAsAGHkf9aOIx8ZzaHmTZt2hRFHQAAAAVi85oZSfruu+80YMAAhYaG6vTp05J+u/riH6/0CAAAUNRsDjOfffaZOnXqJDc3N+3atctyP4309HRFR0cXeoEAAAB3YnOYmTFjhubNm6d3333X6vz90NBQq3tgAAAAFAebw8zhw4fVunXrPO3ly5fX5cuXC6MmAACAfLM5zPj6+urYsWN52jdv3qwaNWoUSlEAAAD5ZXOYGT58uMaOHavt27fLZDLpzJkzWrZsmSZNmqTIyMiiqBEAAOC2CnTX7NTUVLVr1043btxQ69atZTabNWnSJI0aNaooagQAALgtm8OM9NstDaZOnaqDBw8qJydH9erVs9y5FAAAoDjl+zDTtWvXNHLkSPn7+8vLy0vDhg1T9erV1bx5c4IMAACwm3yHmWnTpmnRokXq1q2b+vXrp7i4OP3jH/8oytoAAADuKt+HmVauXKkFCxaoX79+kqQBAwaoVatWys7OVunSpYusQAAAgDvJ98xMUlKSHn74Yct28+bN5eTkpDNnzhRJYQAAAPmR7zCTnZ0tFxcXqzYnJydlZWUVelEAAAD5le/DTIZhaPDgwTKbzZa2GzduaMSIESpbtqylbeXKlYVbIQAAwB3kO8wMGjQoT9uAAQMKtRgAAABb5TvMLFy4sCjrAAAAKBCbb2cAAABQkhBmAACA
Q7NrmImJiVGzZs3k7u4uLy8v9ezZU4cPH7bqYxiGoqKi5OfnJzc3N7Vt21YHDhywU8UAAKCksWuYiY+P18iRI/X9998rLi5OWVlZCg8P19WrVy19YmNjNXPmTM2ePVsJCQny8fFRWFiY0tPT7Vg5AAAoKQp0o8nC8vXXX1ttL1y4UF5eXtq5c6dat24twzA0a9YsTZ06Vb1795YkLV68WN7e3lq+fLmGDx9uj7IBAEAJUqLWzKSmpkqSPD09JUnHjx9XcnKywsPDLX3MZrPatGmjrVu32qVGAABQsth1Zub3DMPQhAkT9NBDDykkJESSlJycLEny9va26uvt7a3ExMRbjpORkaGMjAzLdlpaWhFVDAAASoISMzMzatQo7d27Vx9++GGefSaTyWrbMIw8bbliYmLk4eFheQQEBBRJvQAAoGQoEWFm9OjR+uKLL7Rx40ZVqVLF0u7j4yPpfzM0uVJSUvLM1uSaMmWKUlNTLY+kpKSiKxwAANidXcOMYRgaNWqUVq5cqQ0bNigwMNBqf2BgoHx8fBQXF2dpy8zMVHx8vEJDQ285ptlsVvny5a0eAADg3mXXNTMjR47U8uXL9fnnn8vd3d0yA+Ph4SE3NzeZTCaNGzdO0dHRCg4OVnBwsKKjo1WmTBlFRETYs3QAAFBC2DXMzJ07V5LUtm1bq/aFCxdq8ODBkqTJkyfr+vXrioyM1KVLl9SiRQutW7dO7u7uxVwtAAAoiewaZgzDuGsfk8mkqKgoRUVFFX1BAADA4ZSIBcAAAAAFRZgBAAAOjTADAAAcGmEGAAA4NMIMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAAcGiEGQAA4NAIMwAAwKERZgAAgEMjzAAAAIdGmAEAAA6NMAMAABwaYQYAADg0wgwAAHBohBkAAODQCDMAAMChEWYAAIBDI8wAAACHRpgBAAAOjTADAAAcGmEGAAA4NMIMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODS7hplvv/1WPXr0kJ+fn0wmk1avXm213zAMRUVFyc/PT25ubmrbtq0OHDhgn2IBAECJZNcwc/XqVTVs2FCzZ8++5f7Y2FjNnDlTs2fPVkJCgnx8fBQWFqb09PRirhQAAJRUTvZ88S5duqhLly633GcYhmbNmqWpU6eqd+/ekqTFixfL29tby5cv1/Dhw4uzVAAAUEKV2DUzx48fV3JyssLDwy1tZrNZbdq00datW+1YGQAAKEnsOjNzJ8nJyZIkb29vq3Zvb28lJibe9nkZGRnKyMiwbKelpRVNgQAAoEQosTMzuUwmk9W2YRh52n4vJiZGHh4elkdAQEBRlwgAAOyoxIYZHx8fSf+bocmVkpKSZ7bm96ZMmaLU1FTLIykpqUjrBAAA9lViw0xgYKB8fHwUFxdnacvMzFR8fLxCQ0Nv+zyz2azy5ctbPQAAwL3Lrmtmrly5omPHjlm2jx8/rj179sjT01NVq1bVuHHjFB0dreDgYAUHBys6OlplypRRRESEHasGAAAliV3DzI4dO9SuXTvL9oQJEyRJgwYN0qJFizR58mRdv35dkZGRunTpklq0aKF169bJ3d3dXiUDAIASxmQYhmHvIopSWlqaPDw8lJqa+pc75PTJz6n2LgHFqE+Qh71LQDHi7/uv5a/4923L93eJXTMDAACQH4QZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAAcGiEGQAA4NAIMwAAwKERZgAAgEMjzAAAAIdGmAEAAA6NMAMAABwaYQYAADg0wgwAAHBohBkAAODQCDMAAMChEWYAAIBDI8wAAACHRpgBAAAOjTADAAAcGmEGAAA4NMIMAABwaIQZAADg0AgzAADAoRFmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAA
cGiEGQAA4NAIMwAAwKERZgAAgENziDAzZ84cBQYGytXVVU2aNNF3331n75IAAEAJUeLDzEcffaRx48Zp6tSp2r17tx5++GF16dJFJ0+etHdpAACgBCjxYWbmzJl68sknNWzYMNWtW1ezZs1SQECA5s6da+/SAABACVCiw0xmZqZ27typ8PBwq/bw8HBt3brVTlUBAICSxMneBdzJ+fPnlZ2dLW9vb6t2b29vJScn3/I5GRkZysjIsGynpqZKktLS0oqu0BLqWvpf7z3/laWlmexdAooRf99/LX/Fv+/c723DMO7at0SHmVwmk/UP0TCMPG25YmJi9Pzzz+dpDwgIKJLagJJisL0LAFBkBtu7ADtKT0+Xh4fHHfuU6DBTqVIllS5dOs8sTEpKSp7ZmlxTpkzRhAkTLNs5OTm6ePGiKlaseNsAhHtHWlqaAgIClJSUpPLly9u7HACFiL/vvxbDMJSeni4/P7+79i3RYcbFxUVNmjRRXFycevXqZWmPi4vTI488csvnmM1mmc1mq7YKFSoUZZkogcqXL8//7IB7FH/ffx13m5HJVaLDjCRNmDBBTzzxhJo2baqWLVtq/vz5OnnypEaMGGHv0gAAQAlQ4sPM3//+d124cEHTp0/X2bNnFRISov/+97+qVq2avUsDAAAlQIkPM5IUGRmpyMhIe5cBB2A2mzVt2rQ8hxoBOD7+vnE7JiM/5zwBAACUUCX6onkAAAB3Q5gBAAAOjTADAAAcGmEGAAA4NMIMAABwaA5xajZwO6dOndLcuXO1detWJScny2QyydvbW6GhoRoxYgT35AKAvwBOzYbD2rx5s7p06aKAgACFh4fL29tbhmEoJSVFcXFxSkpK0ldffaVWrVrZu1QARSApKUnTpk3T+++/b+9SYGeEGTisZs2a6aGHHtLrr79+y/3jx4/X5s2blZCQUMyVASgOP/74oxo3bqzs7Gx7lwI7I8zAYbm5uWnPnj2qXbv2Lff/9NNPatSoka5fv17MlQEoDF988cUd9//yyy+aOHEiYQasmYHj8vX11datW28bZrZt2yZfX99irgpAYenZs6dMJpPu9G9uk8lUjBWhpCLMwGFNmjRJI0aM0M6dOxUWFiZvb2+ZTCYlJycrLi5O7733nmbNmmXvMgEUkK+vr95++2317Nnzlvv37NmjJk2aFG9RKJEIM3BYkZGRqlixol5//XW98847lqnm0qVLq0mTJlqyZIn69u1r5yoBFFSTJk20a9eu24aZu83a4K+DNTO4J9y8eVPnz5+XJFWqVEnOzs52rgjAn/Xdd9/p6tWr6ty58y33X716VTt27FCbNm2KuTKUNIQZAADg0LgCMAAAcGiEGQAA4NAIMwAAwKERZgA4pEWLFqlChQp/ehyTyaTVq1f/6XEA2A9hBoDdDB48+Lan3QJAfhFmAACAQyPMACiRZs6cqfr166ts2bIKCAhQZGSkrly5kqff6tWrVatWLbm6uiosLExJSUlW+//zn/+oSZMmcnV1VY0aNfT8888rKyuruN4GgGJAmAFQIpUqVUpvvvmm9u/fr8WLF2vDhg2aPHmyVZ9r167pxRdf1OLFi7VlyxalpaWpX79+lv1r167VgAEDNGbMGB08eFDvvPOOFi1apBdffLG43w6AIsRF8wDYzeDBg3X58uV8LcD95JNP9I9//MNypedFixZpyJAh+v7779WiRQtJv90pvW7dutq+fbuaN2+u1q1bq0uXLpoyZYplnKVLl2ry5Mk6c+aMpN8WAK9atYq1O4AD495MAEqkjRs3Kjo6WgcPHlRaWpqysrJ048YNXb16VWXLlpUkOTk5qWnTppbn1KlTRxUqVNChQ4fUvHlz7dy5UwkJCVYzMdnZ2bpx44auXbumMmXKFPv7AlD4CDMASpzExER17dpVI0aM0AsvvCBPT09t3rxZTz75pG7evGnV12Qy5Xl+bltOTo6ef/559e7dO08fV1fXoikeQLEjzAAocXbs2KGsrCy9
9tprKlXqt6V9H3/8cZ5+WVlZ2rFjh5o3by5JOnz4sC5fvqw6depIkho3bqzDhw+rZs2axVc8gGJHmAFgV6mpqdqzZ49VW+XKlZWVlaW33npLPXr00JYtWzRv3rw8z3V2dtbo0aP15ptvytnZWaNGjdKDDz5oCTf//ve/1b17dwUEBKhPnz4qVaqU9u7dq3379mnGjBnF8fYAFAPOZgJgV5s2bVKjRo2sHu+//75mzpypl19+WSEhIVq2bJliYmLyPLdMmTJ6+umnFRERoZYtW8rNzU0rVqyw7O/UqZPWrFmjuLg4NWvWTA8++KBmzpypatWqFedbBFDEOJsJAAA4NGZmAACAQyPMAAAAh0aYAQAADo0wAwAAHBphBgAAODTCDAAAcGiEGQAA4NAIMwAAwKERZgAAgEMjzAAAAIdGmAEAAA6NMAMAABza/wM9H2zW8NCGLgAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "counts = df['label'].value_counts()\n",
    "percentages = counts / counts.sum() * 100\n",
    "\n",
    "ax = percentages.plot(kind='bar', color='skyblue', alpha=0.7)\n",
    "plt.xlabel('Label')\n",
    "plt.ylabel('Percentage of Instances')\n",
    "plt.title('Number of Instances of Labels')\n",
    "\n",
    "# Add percentage values on the bars\n",
    "for p in ax.patches:\n",
    "    width = p.get_width()\n",
    "    height = p.get_height()\n",
    "    x, y = p.get_xy() \n",
    "    ax.annotate(f'{height:.2f}%', (x + width/2, y + height*1.02), ha='center')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Conclusions\n",
    "- 24.47% of patients have a label of 1\n",
    "- during feature engineering we will have to:\n",
    "    - drop columns with only null values,\n",
    "    - reduce the number of columns,\n",
    "    - potentially impute null values in some columns,\n",
    "    - potentially create new features"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}