--- a +++ b/notebooks/feature_engineering.ipynb @@ -0,0 +1,5795 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# **FEATURE ENGINEERING**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We have too many columns with values True/NaN. We will try to group them by categories. \n", + "<br/> <br/>\n", + " **Diagnoses**\n", + "- Respiratory Disorders\n", + "- Heart and Cardiovascular Diseases\n", + "- Metabolic and Endocrine Disorders\n", + "- Neurological Disorders\n", + "- Orthopedic Injuries\n", + "- Mental Health Conditions\n", + "- Reproductive and Pregnancy-related\n", + "\n", + "**Medications**\n", + "- Pain Relievers and Analgesics\n", + "- Cardiovascular and Blood Pressure Medications\n", + "- Infection Medications\n", + "- Oral Medications\n", + "- Other Medications\n", + "\n", + "**Treatments and Care**\n", + "- Therapies and Regimes\n", + "- Diagnostic Procedures\n", + "- Surgical Interventions\n", + "- Patient Care Management\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", + "from tabulate import tabulate" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>label</th>\n", + " <th>scc</th>\n", + " <th>race</th>\n", + " <th>marital</th>\n", + " <th>ethnic</th>\n", + " <th>gender</th>\n", + " <th>state</th>\n", + " <th>age</th>\n", + " <th>Pain 
severity - 0-10 verbal numeric rating [Score] - Reported</th>\n", + " <th>Influenza seasonal injectable preservative free</th>\n", + " <th>...</th>\n", + " <th>Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n", + " <th>Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n", + " <th>Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n", + " <th>Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection</th>\n", + " <th>SARS-CoV-2 RNA Pnl Resp NAA+probe</th>\n", + " <th>Hydroxychloroquine Sulfate 200 MG Oral Tablet</th>\n", + " <th>1 ML denosumab 60 MG/ML Prefilled Syringe</th>\n", + " <th>Fexofenadine hydrochloride 60 MG Oral Tablet</th>\n", + " <th>Leronlimab 700 MG Injection</th>\n", + " <th>Lenzilumab 200 MG IV</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>101</td>\n", + " <td>white</td>\n", + " <td>m</td>\n", + " <td>nonhispanic</td>\n", + " <td>m</td>\n", + " <td>massachusetts</td>\n", + " <td>50t70</td>\n", + " <td>abnormal</td>\n", + " <td>True</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0</td>\n", + " <td>110</td>\n", + " <td>white</td>\n", + " <td>m</td>\n", + " <td>nonhispanic</td>\n", + " <td>m</td>\n", + " <td>massachusetts</td>\n", + " <td>50t70</td>\n", + " <td>normal</td>\n", + " <td>True</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0</td>\n", 
+ " <td>127</td>\n", + " <td>black</td>\n", + " <td>m</td>\n", + " <td>nonhispanic</td>\n", + " <td>m</td>\n", + " <td>massachusetts</td>\n", + " <td>50t70</td>\n", + " <td>abnormal</td>\n", + " <td>True</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0</td>\n", + " <td>129</td>\n", + " <td>white</td>\n", + " <td>m</td>\n", + " <td>nonhispanic</td>\n", + " <td>m</td>\n", + " <td>massachusetts</td>\n", + " <td>50t70</td>\n", + " <td>abnormal</td>\n", + " <td>True</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>1</td>\n", + " <td>69</td>\n", + " <td>white</td>\n", + " <td>m</td>\n", + " <td>nonhispanic</td>\n", + " <td>m</td>\n", + " <td>massachusetts</td>\n", + " <td>50t70</td>\n", + " <td>abnormal</td>\n", + " <td>True</td>\n", + " <td>...</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>5 rows × 783 columns</p>\n", + "</div>" + ], + "text/plain": [ + " label scc race marital ethnic gender state age \\\n", + "0 0 101 white m nonhispanic m massachusetts 50t70 \n", + "1 0 110 white m nonhispanic m massachusetts 50t70 \n", + "2 0 127 black m nonhispanic m massachusetts 50t70 \n", + "3 0 129 white m nonhispanic m massachusetts 50t70 \n", + "4 1 69 white m nonhispanic m massachusetts 50t70 \n", + "\n", + " Pain severity - 0-10 verbal numeric rating [Score] - Reported 
# Read the learning table and discard two columns in a single pass:
# 'ptnum' and the coded column 'C-20565-8'.
df = pd.read_csv('../data/learning_data.csv').drop(columns=['ptnum', 'C-20565-8'])

# Build a code -> name lookup from the codes table ('code' and 'name' columns).
df_codes = pd.read_csv('../data/codes.csv')
code_to_name = pd.Series(df_codes['name'].values, index=df_codes['code']).to_dict()

# Swap coded column headers for their names; headers that have no entry in
# the lookup are left as they are.
df = df.rename(columns=code_to_name)

# Independent copy of the renamed frame kept under a second name.
df2 = df.copy()

# Last expression of the cell: preview the first rows.
df.head()
NaN \n", + "\n", + " History of appendectomy remifentanil 2 MG Injection \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "9043 NaN NaN \n", + "9044 NaN NaN \n", + "9045 NaN NaN \n", + "9046 NaN NaN \n", + "9047 NaN NaN \n", + "\n", + " Chlorpheniramine Maleate 4 MG Oral Tablet Fetal anatomy study \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "9043 NaN NaN \n", + "9044 NaN NaN \n", + "9045 NaN NaN \n", + "9046 NaN NaN \n", + "9047 NaN NaN \n", + "\n", + " Bicarbonate [Moles/volume] in Arterial blood \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "... ... \n", + "9043 NaN \n", + "9044 NaN \n", + "9045 NaN \n", + "9046 NaN \n", + "9047 NaN \n", + "\n", + " Screening mammography (procedure) Sertraline 100 MG Oral Tablet \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... \n", + "9043 NaN NaN \n", + "9044 NaN NaN \n", + "9045 NaN NaN \n", + "9046 NaN NaN \n", + "9047 NaN NaN \n", + "\n", + " Transport problems (finding) Sulfamethoxazole / Trimethoprim \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "... ... ... 
# --- Collapse columns that ended up sharing a name ---------------------------
# After the code -> name rename, `df` can contain several columns with the same
# header. Each group of same-named columns is merged into a single column that
# holds, per row, the first non-null value found across the group.
columns = df.columns

# De-duplicate while keeping first-appearance order. A plain set() must not be
# used here: iteration order of a set of strings changes between interpreter
# runs (hash randomization), which would make the column order of
# `df_combined` differ from one kernel restart to the next.
unique_columns = list(dict.fromkeys(columns))

merged_by_name = {}
for col in unique_columns:
    # df[[col]] selects every column carrying this header (one or many).
    same_name_block = df[[col]]
    if same_name_block.shape[1] > 1:
        # Back-fill across the duplicates, then keep the left-most column:
        # that is the first non-null value in each row.
        merged_by_name[col] = same_name_block.bfill(axis=1).iloc[:, 0]
    else:
        merged_by_name[col] = same_name_block.iloc[:, 0]

# Assemble the result in one step instead of inserting columns one at a time
# into an empty frame. An empty input still yields an empty frame.
if merged_by_name:
    df_combined = pd.concat(merged_by_name, axis=1)
else:
    df_combined = pd.DataFrame(index=df.index)

print("\nDataFrame after combining columns with the same name:")
print(df_combined)

# Work on a copy so the engineered features below do not touch `df_combined`.
df2 = df_combined.copy()

# --- Creating new features ----------------------------------------------------
# Each group feature is the row-wise sum over its member columns. The notebook
# describes these columns as True/NaN flags; sum() skips NaN by default, so the
# value is presumably the number of flagged members per row.

respiratory_disorders_columns = [
    'Acute bacterial sinusitis (disorder)',
    'Acute bronchitis (disorder)',
    'Acute respiratory distress syndrome (disorder)',
    'Acute viral pharyngitis (disorder)',
    'Asthma',
    'Asthma screening',
    'Bacterial infectious disease (disorder)',
    'COVID-19',
    'Chronic obstructive bronchitis (disorder)',
    'Cough (finding)',
    'Dyspnea (finding)',
    'Fever (finding)',
    'Hemoptysis (finding)',
    'Hypoxemia (disorder)',
    'Measurement of respiratory function (procedure)',
    'NDA020503 200 ACTUAT Albuterol 0.09 MG/ACTUAT Metered Dose Inhaler',
    'Nasal congestion (finding)',
    'Nasal sinus endoscopy (procedure)',
    'Otitis media',
    'Oxygen Therapy',
    'Pneumococcal conjugate PCV 13',
    'Pneumonia (disorder)',
    'Pulmonary emphysema (disorder)',
    'Respiratory distress (finding)',
    'Sore throat symptom (finding)',
    'Sputum examination (procedure)',
    'Sputum finding (finding)',
    'Streptococcal sore throat (disorder)',
    'Streptococcus pneumoniae group B antigen test',
    'Throat culture (procedure)',
    # NOTE(review): reads as an orthopedic procedure rather than a respiratory
    # one - confirm this entry is intended here.
    'Total knee replacement',
    'Transfer to stepdown unit (procedure)',
    'Transplant of lung (procedure)',
    'Viral sinusitis (disorder)',
    'House dust mite (organism)',
    'Mold (organism)',
    'Grass pollen (substance)',
    'Tree pollen (substance)',
    'Animal dander (substance)',
    'Wheezing (finding)'
]

df2['Respiratory Disorders'] = df2[respiratory_disorders_columns].sum(axis=1)

# NOTE(review): several diabetes-related entries are grouped under this
# cardiovascular feature - confirm that is the intended categorisation.
heart_and_cardiovascular_diseases_columns = [
    'Acute deep venous thrombosis (disorder)',
    'Acute pulmonary embolism (disorder)',
    'Atrial Fibrillation',
    'Blindness due to type 2 diabetes mellitus (disorder)',
    'Chronic congestive heart failure (disorder)',
    'Coronary Heart Disease',
    'Diabetes',
    'Diabetic renal disease (disorder)',
    'Diabetic retinopathy associated with type II diabetes mellitus (disorder)',
    'Hyperglycemia (disorder)',
    'Hypertension',
    'Hyperlipidemia',
    'Hypertriglyceridemia (disorder)',
    'Myocardial Infarction',
    'Stroke',
    'Heart failure (disorder)'
]

df2['Heart and Cardiovascular Diseases'] = df2[heart_and_cardiovascular_diseases_columns].sum(axis=1)
"code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "metabolic_and_endocrine_disorders_columns = [\n", + " 'Alcoholism',\n", + " 'Anemia (disorder)',\n", + " 'Body mass index 30+ - obesity (finding)',\n", + " 'Chronic kidney disease stage 1 (disorder)',\n", + " 'Chronic kidney disease stage 2 (disorder)',\n", + " 'Chronic kidney disease stage 3 (disorder)',\n", + " 'Fatigue (finding)',\n", + " 'Gout',\n", + " 'Hyperlipidemia',\n", + " 'Hypertension',\n", + " 'Hypertriglyceridemia (disorder)',\n", + " 'Hypoxemia (disorder)',\n", + " 'Idiopathic atrophic hypothyroidism',\n", + " 'Loss of taste (finding)',\n", + " 'Macular edema and retinopathy due to type 2 diabetes mellitus (disorder)',\n", + " 'Metabolic syndrome X (disorder)',\n", + " 'Microalbuminuria due to type 2 diabetes mellitus (disorder)',\n", + " 'Neuropathy due to type 2 diabetes mellitus (disorder)',\n", + " 'Nonproliferative diabetic retinopathy due to type 2 diabetes mellitus (disorder)',\n", + " 'Osteoarthritis of hip',\n", + " 'Osteoarthritis of knee',\n", + " 'Osteoporosis (disorder)',\n", + " 'Pathological fracture due to osteoporosis (disorder)',\n", + " 'Prediabetes',\n", + " 'Proteinuria due to type 2 diabetes mellitus (disorder)',\n", + " 'Protracted diarrhea',\n", + " 'Rheumatoid arthritis',\n", + " 'Secondary malignant neoplasm of colon',\n", + " 'Stress (finding)',\n", + " 'Unhealthy alcohol drinking behavior (finding)',\n", + " 'Whiplash injury to neck',\n", + " 'Proliferative diabetic retinopathy due to type II diabetes mellitus (disorder)'\n", + "]\n", + "\n", + "df2['Metabolic and Endocrine Disorders'] = df2[metabolic_and_endocrine_disorders_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "neurological_disorders_columns = [\n", + " 'Alzheimer\\'s disease (disorder)',\n", + " 'Brain damage - traumatic',\n", + " 'Chronic intractable migraine without aura',\n", + " 'Chronic 
low back pain (finding)',\n", + " 'Chronic neck pain (finding)',\n", + " 'Chronic pain',\n", + " 'Chronic paralysis due to lesion of spinal cord',\n", + " 'Chronic sinusitis (disorder)',\n", + " 'Epilepsy',\n", + " 'Familial Alzheimer\\'s disease of early onset (disorder)',\n", + " 'Fibromyalgia (disorder)',\n", + " 'Lupus erythematosus',\n", + " 'Male infertility due to cystic fibrosis (disorder)',\n", + " 'Malignant neoplasm of breast (disorder)',\n", + " 'Malignant tumor of colon',\n", + " 'Seizure disorder',\n", + " 'Sepsis (disorder)',\n", + " 'Sepsis caused by virus (disorder)',\n", + " 'Septic shock (disorder)',\n", + " 'Shock (disorder)',\n", + " 'Sinusitis (disorder)',\n", + " 'Streptococcal sore throat (disorder)',\n", + " 'Suspected COVID-19',\n", + " 'Transformed migraine (disorder)',\n", + " 'Victim of intimate partner abuse (finding)',\n", + " 'Viral sinusitis (disorder)',\n", + " 'Seizure Count Cerebral Cortex Electroencephalogram (EEG)',\n", + " 'Headache (finding)',\n", + " 'Primary fibromyalgia syndrome'\n", + "]\n", + "\n", + "\n", + "df2['Neurological Disorders'] = df2[neurological_disorders_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "orthopedic_injuries_columns = [\n", + " 'Admission to orthopedic department',\n", + " 'Ankle X-ray',\n", + " 'Bone immobilization',\n", + " 'Brain damage - traumatic',\n", + " 'Bullet wound',\n", + " 'Burn injury(morphologic abnormality)',\n", + " 'Chronic kidney disease stage 1 (disorder)',\n", + " 'Clavicle X-ray',\n", + " 'Closed fracture of hip',\n", + " 'Concussion injury of brain',\n", + " 'Concussion with loss of consciousness',\n", + " 'Concussion with no loss of consciousness',\n", + " 'Facial laceration',\n", + " 'Fracture of ankle',\n", + " 'Fracture of clavicle',\n", + " 'Fracture of forearm',\n", + " 'Fracture of rib',\n", + " 'Fracture of the vertebral column with spinal cord injury',\n", + " 'Fracture of 
vertebral column without spinal cord injury',\n", + " 'Fracture subluxation of wrist',\n", + " 'History of amputation of foot (situation)',\n", + " 'History of lower limb amputation (situation)',\n", + " 'Hypertriglyceridemia (disorder)',\n", + " 'Impacted molars',\n", + " 'Injury of anterior cruciate ligament',\n", + " 'Injury of heart (disorder)',\n", + " 'Injury of kidney (disorder)',\n", + " 'Injury of medial collateral ligament of knee',\n", + " 'Injury of tendon of the rotator cuff of shoulder',\n", + " 'Joint pain (finding)',\n", + " 'Laceration of foot',\n", + " 'Laceration of hand',\n", + " 'Laceration of thigh',\n", + " 'Localized primary osteoarthritis of the hand',\n", + " 'Muscle pain (finding)',\n", + " 'Rupture of appendix',\n", + " 'Rupture of patellar tendon',\n", + " 'Second degree burn',\n", + " 'Sprain of ankle',\n", + " 'Sprain of wrist',\n", + " 'Surgical manipulation of joint of knee',\n", + " 'Surgical manipulation of shoulder joint',\n", + " 'Tear of meniscus of knee',\n", + " 'Total knee replacement',\n", + " 'Whiplash injury to neck',\n", + " 'Third degree burn'\n", + "]\n", + "\n", + "df2[\"Orthopedic Injuries\"] = df2[orthopedic_injuries_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "mental_health_columns = [\n", + " 'Alcoholism',\n", + " 'At risk for suicide (finding)',\n", + " 'Brief general examination (procedure)',\n", + " 'Care regimes assessment (procedure)',\n", + " 'Comprehensive interview and evaluation (procedure)',\n", + " 'Concussion with no loss of consciousness',\n", + " 'Depression screening (procedure)',\n", + " 'Depression screening using Patient Health Questionnaire Nine Item score (procedure)',\n", + " 'Depression screening using Patient Health Questionnaire Two-Item score (procedure)',\n", + " 'Died in hospice (finding)',\n", + " 'Drug overdose',\n", + " 'Evaluation of psychiatric state of patient',\n", + " 'Has a criminal record 
(finding)',\n", + " 'Homeless (finding)',\n", + " 'Limited social contact (finding)',\n", + " 'Mental health Outpatient Note',\n", + " 'Mental health Telehealth Note',\n", + " 'Mental health screening (procedure)',\n", + " 'Misuses drugs (finding)',\n", + " 'Nausea (finding)',\n", + " 'Not in labor force (finding)',\n", + " 'Part-time employment (finding)',\n", + " 'Passive conjunctival congestion (finding)',\n", + " 'Posttraumatic stress disorder',\n", + " 'Psychiatric follow-up',\n", + " 'Refugee (person)',\n", + " 'Reports of violence in the environment (finding)',\n", + " 'Severe anxiety (panic) (finding',\n", + " 'Social isolation (finding)',\n", + " 'Stress (finding)',\n", + " 'Suicide risk assessment (procedure)',\n", + " 'Victim of intimate partner abuse (finding)',\n", + " 'Attempted suicide - cut/stab',\n", + " 'Attempted suicide - suffocation',\n", + " 'Opioid abuse (disorder)',\n", + " 'Assessment of anxiety (procedure)'\n", + "]\n", + "\n", + "df2['Mental Health'] = df2[mental_health_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "reproductive_and_pregancy_columns = [\n", + " '1 ML medroxyPROGESTERone acetate 150 MG/ML Injection',\n", + " '168 HR Ethinyl Estradiol 0.00146 MG/HR / norelgestromin 0.00625 MG/HR Transdermal System',\n", + " 'Antenatal RhD antibody screening',\n", + " 'Antepartum eclampsia',\n", + " 'Augmentation of labor',\n", + " 'Auscultation of the fetal heart',\n", + " 'Bilateral tubal ligation',\n", + " 'Blighted ovum',\n", + " 'Camila 28 Day Pack',\n", + " 'Counseling for termination of pregnancy',\n", + " 'Cytopathology procedure preparation of smear genital source',\n", + " 'Episiotomy',\n", + " 'Errin 28 Day Pack',\n", + " 'Estrostep Fe 28 Day Pack',\n", + " 'Etonogestrel 68 MG Drug Implant',\n", + " 'Evaluation of uterine fundal height',\n", + " 'Excision of fallopian tube and surgical removal of ectopic pregnancy',\n", + " 'Fetal anatomy 
study',\n", + " 'Fetus with unknown complication',\n", + " 'Gonorrhea infection test',\n", + " 'Hyperlipidemia',\n", + " 'Induced termination of pregnancy',\n", + " 'Insertion of intrauterine contraceptive device',\n", + " 'Insertion of subcutaneous contraceptive',\n", + " 'Instrumental delivery',\n", + " 'Jolivette 28 Day Pack',\n", + " 'Kyleena 19.5 MG Intrauterine System',\n", + " 'Leronlimab 700 MG Injection',\n", + " 'Levonorgestrel 0.00354 MG/HR Drug Implant',\n", + " 'Levora 0.15/30 28 Day Pack',\n", + " 'Liletta 52 MG Intrauterine System',\n", + " 'Medical induction of labor',\n", + " 'Mestranol / Norethynodrel [Enovid]',\n", + " 'Methotrexate injection into tubal pregnancy',\n", + " 'Mirena 52 MG Intrauterine System',\n", + " 'Miscarriage in first trimester',\n", + " 'Natazia 28 Day Pack',\n", + " 'Norinyl 1+50 28 Day Pack',\n", + " 'NuvaRing 0.12/0.015 MG per 24HR 21 Day Vaginal Ring',\n", + " 'Ortho Tri-Cyclen 28 Day Pack',\n", + " 'Preeclampsia',\n", + " 'Pregnancy termination care',\n", + " 'Premature birth of newborn',\n", + " 'Removal of intrauterine device',\n", + " 'Removal of subcutaneous contraceptive',\n", + " 'Replacement of contraceptive intrauterine device',\n", + " 'RhD passive immunization',\n", + " 'Screening for chromosomal aneuploidy in prenatal amniotic fluid',\n", + " 'Spontaneous breech delivery',\n", + " 'Standard pregnancy test',\n", + " 'Tubal pregnancy',\n", + " 'Ultrasonography of abdomen right upper quadrant and epigastrium',\n", + " 'Ultrasonography of bilateral breasts (procedure)',\n", + " 'Ultrasound scan for fetal viability',\n", + " 'Vaccination for diphtheria pertussis and tetanus',\n", + " 'Vasectomy' \n", + "]\n", + "\n", + "df2['Reproductive and Pregnancy'] = df2[reproductive_and_pregancy_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "pain_relievers_and_analesics_columns = [\n", + " '10 ML Alfentanil 0.5 MG/ML Injection',\n", + " 
'10 ML Fentanyl 0.05 MG/ML Injection',\n", + " '12 HR Hydrocodone Bitartrate 10 MG Extended Release Oral Capsule',\n", + " '5 ML SUFentanil 0.05 MG/ML Injection',\n", + " '72 HR Fentanyl 0.025 MG/HR Transdermal System',\n", + " 'Abuse-Deterrent 12 HR Oxycodone Hydrochloride 10 MG Extended Release Oral Tablet [Oxycontin]',\n", + " 'Abuse-Deterrent 12 HR Oxycodone Hydrochloride 15 MG Extended Release Oral Tablet',\n", + " 'Acetaminophen 325 MG / oxyCODONE Hydrochloride 2.5 MG Oral Tablet',\n", + " 'Acetaminophen 325 MG / oxyCODONE Hydrochloride 5 MG Oral Tablet',\n", + " 'Acetaminophen 500 MG Oral Tablet',\n", + " 'Acetaminophen 300 MG / Hydrocodone Bitartrate 5 MG Oral Tablet',\n", + " 'Acetaminophen 325 MG / HYDROcodone Bitartrate 7.5 MG Oral Tablet',\n", + " 'Acetaminophen 325 MG / Oxycodone Hydrochloride 10 MG Oral Tablet [Percocet]',\n", + " 'Acetaminophen 325 MG Oral Tablet',\n", + " 'Acetaminophen/Hydrocodone',\n", + " 'Aspirin',\n", + " 'Aspirin 81 MG Oral Tablet',\n", + " 'Carbamazepine[Tegretol]',\n", + " 'Chlorpheniramine Maleate 4 MG Oral Tablet',\n", + " 'Clopidogrel 75 MG Oral Tablet',\n", + " 'Colchicine 0.6 MG Oral Tablet',\n", + " 'Cyclophosphamide 1000 MG Injection',\n", + " 'Diazepam 5 MG Oral Tablet',\n", + " 'Diazepam 5 MG/ML Injectable Solution',\n", + " 'Doxycycline Monohydrate 100 MG Oral Tablet',\n", + " 'Ibuprofen',\n", + " 'Ibuprofen 200 MG Oral Tablet',\n", + " 'Ibuprofen 400 MG Oral Tablet [Ibu]',\n", + " 'Lorazepam 2 MG/ML Injectable Solution',\n", + " 'Meperidine Hydrochloride 50 MG Oral Tablet',\n", + " 'Muscle pain (finding)',\n", + " 'Naproxen 500 MG Oral Tablet',\n", + " 'Naproxen sodium 220 MG Oral Tablet',\n", + " 'Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray',\n", + " 'Phenazopyridine hydrochloride 100 MG Oral Tablet',\n", + " 'Sulfamethoxazole / Trimethoprim',\n", + " 'buprenorphine 2 MG / naloxone 0.5 MG Sublingual Tablet',\n", + " 'diphenhydrAMINE Hydrochloride 25 MG Oral Tablet',\n", + " 'duloxetine 20 MG Delayed Release Oral 
Capsule',\n", + " 'methadone hydrochloride 10 MG Oral Tablet',\n", + " 'tramadol hydrochloride 50 MG Oral Tablet',\n", + " 'clonazePAM 0.25 MG Oral Tablet',\n", + " 'Midazolam 1 MG/ML Injectable Solution',\n", + " 'Rocuronium bromide 10 MG/ML Injectable Solution',\n", + " 'Epidural anesthesia',\n", + "]\n", + "\n", + "df2['Pain Relievers and Analesics'] = df2[pain_relievers_and_analesics_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "cardiovascular_and_blood_pressure_medications_columns = [\n", + " '0.3 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe',\n", + " '0.4 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe',\n", + " '1 ML Enoxaparin sodium 150 MG/ML Prefilled Syringe',\n", + " '1 ML heparin sodium porcine 5000 UNT/ML Injection',\n", + " '3 ML Amiodarone hydrocholoride 50 MG/ML Prefilled Syringe',\n", + " '4 ML norepinephrine 1 MG/ML Injection',\n", + " 'Acute deep venous thrombosis (disorder)',\n", + " 'Acute pulmonary embolism (disorder)',\n", + " 'Alteplase 100 MG Injection',\n", + " 'Assessment using New York Heart Association Classification (procedure)',\n", + " 'Atorvastatin 80 MG Oral Tablet',\n", + " 'Atropine Sulfate 1 MG/ML Injectable Solution',\n", + " 'Captopril 25 MG Oral Tablet',\n", + " 'Cardiac Arrest',\n", + " 'Cardiovascular stress testing (procedure)',\n", + " 'Catheter ablation of tissue of heart',\n", + " 'Coronary artery bypass grafting',\n", + " 'Digoxin 0.125 MG Oral Tablet',\n", + " 'Echocardiography (procedure)',\n", + " 'Electrical cardioversion',\n", + " 'Electrocardiographic procedure',\n", + " 'Furosemide 40 MG Oral Tablet',\n", + " 'History of myocardial infarction (situation)',\n", + " 'Hydrochlorothiazide 25 MG Oral Tablet',\n", + " 'Implantation of left ventricular assist device (procedure)',\n", + " 'Insertion of biventricular implantable cardioverter defibrillator',\n", + " 'Lisinopril',\n", + " 'Myocardial Infarction',\n", + " 
'Nitrofurantoin 5 MG/ML Oral Suspension',\n", + " 'Peripheral blood smear interpretation',\n", + " 'Referral to hypertension clinic',\n", + " 'Shock (disorder)',\n", + " 'Stroke',\n", + " 'Transplantation of heart (procedure)',\n", + " 'Verapamil Hydrochloride 40 MG',\n", + " 'Warfarin Sodium 5 MG Oral Tablet',\n", + " 'carvedilol 25 MG Oral Tablet',\n", + " 'lisinopril 10 MG Oral Tablet',\n", + " 'lisinopril 20 MG Oral Tablet',\n", + " 'losartan potassium 25 MG Oral Tablet',\n", + " 'losartan potassium 50 MG Oral Tablet',\n", + " 'sacubitril 97 MG / valsartan 103 MG Oral Tablet',\n", + " 'remifentanil 2 MG Injection',\n", + " 'pregabalin 100 MG Oral Capsule'\n", + "]\n", + "\n", + "df2['Cardiovascular and Blood Pressure Medications'] = df2[cardiovascular_and_blood_pressure_medications_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "injection_medications_columns = [\n", + " '10 ML Doxorubicin Hydrochloride 2 MG/ML Injection',\n", + " '10 ML oxaliplatin 5 MG/ML Injection',\n", + " '1 ML DOCEtaxel 20 MG/ML Injection',\n", + " '1 ML Epinephrine 1 MG/ML Injection',\n", + " '1 ML Morphine Sulfate 5 MG/ML Injection',\n", + " '1 ML Vasopressin (USP) 20 UNT/ML Injection',\n", + " '10 ML Fluorouracil 50 MG/ML Injection',\n", + " '10 ML Pamidronate Disodium 3 MG/ML Injection',\n", + " '100 ML Epirubicin Hydrochloride 2 MG/ML Injection',\n", + " '100 ML Propofol 10 MG/ML Injection',\n", + " '100 ML zoledronic acid 0.04 MG/ML Injection',\n", + " '150 ML vancomycin 5 MG/ML Injection',\n", + " '2 ML Ondansetron 2 MG/ML Injection',\n", + " '20 ML tocilizumab 20 MG/ML Injection',\n", + " '5 ML hyaluronidase-oysk 2000 UNT/ML / trastuzumab 120 MG/ML Injection',\n", + " 'pneumococcal polysaccharide vaccine 23 valent',\n", + " 'remdesivir 100 MG Injection',\n", + " 'zoster',\n", + " 'Aztreonam 2000 MG Injection',\n", + " 'cefdinir', \n", + " 'Cefdinir',\n", + " 'doxycycline hyclate 100 MG',\n", + " 
'Ampicillin 100 MG/ML Injectable Solution',\n", + " 'Penicillin G 375 MG/ML Injectable Solution',\n", + " 'Penicillin V',\n", + " 'Paclitaxel 100 MG Injection',\n", + " 'Piperacillin 4000 MG / tazobactam 500 MG Injection',\n", + " 'Leucovorin 100 MG Injection',\n", + " 'Influenza seasonal injectable preservative free',\n", + " 'Syphilis infection test',\n", + " 'Skin test for tuberculosis',\n", + " 'Urine culture'\n", + "]\n", + "\n", + "\n", + "df2['Injection Medications'] = df2[injection_medications_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "oral_medications_columns = [\n", + " '12 HR Cefaclor 500 MG Extended Release Oral Tablet',\n", + " '24 HR Donepezil hydrochloride 10 MG / Memantine hydrochloride 28 MG Extended Release Oral Capsule',\n", + " '24 HR Metformin hydrochloride 500 MG Extended Release Oral Tablet',\n", + " 'Acetaminophen 300 MG / Codeine Phosphate 15 MG Oral Tablet',\n", + " 'Acetaminophen 325 MG Oral Tablet [Tylenol]',\n", + " 'Alendronic acid 10 MG Oral Tablet',\n", + " 'Allopurinol 100 MG Oral Tablet',\n", + " 'Amlodipine 5 MG Oral Tablet',\n", + " 'Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet',\n", + " 'Amoxicillin 250 MG Oral Capsule',\n", + " 'Aspirin 81 MG Oral Tablet',\n", + " 'Astemizole 10 MG Oral Tablet',\n", + " 'Atorvastatin 80 MG Oral Tablet',\n", + " 'Azithromycin 250 MG Oral Tablet',\n", + " 'Azithromycin 250mg',\n", + " 'Cefuroxime 250 MG Oral Tablet',\n", + " 'Chlorpheniramine Maleate 4 MG Oral Tablet',\n", + " 'Clopidogrel 75 MG Oral Tablet',\n", + " 'Colchicine 0.6 MG Oral Tablet',\n", + " 'Diazepam 5 MG Oral Tablet',\n", + " 'Digoxin 0.125 MG Oral Tablet',\n", + " 'Donepezil hydrochloride 10 MG Oral Tablet',\n", + " 'Donepezil hydrochloride 23 MG Oral Tablet',\n", + " 'Doxycycline Monohydrate 100 MG Oral Tablet',\n", + " 'Fexofenadine hydrochloride 60 MG Oral Tablet',\n", + " 'Furosemide 40 MG Oral Tablet',\n", + " 'Galantamine 4 
MG Oral Tablet',\n", + " 'Hydrochlorothiazide 25 MG Oral Tablet',\n", + " 'Levothyroxine Sodium 0.075 MG Oral Tablet',\n", + " 'Loratadine 10 MG Oral Tablet',\n", + " 'Methotrexate 2.5 MG Oral Tablet',\n", + " 'Milnacipran hydrochloride 100 MG Oral Tablet',\n", + " 'Nitrofurantoin 5 MG/ML Oral Suspension',\n", + " 'Penicillin V Potassium 500 MG Oral Tablet',\n", + " 'Phenazopyridine hydrochloride 100 MG Oral Tablet',\n", + " 'Sertraline 100 MG Oral Tablet',\n", + " 'Simvastatin 10 MG Oral Tablet',\n", + " 'Simvastatin 20 MG Oral Tablet',\n", + " 'Tacrine 10 MG Oral Capsule',\n", + " 'Tamoxifen 10 MG Oral Tablet',\n", + " 'Terfenadine 60 MG Oral Tablet',\n", + " 'Verapamil Hydrochloride 40 MG',\n", + " 'Verzenio 100 MG Oral Tablet',\n", + " 'Warfarin Sodium 5 MG Oral Tablet',\n", + " 'palbociclib 100 MG Oral Capsule',\n", + " 'predniSONE 20 MG Oral Tablet',\n", + " 'ribociclib 200 MG Oral Tablet',\n", + " 'neratinib 40 MG Oral Tablet',\n", + " 'Hydroxychloroquine Sulfate 200 MG Oral Tablet',\n", + " 'chloroquine phosphate 500 MG Oral Tablet',\n", + " 'Naltrexone hydrochloride 50 MG Oral Tablet',\n", + " 'lapatinib 250 MG Oral Tablet',\n", + " 'cetirizine hydrochloride 10 MG Oral Tablet',\n", + " 'cycloSPORINE modified 100 MG Oral Capsule',\n", + " 'letrozole 2.5 MG Oral Tablet',\n", + " 'exemestane 25 MG Oral Tablet',\n", + " 'ferrous sulfate 325 MG Oral Tablet'\n", + " \n", + "]\n", + "\n", + "df2['Oral Medications'] = df2[oral_medications_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "other_medications_columns = [\n", + " '0.25 ML Leuprolide Acetate 30 MG/ML Prefilled Syringe',\n", + " '1 ML Epoetin Alfa 4000 UNT/ML Injection [Epogen]',\n", + " '1 ML denosumab 60 MG/ML Prefilled Syringe',\n", + " '10 ML Furosemide 10 MG/ML Injection',\n", + " '3 ML liraglutide 6 MG/ML Pen Injector',\n", + " '5 ML fulvestrant 50 MG/ML Prefilled Syringe',\n", + " '12 HR Cefaclor 500 MG Extended 
Release Oral Tablet',\n", + " '120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler',\n", + " '60 ACTUAT Fluticasone propionate 0.25 MG/ACTUAT / salmeterol 0.05 MG/ACTUAT Dry Powder Inhaler',\n", + " 'Acetaminophen 21.7 MG/ML / Dextromethorphan Hydrobromide 1 MG/ML / doxylamine succinate 0.417 MG/ML Oral Solution',\n", + " 'Chlamydia antigen test',\n", + " 'Hepatitis B Surface Antigen Measurement',\n", + " 'Hepatitis C antibody test',\n", + " 'Human immunodeficiency virus antigen test',\n", + " 'Insulin Lispro 100 UNT/ML Injectable Solution [Humalog]',\n", + " 'Jolivette 28 Day Pack',\n", + " 'Measurement of Varicella-zoster virus antibody',\n", + " 'Memantine hydrochloride 2 MG/ML Oral Solution',\n", + " 'NDA020503 200 ACTUAT Albuterol 0.09 MG/ACTUAT Metered Dose Inhaler',\n", + " 'NDA020800 0.3 ML Epinephrine 1 MG/ML Auto-Injector',\n", + " 'NITROFURANTOIN MACROCRYSTALS 50 MG Oral Capsule',\n", + " 'Ortho Tri-Cyclen 28 Day Pack',\n", + " 'Pneumococcal conjugate PCV 13',\n", + " 'Pulmozyme (Dornase Alfa)',\n", + " 'SARS-COV-2 (COVID-19) vaccine mRNA spike protein LNP preservative free 100 mcg/0.5mL dose',\n", + " 'SARS-COV-2 (COVID-19) vaccine mRNA spike protein LNP preservative free 30 mcg/0.3mL dose',\n", + " 'SARS-COV-2 (COVID-19) vaccine vector non-replicating recombinant spike protein-Ad26 preservative free 0.5 mL',\n", + " 'Seasonique 91 Day Pack',\n", + " 'Td (adult) preservative free',\n", + " 'Trinessa 28 Day Pack',\n", + " 'Vaccination for diphtheria pertussis and tetanus',\n", + " 'Vitamin B 12 5 MG/ML Injectable Solution',\n", + " 'Vomiting symptom (finding)',\n", + " 'Yaz 28 Day Pack',\n", + " 'ado-trastuzumab emtansine 100 MG Injection',\n", + " 'albuterol 5 MG/ML Inhalation Solution',\n", + " 'amLODIPine 2.5 MG Oral Tablet',\n", + " 'anastrozole 1 MG Oral Tablet',\n", + " 'baricitinib 2 MG Oral Tablet',\n", + " 'buprenorphine 2 MG / naloxone 0.5 MG Sublingual Tablet',\n", + " 'carvedilol 25 MG Oral Tablet',\n", + " 'sevoflurane 
1000 MG/ML Inhalant Solution',\n", + " 'Latex (substance)',\n", + " 'desflurane 1000 MG/ML Inhalation Solution',\n", + " 'Isoflurane 999 MG/ML Inhalant Solution',\n", + " 'Hydrocortisone 10 MG/ML Topical Cream',\n", + " 'Lenzilumab 200 MG IV',\n", + " 'remifentanil 2 MG Injection',\n", + " 'Carboplatin 10 MG/ML Injectable Solution',\n", + " 'chloroquine phosphate 500 MG Oral Tablet',\n", + " 'Naltrexone hydrochloride 50 MG Oral Tablet',\n", + " 'lapatinib 250 MG Oral Tablet',\n", + " 'Sodium Chloride 9 MG/ML Injectable Solution',\n", + " 'insulin human isophane 70 UNT/ML / Regular Insulin Human 30 UNT/ML Injectable Suspension [Humulin]'\n", + "]\n", + "\n", + "\n", + "df2['Other Medications'] = df2[other_medications_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "therapies_and_regimes_columns = [\n", + " '24hr nicotine transdermal patch',\n", + " 'Assessment of health and social care needs (procedure)',\n", + " 'Assessment of substance use (procedure)',\n", + " 'Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure)',\n", + " 'Assessment using Morse Fall Scale (procedure)',\n", + " 'Cognitive and behavioral therapy (regime/therapy)',\n", + " 'Combined chemotherapy and radiation therapy (procedure)',\n", + " 'Comprehensive interview and evaluation (procedure)',\n", + " 'Construction of diverting colostomy',\n", + " 'Controlled ventilation procedure and therapy initiation and management (procedure)',\n", + " 'Coordination of care plan (procedure)',\n", + " 'Home health aide service (regime/therapy)',\n", + " 'Hospice care (regime/therapy)',\n", + " 'Interstitial brachytherapy (procedure)',\n", + " 'Intracavitary brachytherapy (procedure)',\n", + " 'Monitoring of patient (regime/therapy)',\n", + " 'Movement therapy (regime/therapy)',\n", + " 'Nursing care/supplementary surveillance (regime/therapy)',\n", + " 'Occupational therapy (regime/therapy)',\n", 
+ " 'Physical examination',\n", + " 'Physical therapy procedure (regime/therapy)',\n", + " 'Professional / ancillary services care (regime/therapy)',\n", + " 'Psychosocial care (regime/therapy)',\n", + " 'Pulmonary rehabilitation (regime/therapy)',\n", + " 'Radiation oncology AND/OR radiotherapy (procedure)',\n", + " 'Radiation therapy care (regime/therapy)',\n", + " 'Referral to home health care service (procedure)',\n", + " 'Referral to hypertension clinic',\n", + " 'Social case work (regime/therapy)',\n", + " 'Speech and language therapy regime (regime/therapy',\n", + " 'Subcutaneous immunotherapy',\n", + " 'Teleradiotherapy procedure (procedure)',\n", + " 'Transplant of lung (procedure)',\n", + " 'Transplantation of heart (procedure)',\n", + " 'Weaning from mechanically assisted ventilation (procedure)',\n", + " 'Microbial culture (procedure)',\n", + " 'Chemotherapy (procedure)'\n", + "]\n", + "\n", + "\n", + "df2['Therapies and Regimes'] = df2[therapies_and_regimes_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "diagnostic_procedures_columns = [\n", + " 'Biopsy of breast (procedure)',\n", + " 'Biopsy of colon',\n", + " 'Biopsy of prostate',\n", + " 'Blood typing RH typing',\n", + " 'Bone density scan (procedure)',\n", + " 'Chlamydia antigen test',\n", + " 'Clavicle X-ray',\n", + " 'Colonoscopy',\n", + " 'Digital examination of rectum',\n", + " 'Echocardiography (procedure)',\n", + " 'Electrocardiographic procedure',\n", + " 'Human epidermal growth factor receptor 2 gene detection by fluorescence in situ hybridization (procedure)',\n", + " 'Human epidermal growth factor receptor 2 gene detection by immunohistochemistry (procedure)',\n", + " 'Human immunodeficiency virus antigen test',\n", + " 'Initial patient assessment (procedure)',\n", + " 'Knee X-ray',\n", + " 'Magnetic resonance imaging of breast (procedure)',\n", + " 'Mammogram - symptomatic (procedure)',\n", + " 
'Mammography (procedure)',\n", + " 'Manual pelvic examination (procedure)',\n", + " 'Measurement of Varicella-zoster virus antibody',\n", + " 'Measurement of respiratory function (procedure)',\n", + " 'Nasal sinus endoscopy (procedure)',\n", + " 'Pelvis X-ray',\n", + " 'Plain chest X-ray (procedure)',\n", + " 'Screening for domestic abuse (procedure)',\n", + " 'Screening for drug abuse (procedure)',\n", + " 'Screening for occult blood in feces (procedure)',\n", + " 'Screening mammography (procedure)',\n", + " 'Sentinel lymph node biopsy (procedure)',\n", + " 'Spirometry (procedure)',\n", + " 'Throat culture (procedure)',\n", + " 'Upper arm X-ray',\n", + " 'X-ray or wrist',\n", + " 'Seizure Count Cerebral Cortex Electroencephalogram (EEG)',\n", + " 'Allergy screening test',\n", + " 'Alpha-fetoprotein test',\n", + " 'Bilirubin.total [Presence] in Urine by Test strip',\n", + " 'Urine screening for glucose',\n", + " 'Urine screening test for diabetes',\n", + " 'Urine protein test',\n", + " 'Hemoglobin / Hematocrit / Platelet count',\n", + " 'Assessment of anxiety (procedure)',\n", + " 'Urine culture'\n", + "]\n", + "\n", + "\n", + "df2['Diagnostic Procedures'] = df2[diagnostic_procedures_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "surgerical_interventions_columns = [\n", + " 'Admission to burn unit',\n", + " 'Admission to long stay hospital',\n", + " 'Admission to neurosurgical department',\n", + " 'Admission to trauma surgery department',\n", + " 'Admit to ICU (procedure)',\n", + " 'Amputation of right foot',\n", + " 'Amputation of right leg',\n", + " 'Appendectomy',\n", + " 'Artificial respiration (procedure)',\n", + " 'Brachytherapy of breast (procedure)',\n", + " 'Catheter ablation of tissue of heart',\n", + " 'Coronary artery bypass grafting',\n", + " 'Excision of axillary lymph node (procedure)',\n", + " 'Excision of breast tissue (procedure)',\n", + " 'Excision of sentinel 
lymph node (procedure)',\n", + " 'Extraction of wisdom tooth',\n", + " 'Implantation of left ventricular assist device (procedure)',\n", + " 'Induced termination of pregnancy',\n", + " 'Insertion of biventricular implantable cardioverter defibrillator',\n", + " 'Insertion of endotracheal tube (procedure)',\n", + " 'Instrumental delivery',\n", + " 'Intubation',\n", + " 'Laparoscopic Removal of Gall Bladder',\n", + " 'Lumpectomy of breast (procedure)',\n", + " 'Lung volume reduction surgery (procedure)',\n", + " 'Open Removal of Gall Bladder',\n", + " 'Partial resection of colon',\n", + " 'Percutaneous coronary intervention',\n", + " 'Percutaneous mechanical thrombectomy of portal vein using fluoroscopic guidance',\n", + " 'Prostatectomy',\n", + " 'Rectal polypectomy',\n", + " 'Removal of endotracheal tube (procedure)',\n", + " 'Removal of subcutaneous contraceptive',\n", + " 'Surgical manipulation of joint of knee',\n", + " 'Surgical manipulation of shoulder joint',\n", + " 'Suture open wound',\n", + " 'Tear of meniscus of knee',\n", + " 'Transplantation of heart (procedure)'\n", + "]\n", + "df2['Surgical Interventions'] = df2[surgerical_interventions_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "patient_care_management_columns = [\n", + " 'Assessment of health and social care needs (procedure)',\n", + " 'Bleeding from anus',\n", + " 'Bullet wound',\n", + " 'Chronic low back pain (finding)',\n", + " 'Contact dermatitis',\n", + " 'Cystitis',\n", + " 'Diarrhea symptom (finding)',\n", + " 'Escherichia coli urinary tract infection',\n", + " 'Evaluation of psychiatric state of patient',\n", + " 'Facial laceration',\n", + " 'First degree burn',\n", + " 'Heart failure education (procedure)',\n", + " 'Hemodialysis (procedure)',\n", + " 'Hep A adult',\n", + " 'History AND physical examination (procedure)',\n", + " 'History of amputation of foot (situation)',\n", + " 'History of 
appendectomy',\n", + " 'History of cardiac arrest (situation)',\n", + " 'History of lower limb amputation (situation)',\n", + " 'History of single seizure (situation)',\n", + " 'Hospital admission short-term 24 hours',\n", + " 'Information gathering (procedure)',\n", + " 'Injection of tetanus antitoxin',\n", + " 'Intramuscular injection',\n", + " 'Intravenous blood transfusion of packed cells (procedure)',\n", + " 'Intravenous injection (procedure)',\n", + " 'Laceration of foot',\n", + " 'Laceration of forearm',\n", + " 'Laceration of hand',\n", + " 'Laceration of thigh',\n", + " 'Medication Reconciliation (procedure)',\n", + " 'Movement therapy (regime/therapy)',\n", + " 'Notifications (procedure)',\n", + " 'Oxygen administration by mask (procedure)',\n", + " 'Patient discharge (procedure)',\n", + " 'Placing subject in prone position (procedure)',\n", + " 'Postoperative procedure education (procedure)',\n", + " 'Pre-discharge assessment (procedure)',\n", + " 'Radiation oncology AND/OR radiotherapy (procedure)',\n", + " 'Referral to home health care service (procedure)',\n", + " 'Referral to hypertension clinic',\n", + " 'Renal dialysis (procedure)',\n", + " 'Resuscitation using intravenous fluid (procedure)',\n", + " 'Review of systems (procedure)',\n", + " 'Rubella screening',\n", + " 'Screening mammography (procedure)',\n", + " 'Subcutaneous immunotherapy',\n", + " 'Teleradiotherapy procedure (procedure)',\n", + " 'Transfer to stepdown unit (procedure)',\n", + " 'Transfusion of plasma (procedure)',\n", + " 'Weaning from mechanically assisted ventilation (procedure)',\n", + " 'piperacillin 2000 MG / tazobactam 250 MG Injection',\n", + " 'vancomycin 1000 MG Injection',\n", + " 'Recurrent urinary tract infection',\n", + " 'Acquired coagulation disorder (disorder)',\n", + " 'Primary malignant neoplasm of colon',\n", + " 'Pyelonephritis',\n", + " 'Chill (finding)',\n", + " 'Acute Cholecystitis',\n", + " 'Cholelithiasis',\n", + " 'Appendicitis',\n", + " 'Metastasis 
from malignant tumor of prostate (disorder)',\n", + " 'Overlapping malignant neoplasm of colon',\n", + " 'Polyp of colon',\n", + " 'Neoplasm of prostate',\n", + " 'Carcinoma in situ of prostate (disorder)',\n", + " 'Recurrent rectal polyp',\n", + " 'Discharge from skilled nursing facility (procedure)',\n", + " 'Certification procedure (procedure)',\n", + " 'Development of individualized plan of care (procedure)'\n", + "]\n", + "\n", + "df2['Patient Care Management'] = df2[patient_care_management_columns].sum(axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "categories = ['Respiratory Disorders', 'Heart and Cardiovascular Diseases', 'Metabolic and Endocrine Disorders', 'Neurological Disorders', 'Orthopedic Injuries', 'Mental Health', 'Reproductive and Pregnancy', 'Pain Relievers and Analesics', 'Cardiovascular and Blood Pressure Medications', 'Injection Medications', 'Oral Medications', 'Other Medications', 'Therapies and Regimes', 'Diagnostic Procedures', 'Surgical Interventions', 'Patient Care Management']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The type of these columns is object, so we will convert them to int." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Column: Facial laceration\n", + "Data Type: object\n", + "Column: Norinyl 1+50 28 Day Pack\n", + "Data Type: object\n", + "Column: Intubation\n", + "Data Type: object\n", + "Column: Amlodipine 5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: anastrozole 1 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Take blood sample\n", + "Data Type: float64\n", + "Column: Lack of access to transportation (finding)\n", + "Data Type: object\n", + "Column: Incision of trachea (procedure)\n", + "Data Type: float64\n", + "Column: Alteplase 100 MG Injection\n", + "Data Type: object\n", + "Column: Referral to hypertension clinic\n", + "Data Type: object\n", + "Column: 168 HR Ethinyl Estradiol 0.00146 MG/HR / norelgestromin 0.00625 MG/HR Transdermal System\n", + "Data Type: object\n", + "Column: ferrous sulfate 325 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Certification procedure (procedure)\n", + "Data Type: object\n", + "Column: Pulmonary rehabilitation (regime/therapy)\n", + "Data Type: object\n", + "Column: Aspirin\n", + "Data Type: object\n", + "Column: History of amputation of foot (situation)\n", + "Data Type: object\n", + "Column: Limited social contact (finding)\n", + "Data Type: object\n", + "Column: Azithromycin 250mg\n", + "Data Type: object\n", + "Column: Antepartum eclampsia\n", + "Data Type: object\n", + "Column: Hepatitis B Surface Antigen Measurement\n", + "Data Type: object\n", + "Column: Jolivette 28 Day Pack\n", + "Data Type: object\n", + "Column: Asthma\n", + "Data Type: object\n", + "Column: Died in hospice (finding)\n", + "Data Type: object\n", + "Column: Levonorgestrel 0.00354 MG/HR Drug Implant\n", + "Data Type: object\n", + "Column: Appearance of Urine\n", + "Data Type: object\n", + "Column: Rupture of patellar tendon\n", + "Data Type: object\n", + "Column: Cesarean 
section\n", + "Data Type: object\n", + "Column: Cystitis\n", + "Data Type: object\n", + "Column: Rupture of appendix\n", + "Data Type: object\n", + "Column: Skin test for tuberculosis\n", + "Data Type: object\n", + "Column: pregabalin 100 MG Oral Capsule\n", + "Data Type: object\n", + "Column: Fracture of rib\n", + "Data Type: object\n", + "Column: Bleeding from anus\n", + "Data Type: object\n", + "Column: cefdinir\n", + "Data Type: object\n", + "Column: Egg white IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Chronic kidney disease stage 1 (disorder)\n", + "Data Type: object\n", + "Column: Shock (disorder)\n", + "Data Type: object\n", + "Column: Epidural anesthesia\n", + "Data Type: object\n", + "Column: RhD passive immunization\n", + "Data Type: object\n", + "Column: Urine culture\n", + "Data Type: object\n", + "Column: Stress (finding)\n", + "Data Type: object\n", + "Column: COVID-19\n", + "Data Type: object\n", + "Column: Bone density scan (procedure)\n", + "Data Type: object\n", + "Column: Alpha-fetoprotein test\n", + "Data Type: object\n", + "Column: ribociclib 200 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Coordination of care plan (procedure)\n", + "Data Type: object\n", + "Column: Pneumococcal conjugate PCV 13\n", + "Data Type: object\n", + "Column: Face mask (physical object)\n", + "Data Type: object\n", + "Column: Prostatectomy\n", + "Data Type: object\n", + "Column: Information gathering (procedure)\n", + "Data Type: object\n", + "Column: Microalbumin Creatinine Ratio\n", + "Data Type: object\n", + "Column: Acetaminophen 325 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Protracted diarrhea\n", + "Data Type: object\n", + "Column: Functional capacity NYHA\n", + "Data Type: object\n", + "Column: Vaccination for diphtheria pertussis and tetanus\n", + "Data Type: object\n", + "Column: Methotrexate injection into tubal pregnancy\n", + "Data Type: object\n", + "Column: Hydrocortisone 10 MG/ML Topical Cream\n", + "Data Type: 
object\n", + "Column: doxycycline hyclate 100 MG\n", + "Data Type: object\n", + "Column: White Blood Cell (Elevated)\n", + "Data Type: float64\n", + "Column: Furosemide 40 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Human epidermal growth factor receptor 2 gene detection by immunohistochemistry (procedure)\n", + "Data Type: object\n", + "Column: Lorazepam 2 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Intravenous antibiotic therapy\n", + "Data Type: float64\n", + "Column: Color of Urine\n", + "Data Type: object\n", + "Column: lisinopril 20 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Osteoarthritis of knee\n", + "Data Type: object\n", + "Column: Alcoholism\n", + "Data Type: object\n", + "Column: Intravenous blood transfusion of packed cells (procedure)\n", + "Data Type: object\n", + "Column: Teleradiotherapy procedure (procedure)\n", + "Data Type: object\n", + "Column: Triglycerides\n", + "Data Type: object\n", + "Column: lisinopril 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Hospital admission short-term 24 hours\n", + "Data Type: object\n", + "Column: Fracture of vertebral column without spinal cord injury\n", + "Data Type: object\n", + "Column: Body Mass Index\n", + "Data Type: object\n", + "Column: Objective assessment of cardiovascular disease NYHA\n", + "Data Type: object\n", + "Column: Honey bee IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Diarrhea symptom (finding)\n", + "Data Type: object\n", + "Column: Cardiac Arrest\n", + "Data Type: object\n", + "Column: Laceration of foot\n", + "Data Type: object\n", + "Column: Admission to long stay hospital\n", + "Data Type: object\n", + "Column: Biopsy of prostate\n", + "Data Type: object\n", + "Column: Lumpectomy of breast (procedure)\n", + "Data Type: object\n", + "Column: Assessment of substance use (procedure)\n", + "Data Type: object\n", + "Column: Grass pollen (substance)\n", + "Data Type: object\n", + "Column: Glucose [Mass/volume] in Urine by 
Test strip\n", + "Data Type: object\n", + "Column: Sputum Culture\n", + "Data Type: float64\n", + "Column: Spirometry (procedure)\n", + "Data Type: object\n", + "Column: Pregnancy termination care\n", + "Data Type: object\n", + "Column: Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Subcutaneous immunotherapy\n", + "Data Type: object\n", + "Column: History of myocardial infarction (situation)\n", + "Data Type: object\n", + "Column: Controlled ventilation procedure and therapy initiation and management (procedure)\n", + "Data Type: object\n", + "Column: Platelets [#/volume] in Blood by Automated count\n", + "Data Type: object\n", + "Column: X-ray or wrist\n", + "Data Type: object\n", + "Column: Bee venom (substance)\n", + "Data Type: float64\n", + "Column: Urine screening for glucose\n", + "Data Type: object\n", + "Column: History of single seizure (situation)\n", + "Data Type: object\n", + "Column: Burn injury(morphologic abnormality)\n", + "Data Type: object\n", + "Column: Nasal congestion (finding)\n", + "Data Type: object\n", + "Column: Sinusitis (disorder)\n", + "Data Type: object\n", + "Column: Abuse Status [OMAHA]\n", + "Data Type: object\n", + "Column: Alzheimer's disease (disorder)\n", + "Data Type: object\n", + "Column: Ultrasound scan for fetal viability\n", + "Data Type: object\n", + "Column: Diazepam 5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Animal dander (substance)\n", + "Data Type: object\n", + "Column: Laceration of thigh\n", + "Data Type: object\n", + "Column: Cyclophosphamide 1000 MG Injection\n", + "Data Type: object\n", + "Column: Cladosporium herbarum IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Blighted ovum\n", + "Data Type: object\n", + "Column: Etonogestrel 68 MG Drug Implant\n", + "Data Type: object\n", + "Column: Osteoporosis (disorder)\n", + "Data Type: object\n", + "Column: Patient discharge (procedure)\n", + "Data Type: 
object\n", + "Column: Sprain of ankle\n", + "Data Type: object\n", + "Column: sevoflurane 1000 MG/ML Inhalant Solution\n", + "Data Type: object\n", + "Column: sacubitril 97 MG / valsartan 103 MG Oral Tablet\n", + "Data Type: object\n", + "Column: NuvaRing 0.12/0.015 MG per 24HR 21 Day Vaginal Ring\n", + "Data Type: object\n", + "Column: Chronic neck pain (finding)\n", + "Data Type: object\n", + "Column: Ibuprofen\n", + "Data Type: object\n", + "Column: Viral sinusitis (disorder)\n", + "Data Type: object\n", + "Column: Placing subject in prone position (procedure)\n", + "Data Type: object\n", + "Column: 1 ML Vasopressin (USP) 20 UNT/ML Injection\n", + "Data Type: object\n", + "Column: Cefdinir\n", + "Data Type: object\n", + "Column: Naproxen 500 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Respiratory distress (finding)\n", + "Data Type: object\n", + "Column: Stage group.clinical Cancer\n", + "Data Type: object\n", + "Column: Contact dermatitis\n", + "Data Type: object\n", + "Column: 72 HR Fentanyl 0.025 MG/HR Transdermal System\n", + "Data Type: object\n", + "Column: Brain damage - traumatic\n", + "Data Type: object\n", + "Column: Kyleena 19.5 MG Intrauterine System\n", + "Data Type: object\n", + "Column: Common Ragweed IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Fracture subluxation of wrist\n", + "Data Type: object\n", + "Column: Iron binding capacity [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Measurement of respiratory function (procedure)\n", + "Data Type: object\n", + "Column: Simvastatin 20 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Injection of tetanus antitoxin\n", + "Data Type: object\n", + "Column: High Density Lipoprotein Cholesterol\n", + "Data Type: object\n", + "Column: Housing status\n", + "Data Type: object\n", + "Column: 0.3 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: Nonproliferative diabetic retinopathy due to type 2 diabetes mellitus 
(disorder)\n", + "Data Type: object\n", + "Column: pH of Urine by Test strip\n", + "Data Type: object\n", + "Column: Erythrocyte distribution width [Ratio] by Automated count\n", + "Data Type: object\n", + "Column: losartan potassium 50 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Tear of meniscus of knee\n", + "Data Type: object\n", + "Column: SARS-COV-2 (COVID-19) vaccine vector non-replicating recombinant spike protein-Ad26 preservative free 0.5 mL\n", + "Data Type: object\n", + "Column: Excision of breast tissue (procedure)\n", + "Data Type: object\n", + "Column: Unhealthy alcohol drinking behavior (finding)\n", + "Data Type: object\n", + "Column: Clavicle X-ray\n", + "Data Type: object\n", + "Column: Capillary refill [Time] of Nail bed\n", + "Data Type: object\n", + "Column: Calcium\n", + "Data Type: object\n", + "Column: Admission to trauma surgery department\n", + "Data Type: object\n", + "Column: Primary fibromyalgia syndrome\n", + "Data Type: object\n", + "Column: Human epidermal growth factor receptor 2 gene detection by fluorescence in situ hybridization (procedure)\n", + "Data Type: object\n", + "Column: Nitrofurantoin 5 MG/ML Oral Suspension\n", + "Data Type: object\n", + "Column: Chronic intractable migraine without aura\n", + "Data Type: object\n", + "Column: Refugee (person)\n", + "Data Type: object\n", + "Column: Joint pain (finding)\n", + "Data Type: object\n", + "Column: Chronic obstructive bronchitis (disorder)\n", + "Data Type: object\n", + "Column: Interstitial brachytherapy (procedure)\n", + "Data Type: object\n", + "Column: carvedilol 25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Insertion of biventricular implantable cardioverter defibrillator\n", + "Data Type: object\n", + "Column: Standard pregnancy test\n", + "Data Type: object\n", + "Column: Smokes tobacco daily\n", + "Data Type: object\n", + "Column: Bone immobilization\n", + "Data Type: object\n", + "Column: Hypertension\n", + "Data Type: object\n", + "Column: 
palbociclib 100 MG Oral Capsule\n", + "Data Type: object\n", + "Column: Appendicitis\n", + "Data Type: object\n", + "Column: Digital examination of rectum\n", + "Data Type: object\n", + "Column: Localized primary osteoarthritis of the hand\n", + "Data Type: object\n", + "Column: Seizure Count Cerebral Cortex Electroencephalogram (EEG)\n", + "Data Type: object\n", + "Column: Speech and language therapy regime (regime/therapy\n", + "Data Type: object\n", + "Column: Levothyroxine Sodium 0.075 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Platelet Count\n", + "Data Type: float64\n", + "Column: Polyp size greatest dimension by CAP cancer protocols\n", + "Data Type: object\n", + "Column: Azithromycin 250 MG Oral Tablet\n", + "Data Type: object\n", + "Column: state\n", + "Data Type: object\n", + "Column: Nitrite [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Closed fracture of hip\n", + "Data Type: object\n", + "Column: 3 ML Amiodarone hydrocholoride 50 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: Not in labor force (finding)\n", + "Data Type: object\n", + "Column: Errin 28 Day Pack\n", + "Data Type: object\n", + "Column: Total Cholesterol\n", + "Data Type: object\n", + "Column: Amputation of right foot\n", + "Data Type: object\n", + "Column: 24 HR Donepezil hydrochloride 10 MG / Memantine hydrochloride 28 MG Extended Release Oral Capsule\n", + "Data Type: object\n", + "Column: Lenzilumab 200 MG IV\n", + "Data Type: object\n", + "Column: Magnesium [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Amoxicillin 250 MG Oral Capsule\n", + "Data Type: object\n", + "Column: Chloride\n", + "Data Type: object\n", + "Column: Cholelithiasis\n", + "Data Type: object\n", + "Column: Transformed migraine (disorder)\n", + "Data Type: object\n", + "Column: Creatinine\n", + "Data Type: object\n", + "Column: Knee X-ray\n", + "Data Type: object\n", + "Column: Ankle X-ray\n", + "Data Type: object\n", + "Column: 
Progesterone receptor Ag [Presence] in Breast cancer specimen by Immune stain\n", + "Data Type: object\n", + "Column: methadone hydrochloride 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Anion Gap\n", + "Data Type: float64\n", + "Column: Whiplash injury to neck\n", + "Data Type: object\n", + "Column: Spontaneous breech delivery\n", + "Data Type: object\n", + "Column: Influenza virus B Ag [Presence] in Nasopharynx by Rapid immunoassay\n", + "Data Type: object\n", + "Column: Implantation of left ventricular assist device (procedure)\n", + "Data Type: object\n", + "Column: Infection caused by Staphylococcus aureus\n", + "Data Type: float64\n", + "Column: Macular edema and retinopathy due to type 2 diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: Posttraumatic stress disorder\n", + "Data Type: object\n", + "Column: Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Review of systems (procedure)\n", + "Data Type: object\n", + "Column: Hemoglobin [Mass/volume] in Blood\n", + "Data Type: object\n", + "Column: 120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler\n", + "Data Type: object\n", + "Column: Iron [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Latex (substance)\n", + "Data Type: object\n", + "Column: Milnacipran hydrochloride 100 MG Oral Tablet\n", + "Data Type: object\n", + "Column: 10 ML Fluorouracil 50 MG/ML Injection\n", + "Data Type: object\n", + "Column: 1 ML Epinephrine 1 MG/ML Injection\n", + "Data Type: object\n", + "Column: Meperidine Hydrochloride 50 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Fracture of forearm\n", + "Data Type: object\n", + "Column: Methotrexate 2.5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: 5 ML SUFentanil 0.05 MG/ML Injection\n", + "Data Type: object\n", + "Column: Drug overdose\n", + "Data Type: object\n", + "Column: Acute bronchitis (disorder)\n", + "Data Type: object\n", + "Column: C 
reactive protein [Mass/volume] in Serum or Plasma\n", + "Data Type: float64\n", + "Column: Chronic paralysis due to lesion of spinal cord\n", + "Data Type: object\n", + "Column: Parainfluenza virus 2 RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Catheter ablation of tissue of heart\n", + "Data Type: object\n", + "Column: Screening for domestic abuse (procedure)\n", + "Data Type: object\n", + "Column: Attempted suicide - suffocation\n", + "Data Type: object\n", + "Column: Fibrin D-dimer FEU [Mass/volume] in Platelet poor plasma\n", + "Data Type: float64\n", + "Column: Nausea (finding)\n", + "Data Type: object\n", + "Column: History of cardiac arrest (situation)\n", + "Data Type: object\n", + "Column: Excision of fallopian tube and surgical removal of ectopic pregnancy\n", + "Data Type: object\n", + "Column: Intravenous infusion (procedure)\n", + "Data Type: float64\n", + "Column: Levora 0.15/30 28 Day Pack\n", + "Data Type: object\n", + "Column: Erythrocyte distribution width [Entitic volume] by Automated count\n", + "Data Type: object\n", + "Column: Screening for occult blood in feces (procedure)\n", + "Data Type: object\n", + "Column: Left ventricular Ejection fraction\n", + "Data Type: object\n", + "Column: 100 ML Propofol 10 MG/ML Injection\n", + "Data Type: object\n", + "Column: History of upper limb amputation (situation)\n", + "Data Type: float64\n", + "Column: Captopril 25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Ultrasonography of abdomen right upper quadrant and epigastrium\n", + "Data Type: object\n", + "Column: Extraction of wisdom tooth\n", + "Data Type: object\n", + "Column: NDA020800 0.3 ML Epinephrine 1 MG/ML Auto-Injector\n", + "Data Type: object\n", + "Column: Peripheral blood smear interpretation\n", + "Data Type: object\n", + "Column: Insertion of endotracheal tube (procedure)\n", + "Data Type: object\n", + "Column: Urine screening test for diabetes\n", + "Data Type: 
object\n", + "Column: History of lower limb amputation (situation)\n", + "Data Type: object\n", + "Column: Stroke\n", + "Data Type: object\n", + "Column: Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Screening for chromosomal aneuploidy in prenatal amniotic fluid\n", + "Data Type: object\n", + "Column: Proliferative diabetic retinopathy due to type II diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: ethnic\n", + "Data Type: object\n", + "Column: Male infertility due to cystic fibrosis (disorder)\n", + "Data Type: float64\n", + "Column: exemestane 25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Evaluation of psychiatric state of patient\n", + "Data Type: object\n", + "Column: Tubal pregnancy\n", + "Data Type: object\n", + "Column: label\n", + "Data Type: int64\n", + "Column: Episiotomy\n", + "Data Type: object\n", + "Column: 150 ML vancomycin 5 MG/ML Injection\n", + "Data Type: object\n", + "Column: Naltrexone hydrochloride 50 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Transfusion of plasma (procedure)\n", + "Data Type: float64\n", + "Column: Oxygen/Inspired gas setting [Volume Fraction] Ventilator\n", + "Data Type: float64\n", + "Column: Loratadine 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: 3 ML liraglutide 6 MG/ML Pen Injector\n", + "Data Type: object\n", + "Column: Cough (finding)\n", + "Data Type: object\n", + "Column: Alendronic acid 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: 4 ML norepinephrine 1 MG/ML Injection\n", + "Data Type: object\n", + "Column: Bacterial infectious disease (disorder)\n", + "Data Type: object\n", + "Column: Chronic pain\n", + "Data Type: object\n", + "Column: Surgical manipulation of shoulder joint\n", + "Data Type: object\n", + "Column: Hemodialysis (procedure)\n", + "Data Type: object\n", + "Column: Impacted molars\n", + "Data Type: object\n", + "Column: Lisinopril\n", + "Data Type: object\n", + 
"Column: Body temperature\n", + "Data Type: object\n", + "Column: Cystic Fibrosis\n", + "Data Type: float64\n", + "Column: Assessment of health and social care needs (procedure)\n", + "Data Type: object\n", + "Column: NDA020503 200 ACTUAT Albuterol 0.09 MG/ACTUAT Metered Dose Inhaler\n", + "Data Type: object\n", + "Column: HER2 [Presence] in Breast cancer specimen by Immune stain\n", + "Data Type: object\n", + "Column: Abuse-Deterrent 12 HR Oxycodone Hydrochloride 10 MG Extended Release Oral Tablet [Oxycontin]\n", + "Data Type: object\n", + "Column: Streptococcal sore throat (disorder)\n", + "Data Type: object\n", + "Column: Coronary artery bypass grafting\n", + "Data Type: object\n", + "Column: American house dust mite IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Penicillin V\n", + "Data Type: object\n", + "Column: Seizure disorder\n", + "Data Type: object\n", + "Column: Oxygen saturation in Arterial blood\n", + "Data Type: object\n", + "Column: Social isolation (finding)\n", + "Data Type: object\n", + "Column: Physical examination\n", + "Data Type: object\n", + "Column: Acquired coagulation disorder (disorder)\n", + "Data Type: object\n", + "Column: Glucose [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Suture open wound\n", + "Data Type: object\n", + "Column: Lupus erythematosus\n", + "Data Type: object\n", + "Column: Rectal polypectomy\n", + "Data Type: object\n", + "Column: Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Brachytherapy of breast (procedure)\n", + "Data Type: object\n", + "Column: Recurrent urinary tract infection\n", + "Data Type: object\n", + "Column: Memantine hydrochloride 2 MG/ML Oral Solution\n", + "Data Type: object\n", + "Column: Terfenadine 60 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Construction 
of diverting colostomy\n", + "Data Type: object\n", + "Column: Cytopathology procedure preparation of smear genital source\n", + "Data Type: object\n", + "Column: Notifications (procedure)\n", + "Data Type: object\n", + "Column: Coronary Heart Disease\n", + "Data Type: object\n", + "Column: Excision of sentinel lymph node (procedure)\n", + "Data Type: object\n", + "Column: Microbial culture (procedure)\n", + "Data Type: object\n", + "Column: Systolic Blood Pressure\n", + "Data Type: object\n", + "Column: Lactate [Mass/volume] in Blood\n", + "Data Type: object\n", + "Column: Secondary malignant neoplasm of colon\n", + "Data Type: object\n", + "Column: tramadol hydrochloride 50 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Body mass index 40+ - severely obese (finding)\n", + "Data Type: float64\n", + "Column: Latex IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Walnut IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Plain chest X-ray (procedure)\n", + "Data Type: object\n", + "Column: Percutaneous coronary intervention\n", + "Data Type: object\n", + "Column: Septic shock (disorder)\n", + "Data Type: object\n", + "Column: Fracture of clavicle\n", + "Data Type: object\n", + "Column: INR in Platelet poor plasma by Coagulation assay\n", + "Data Type: float64\n", + "Column: Nursing care/supplementary surveillance (regime/therapy)\n", + "Data Type: object\n", + "Column: Thyroxine (T4) free [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Insertion of subcutaneous contraceptive\n", + "Data Type: object\n", + "Column: Protein [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Hydrochlorothiazide 25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Social migrant (finding)\n", + "Data Type: object\n", + "Column: Clopidogrel 75 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Depression screening using Patient Health Questionnaire Two-Item score (procedure)\n", + "Data Type: object\n", + 
"Column: Injury of heart (disorder)\n", + "Data Type: object\n", + "Column: pneumococcal polysaccharide vaccine 23 valent\n", + "Data Type: object\n", + "Column: Simvastatin 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Natazia 28 Day Pack\n", + "Data Type: object\n", + "Column: Weaning from mechanically assisted ventilation (procedure)\n", + "Data Type: object\n", + "Column: Screening for drug abuse (procedure)\n", + "Data Type: object\n", + "Column: Bilirubin.total [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Mental health screening (procedure)\n", + "Data Type: object\n", + "Column: Colchicine 0.6 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Homeless (finding)\n", + "Data Type: object\n", + "Column: MCV\n", + "Data Type: float64\n", + "Column: Chill (finding)\n", + "Data Type: object\n", + "Column: Pyelonephritis\n", + "Data Type: object\n", + "Column: Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Acute respiratory distress syndrome (disorder)\n", + "Data Type: object\n", + "Column: Acetaminophen 500 MG Oral Tablet\n", + "Data Type: object\n", + "Column: MCHC [Mass/volume] by Automated count\n", + "Data Type: object\n", + "Column: Evaluation of uterine fundal height\n", + "Data Type: object\n", + "Column: Acute viral pharyngitis (disorder)\n", + "Data Type: object\n", + "Column: HIV status\n", + "Data Type: object\n", + "Column: Estrogen receptor Ag [Presence] in Breast cancer specimen by Immune stain\n", + "Data Type: object\n", + "Column: Assessment using Morse Fall Scale (procedure)\n", + "Data Type: object\n", + "Column: Combined chemotherapy and radiation therapy (procedure)\n", + "Data Type: object\n", + "Column: Depression screening (procedure)\n", + "Data Type: object\n", + "Column: Mestranol / Norethynodrel [Enovid]\n", + "Data Type: object\n", + "Column: Manual pelvic examination (procedure)\n", + "Data Type: object\n", + "Column: Physical 
therapy procedure (regime/therapy)\n", + "Data Type: object\n", + "Column: Hemoglobin A1c/Hemoglobin.total in Blood\n", + "Data Type: object\n", + "Column: Third degree burn\n", + "Data Type: object\n", + "Column: Midazolam 1 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: 10 ML Fentanyl 0.05 MG/ML Injection\n", + "Data Type: object\n", + "Column: Ferritin [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Albumin\n", + "Data Type: object\n", + "Column: clonazePAM 0.25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Colonoscopy\n", + "Data Type: object\n", + "Column: Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method\n", + "Data Type: object\n", + "Column: Basophils [#/volume] in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Rubella screening\n", + "Data Type: object\n", + "Column: Psychiatric follow-up\n", + "Data Type: object\n", + "Column: Respiratory syncytial virus RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Diabetic renal disease (disorder)\n", + "Data Type: object\n", + "Column: Cardiovascular stress testing (procedure)\n", + "Data Type: object\n", + "Column: Acetaminophen 300 MG / Codeine Phosphate 15 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Chronic kidney disease stage 2 (disorder)\n", + "Data Type: object\n", + "Column: Hyperglycemia (disorder)\n", + "Data Type: object\n", + "Column: Fever (finding)\n", + "Data Type: object\n", + "Column: Parainfluenza virus 3 RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Normal pregnancy\n", + "Data Type: object\n", + "Column: Clarity of Urine\n", + "Data Type: object\n", + "Column: cycloSPORINE modified 100 MG Oral Capsule\n", + "Data Type: object\n", + "Column: Induced termination of pregnancy\n", + "Data Type: object\n", + "Column: 5 ML hyaluronidase-oysk 2000 UNT/ML / trastuzumab 120 MG/ML 
Injection\n", + "Data Type: object\n", + "Column: Severe anxiety (panic) (finding\n", + "Data Type: object\n", + "Column: Reports of violence in the environment (finding)\n", + "Data Type: object\n", + "Column: Cat dander IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Monitoring of patient (regime/therapy)\n", + "Data Type: object\n", + "Column: buprenorphine 2 MG / naloxone 0.5 MG Sublingual Tablet\n", + "Data Type: object\n", + "Column: Rheumatoid arthritis\n", + "Data Type: object\n", + "Column: Suspected COVID-19\n", + "Data Type: object\n", + "Column: letrozole 2.5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Insertion of intrauterine contraceptive device\n", + "Data Type: object\n", + "Column: Sweat Test\n", + "Data Type: float64\n", + "Column: Instrumental delivery\n", + "Data Type: object\n", + "Column: Antenatal RhD antibody screening\n", + "Data Type: object\n", + "Column: Glucose\n", + "Data Type: object\n", + "Column: Percutaneous mechanical thrombectomy of portal vein using fluoroscopic guidance\n", + "Data Type: object\n", + "Column: Replacement of contraceptive intrauterine device\n", + "Data Type: object\n", + "Column: Low Density Lipoprotein Cholesterol\n", + "Data Type: object\n", + "Column: Seasonique 91 Day Pack\n", + "Data Type: object\n", + "Column: Aspirin 81 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Headache (finding)\n", + "Data Type: object\n", + "Column: Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Tacrine 10 MG Oral Capsule\n", + "Data Type: object\n", + "Column: Medical induction of labor\n", + "Data Type: object\n", + "Column: Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method\n", + "Data Type: object\n", + "Column: Isoflurane 999 MG/ML Inhalant Solution\n", + "Data Type: object\n", + "Column: Upper arm X-ray\n", + "Data Type: object\n", + "Column: Heart failure education (procedure)\n", + "Data 
Type: object\n", + "Column: Wheezing (finding)\n", + "Data Type: object\n", + "Column: duloxetine 20 MG Delayed Release Oral Capsule\n", + "Data Type: object\n", + "Column: Response to cancer treatment\n", + "Data Type: object\n", + "Column: Ketones [Mass/volume] in Urine by Test strip\n", + "Data Type: object\n", + "Column: 20 ML tocilizumab 20 MG/ML Injection\n", + "Data Type: object\n", + "Column: Verapamil Hydrochloride 40 MG\n", + "Data Type: object\n", + "Column: Human metapneumovirus RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Liletta 52 MG Intrauterine System\n", + "Data Type: object\n", + "Column: race\n", + "Data Type: object\n", + "Column: Penicillin V Potassium 500 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Vomiting symptom (finding)\n", + "Data Type: object\n", + "Column: 0.67 ML anakinra 149 MG/ML Prefilled Syringe\n", + "Data Type: float64\n", + "Column: Escherichia coli urinary tract infection\n", + "Data Type: object\n", + "Column: Chemotherapy (procedure)\n", + "Data Type: object\n", + "Column: Radiation oncology AND/OR radiotherapy (procedure)\n", + "Data Type: object\n", + "Column: Procalcitonin [Mass/volume] in Serum or Plasma\n", + "Data Type: float64\n", + "Column: Doxycycline Monohydrate 100 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Ketones [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Passive conjunctival congestion (finding)\n", + "Data Type: object\n", + "Column: Oxygen Therapy\n", + "Data Type: object\n", + "Column: Estrostep Fe 28 Day Pack\n", + "Data Type: object\n", + "Column: Blood typing RH typing\n", + "Data Type: object\n", + "Column: Anemia (disorder)\n", + "Data Type: object\n", + "Column: Throat culture (procedure)\n", + "Data Type: object\n", + "Column: Admission to burn unit\n", + "Data Type: object\n", + "Column: Pelvis X-ray\n", + "Data Type: object\n", + "Column: Carbon Dioxide\n", + "Data Type: object\n", + 
"Column: Creatine kinase [Enzymatic activity/volume] in Serum or Plasma\n", + "Data Type: float64\n", + "Column: canagliflozin 100 MG Oral Tablet\n", + "Data Type: float64\n", + "Column: Admission to neurosurgical department\n", + "Data Type: object\n", + "Column: Iron saturation [Mass Fraction] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Syphilis infection test\n", + "Data Type: object\n", + "Column: HER2 [Presence] in Breast cancer specimen by FISH\n", + "Data Type: object\n", + "Column: 100 ML zoledronic acid 0.04 MG/ML Injection\n", + "Data Type: object\n", + "Column: Prediabetes\n", + "Data Type: object\n", + "Column: Cow milk IgE Ab in Serum\n", + "Data Type: object\n", + "Column: albuterol 5 MG/ML Inhalation Solution\n", + "Data Type: object\n", + "Column: Respiratory rate\n", + "Data Type: object\n", + "Column: Intramuscular injection\n", + "Data Type: object\n", + "Column: Piperacillin 4000 MG / tazobactam 500 MG Injection\n", + "Data Type: object\n", + "Column: Polyp of colon\n", + "Data Type: object\n", + "Column: Appendectomy\n", + "Data Type: object\n", + "Column: Treatment status Cancer\n", + "Data Type: object\n", + "Column: Concussion with no loss of consciousness\n", + "Data Type: object\n", + "Column: Aztreonam 2000 MG Injection\n", + "Data Type: object\n", + "Column: Acute deep venous thrombosis (disorder)\n", + "Data Type: object\n", + "Column: Hep A adult\n", + "Data Type: object\n", + "Column: Pulmonary emphysema (disorder)\n", + "Data Type: object\n", + "Column: Idiopathic atrophic hypothyroidism\n", + "Data Type: object\n", + "Column: diphenhydrAMINE Hydrochloride 25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Hematocrit [Volume Fraction] of Blood by Automated count\n", + "Data Type: object\n", + "Column: Hospice care (regime/therapy)\n", + "Data Type: object\n", + "Column: Brief general examination (procedure)\n", + "Data Type: object\n", + "Column: Mammography (procedure)\n", + "Data Type: object\n", + "Column: 
Total Bilirubin (Elevated)\n", + "Data Type: float64\n", + "Column: 10 ML Pamidronate Disodium 3 MG/ML Injection\n", + "Data Type: object\n", + "Column: History of disarticulation at wrist (situation)\n", + "Data Type: float64\n", + "Column: DXA [T-score] Bone density\n", + "Data Type: object\n", + "Column: Loss of taste (finding)\n", + "Data Type: object\n", + "Column: Carboplatin 10 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Fexofenadine hydrochloride 60 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Laparoscopic Removal of Gall Bladder\n", + "Data Type: object\n", + "Column: FEV1/FVC\n", + "Data Type: object\n", + "Column: Are you covered by health insurance or some other kind of health care plan [PhenX]\n", + "Data Type: object\n", + "Column: Acetaminophen 300 MG / Hydrocodone Bitartrate 5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Blindness due to type 2 diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: Childbirth\n", + "Data Type: object\n", + "Column: Prostate specific Ag [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Allergy screening test\n", + "Data Type: object\n", + "Column: Sputum finding (finding)\n", + "Data Type: object\n", + "Column: Wheat IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Carbamazepine[Tegretol]\n", + "Data Type: object\n", + "Column: Pathological fracture due to osteoporosis (disorder)\n", + "Data Type: object\n", + "Column: 12 HR Hydrocodone Bitartrate 10 MG Extended Release Oral Capsule\n", + "Data Type: object\n", + "Column: Admit to ICU (procedure)\n", + "Data Type: object\n", + "Column: Chronic low back pain (finding)\n", + "Data Type: object\n", + "Column: Hypertriglyceridemia (disorder)\n", + "Data Type: object\n", + "Column: Donepezil hydrochloride 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Proteinuria due to type 2 diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: Prothrombin time (PT)\n", + 
"Data Type: float64\n", + "Column: Removal of subcutaneous contraceptive\n", + "Data Type: object\n", + "Column: NITROFURANTOIN MACROCRYSTALS 50 MG Oral Capsule\n", + "Data Type: object\n", + "Column: Monocytes/100 leukocytes in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Suicide risk assessment (procedure)\n", + "Data Type: object\n", + "Column: Rhinovirus RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Concussion injury of brain\n", + "Data Type: object\n", + "Column: Diabetes from Cystic Fibrosis\n", + "Data Type: float64\n", + "Column: Leukocyte esterase [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Urine protein test\n", + "Data Type: object\n", + "Column: cetirizine hydrochloride 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Injury of kidney (disorder)\n", + "Data Type: object\n", + "Column: marital\n", + "Data Type: object\n", + "Column: 0.25 ML Leuprolide Acetate 30 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: Sepsis caused by Pseudomonas (disorder)\n", + "Data Type: float64\n", + "Column: Peanut IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Urea nitrogen [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Gonorrhea infection test\n", + "Data Type: object\n", + "Column: 24hr nicotine transdermal patch\n", + "Data Type: object\n", + "Column: Oxygen administration by mask (procedure)\n", + "Data Type: object\n", + "Column: Fibromyalgia (disorder)\n", + "Data Type: object\n", + "Column: MCV [Entitic volume] by Automated count\n", + "Data Type: object\n", + "Column: Part-time employment (finding)\n", + "Data Type: object\n", + "Column: Primary malignant neoplasm of colon\n", + "Data Type: object\n", + "Column: Calcium [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: 24 HR Metformin hydrochloride 500 MG Extended Release Oral Tablet\n", + "Data Type: object\n", + "Column: 
Intravenous injection (procedure)\n", + "Data Type: object\n", + "Column: Acetaminophen 21.7 MG/ML / Dextromethorphan Hydrobromide 1 MG/ML / doxylamine succinate 0.417 MG/ML Oral Solution\n", + "Data Type: object\n", + "Column: Heart rate\n", + "Data Type: object\n", + "Column: 100 ML Epirubicin Hydrochloride 2 MG/ML Injection\n", + "Data Type: object\n", + "Column: Monocytes [#/volume] in Blood by Automated count\n", + "Data Type: float64\n", + "Column: scc\n", + "Data Type: int64\n", + "Column: gender\n", + "Data Type: object\n", + "Column: Atorvastatin 80 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Cefuroxime 250 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Tamoxifen 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: At risk for suicide (finding)\n", + "Data Type: object\n", + "Column: Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "Data Type: object\n", + "Column: Atrial Fibrillation\n", + "Data Type: object\n", + "Column: Fatigue (finding)\n", + "Data Type: object\n", + "Column: Intracavitary brachytherapy (procedure)\n", + "Data Type: object\n", + "Column: Acetaminophen 325 MG / HYDROcodone Bitartrate 7.5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Neutrophils/100 leukocytes in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Preeclampsia\n", + "Data Type: object\n", + "Column: desflurane 1000 MG/ML Inhalation Solution\n", + "Data Type: object\n", + "Column: Assessment using Alcohol Use Disorders Identification Test - Consumption (procedure)\n", + "Data Type: object\n", + "Column: Depression screening using Patient Health Questionnaire Nine Item score (procedure)\n", + "Data Type: object\n", + "Column: Electrical cardioversion\n", + "Data Type: object\n", + "Column: Diabetic retinopathy associated with type II diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: Lymphocytes/100 leukocytes in Blood by Automated count\n", + "Data Type: float64\n", 
+ "Column: Total knee replacement\n", + "Data Type: object\n", + "Column: Erythrocytes [#/volume] in Blood by Automated count\n", + "Data Type: object\n", + "Column: Red Blood Cell\n", + "Data Type: float64\n", + "Column: 10 ML Furosemide 10 MG/ML Injection\n", + "Data Type: object\n", + "Column: Rocuronium bromide 10 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Shrimp IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Penicillin G 375 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Epilepsy\n", + "Data Type: object\n", + "Column: First degree burn\n", + "Data Type: object\n", + "Column: Laceration of hand\n", + "Data Type: object\n", + "Column: Mental health Telehealth Note\n", + "Data Type: float64\n", + "Column: Victim of intimate partner abuse (finding)\n", + "Data Type: object\n", + "Column: Pneumonia (disorder)\n", + "Data Type: object\n", + "Column: Urea Nitrogen\n", + "Data Type: object\n", + "Column: Transfer to stepdown unit (procedure)\n", + "Data Type: object\n", + "Column: Osteoarthritis of hip\n", + "Data Type: object\n", + "Column: MCH [Entitic mass] by Automated count\n", + "Data Type: object\n", + "Column: Atropine Sulfate 1 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Chlamydia antigen test\n", + "Data Type: object\n", + "Column: Acute bacterial sinusitis (disorder)\n", + "Data Type: object\n", + "Column: chloroquine phosphate 500 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Miscarriage in first trimester\n", + "Data Type: object\n", + "Column: Potassium\n", + "Data Type: object\n", + "Column: Microalbuminuria due to type 2 diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: Platelet mean volume [Entitic volume] in Blood by Automated count\n", + "Data Type: object\n", + "Column: Pre-discharge assessment (procedure)\n", + "Data Type: object\n", + "Column: Biopsy of breast (procedure)\n", + "Data Type: object\n", + "Column: Acute Cholecystitis\n", + "Data 
Type: object\n", + "Column: predniSONE 20 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Eosinophils [#/volume] in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Tobacco smoking status NHIS\n", + "Data Type: object\n", + "Column: Acetaminophen 325 MG / oxyCODONE Hydrochloride 2.5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Acetaminophen 325 MG / oxyCODONE Hydrochloride 5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Hypoxemia (disorder)\n", + "Data Type: object\n", + "Column: Sepsis caused by virus (disorder)\n", + "Data Type: object\n", + "Column: 10 ML Doxorubicin Hydrochloride 2 MG/ML Injection\n", + "Data Type: object\n", + "Column: Tree pollen (substance)\n", + "Data Type: object\n", + "Column: Sodium\n", + "Data Type: object\n", + "Column: NT-proBNP\n", + "Data Type: object\n", + "Column: Streptococcus pneumoniae group B antigen test\n", + "Data Type: object\n", + "Column: Suicidal deliberate poisoning\n", + "Data Type: float64\n", + "Column: RBC Distribution Width\n", + "Data Type: float64\n", + "Column: Diabetes\n", + "Data Type: object\n", + "Column: Hepatitis C antibody test\n", + "Data Type: object\n", + "Column: Basophils/100 leukocytes in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Surgical manipulation of joint of knee\n", + "Data Type: object\n", + "Column: Cognitive and behavioral therapy (regime/therapy)\n", + "Data Type: object\n", + "Column: Hemoglobin / Hematocrit / Platelet count\n", + "Data Type: object\n", + "Column: Human immunodeficiency virus antigen test\n", + "Data Type: object\n", + "Column: 1 ML medroxyPROGESTERone acetate 150 MG/ML Injection\n", + "Data Type: object\n", + "Column: 1 ML Morphine Sulfate 5 MG/ML Injection\n", + "Data Type: object\n", + "Column: Assessment of anxiety (procedure)\n", + "Data Type: object\n", + "Column: Pancreatin 600 MG Oral Tablet\n", + "Data Type: float64\n", + "Column: Concussion with loss of consciousness\n", + "Data Type: object\n", 
+ "Column: Sodium Chloride 9 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Hemoptysis (finding)\n", + "Data Type: object\n", + "Column: Sore throat symptom (finding)\n", + "Data Type: object\n", + "Column: Body mass index 30+ - obesity (finding)\n", + "Data Type: object\n", + "Column: Acute pulmonary embolism (disorder)\n", + "Data Type: object\n", + "Column: Movement therapy (regime/therapy)\n", + "Data Type: object\n", + "Column: Resuscitation using intravenous fluid (procedure)\n", + "Data Type: object\n", + "Column: Vasectomy\n", + "Data Type: object\n", + "Column: Allopurinol 100 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Protein [Mass/volume] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Leukocytes [#/volume] in Blood by Automated count\n", + "Data Type: object\n", + "Column: Measurement of Varicella-zoster virus antibody\n", + "Data Type: object\n", + "Column: Augmentation of labor\n", + "Data Type: object\n", + "Column: Abuse-Deterrent 12 HR Oxycodone Hydrochloride 15 MG Extended Release Oral Tablet\n", + "Data Type: object\n", + "Column: Diastolic Blood Pressure\n", + "Data Type: object\n", + "Column: Transplant of lung (procedure)\n", + "Data Type: object\n", + "Column: 20 Gene mutation test\n", + "Data Type: float64\n", + "Column: Biopsy of colon\n", + "Data Type: object\n", + "Column: Misuses drugs (finding)\n", + "Data Type: object\n", + "Column: zoster\n", + "Data Type: object\n", + "Column: Recurrent rectal polyp\n", + "Data Type: object\n", + "Column: Gram positive blood culture panel by Probe in Positive blood culture\n", + "Data Type: object\n", + "Column: Asthma screening\n", + "Data Type: object\n", + "Column: Dyspnea (finding)\n", + "Data Type: object\n", + "Column: Oxygen [Partial pressure] in Arterial blood\n", + "Data Type: float64\n", + "Column: Metastasis from malignant tumor of prostate (disorder)\n", + "Data Type: object\n", + "Column: Influenza virus A Ag [Presence] in Nasopharynx by 
Rapid immunoassay\n", + "Data Type: object\n", + "Column: pH of Arterial blood\n", + "Data Type: float64\n", + "Column: Home health aide service (regime/therapy)\n", + "Data Type: object\n", + "Column: Specific gravity of Urine by Test strip\n", + "Data Type: object\n", + "Column: Ibuprofen 200 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Oxygen Saturation\n", + "Data Type: object\n", + "Column: Injury of anterior cruciate ligament\n", + "Data Type: object\n", + "Column: ado-trastuzumab emtansine 100 MG Injection\n", + "Data Type: object\n", + "Column: Mold (organism)\n", + "Data Type: object\n", + "Column: Professional / ancillary services care (regime/therapy)\n", + "Data Type: object\n", + "Column: Renal dialysis (procedure)\n", + "Data Type: object\n", + "Column: Psychosocial care (regime/therapy)\n", + "Data Type: object\n", + "Column: Initial patient assessment (procedure)\n", + "Data Type: object\n", + "Column: Sepsis (disorder)\n", + "Data Type: object\n", + "Column: House dust mite (organism)\n", + "Data Type: object\n", + "Column: SARS-COV-2 (COVID-19) vaccine mRNA spike protein LNP preservative free 30 mcg/0.3mL dose\n", + "Data Type: object\n", + "Column: Hydroxychloroquine Sulfate 200 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Injury of medial collateral ligament of knee\n", + "Data Type: object\n", + "Column: Pulmozyme (Dornase Alfa)\n", + "Data Type: float64\n", + "Column: 12 HR Cefaclor 500 MG Extended Release Oral Tablet\n", + "Data Type: object\n", + "Column: Amputation of right leg\n", + "Data Type: object\n", + "Column: Comprehensive interview and evaluation (procedure)\n", + "Data Type: object\n", + "Column: Discharge from skilled nursing facility (procedure)\n", + "Data Type: object\n", + "Column: Chronic kidney disease stage 3 (disorder)\n", + "Data Type: object\n", + "Column: Referral to home health care service (procedure)\n", + "Data Type: object\n", + "Column: baricitinib 2 MG Oral Tablet\n", + "Data Type: object\n", 
+ "Column: Total replacement of hip\n", + "Data Type: float64\n", + "Column: Radiation therapy care (regime/therapy)\n", + "Data Type: object\n", + "Column: Lung Transplant\n", + "Data Type: float64\n", + "Column: Neutrophils [#/volume] in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray\n", + "Data Type: object\n", + "Column: Occupational therapy (regime/therapy)\n", + "Data Type: object\n", + "Column: insulin human isophane 70 UNT/ML / Regular Insulin Human 30 UNT/ML Injectable Suspension [Humulin]\n", + "Data Type: object\n", + "Column: Overlapping malignant neoplasm of colon\n", + "Data Type: object\n", + "Column: Familial Alzheimer's disease of early onset (disorder)\n", + "Data Type: object\n", + "Column: Open Removal of Gall Bladder\n", + "Data Type: object\n", + "Column: Paclitaxel 100 MG Injection\n", + "Data Type: object\n", + "Column: 1 ML denosumab 60 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: Premature birth of newborn\n", + "Data Type: object\n", + "Column: Vitamin B 12 5 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Postoperative procedure education (procedure)\n", + "Data Type: object\n", + "Column: age\n", + "Data Type: object\n", + "Column: Lymphocytes [#/volume] in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Bilirubin.total [Mass/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Hyperlipidemia\n", + "Data Type: object\n", + "Column: Mental health Outpatient Note\n", + "Data Type: float64\n", + "Column: Has a criminal record (finding)\n", + "Data Type: object\n", + "Column: Malignant tumor of colon\n", + "Data Type: object\n", + "Column: Assessment using New York Heart Association Classification (procedure)\n", + "Data Type: object\n", + "Column: Nasal sinus endoscopy (procedure)\n", + "Data Type: object\n", + "Column: Lung volume reduction surgery (procedure)\n", + "Data Type: object\n", + "Column: 
Auscultation of the fetal heart\n", + "Data Type: object\n", + "Column: Development of individualized plan of care (procedure)\n", + "Data Type: object\n", + "Column: Galantamine 4 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Insulin Lispro 100 UNT/ML Injectable Solution [Humalog]\n", + "Data Type: object\n", + "Column: Malignant neoplasm of breast (disorder)\n", + "Data Type: object\n", + "Column: Glomerular filtration rate/1.73 sq M.predicted\n", + "Data Type: object\n", + "Column: Leucovorin 100 MG Injection\n", + "Data Type: object\n", + "Column: Ibuprofen 400 MG Oral Tablet [Ibu]\n", + "Data Type: object\n", + "Column: Acetaminophen 325 MG Oral Tablet [Tylenol]\n", + "Data Type: object\n", + "Column: Full-time employment (finding)\n", + "Data Type: object\n", + "Column: Verzenio 100 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Soybean IgE Ab in Serum\n", + "Data Type: object\n", + "Column: neratinib 40 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Diazepam 5 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: Mean blood pressure\n", + "Data Type: object\n", + "Column: Mammogram - symptomatic (procedure)\n", + "Data Type: object\n", + "Column: Mirena 52 MG Intrauterine System\n", + "Data Type: object\n", + "Column: Thyrotropin [Units/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Carcinoma in situ of prostate (disorder)\n", + "Data Type: object\n", + "Column: 60 ACTUAT Fluticasone propionate 0.25 MG/ACTUAT / salmeterol 0.05 MG/ACTUAT Dry Powder Inhaler\n", + "Data Type: object\n", + "Column: Ortho Tri-Cyclen 28 Day Pack\n", + "Data Type: object\n", + "Column: Lactate dehydrogenase [Enzymatic activity/volume] in Serum or Plasma by Lactate to pyruvate reaction\n", + "Data Type: float64\n", + "Column: Bilateral tubal ligation\n", + "Data Type: object\n", + "Column: Medication Reconciliation (procedure)\n", + "Data Type: object\n", + "Column: Second degree burn\n", + "Data Type: object\n", + "Column: 
Globulin [Mass/volume] in Serum by calculation\n", + "Data Type: object\n", + "Column: Artificial respiration (procedure)\n", + "Data Type: object\n", + "Column: Chronic congestive heart failure (disorder)\n", + "Data Type: object\n", + "Column: Removal of endotracheal tube (procedure)\n", + "Data Type: object\n", + "Column: 10 ML Alfentanil 0.5 MG/ML Injection\n", + "Data Type: object\n", + "Column: Excision of axillary lymph node (procedure)\n", + "Data Type: object\n", + "Column: Warfarin Sodium 5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: SARS-COV-2 (COVID-19) vaccine mRNA spike protein LNP preservative free 100 mcg/0.5mL dose\n", + "Data Type: object\n", + "Column: 1 ML DOCEtaxel 20 MG/ML Injection\n", + "Data Type: object\n", + "Column: Phenazopyridine hydrochloride 100 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Codfish IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Chronic sinusitis (disorder)\n", + "Data Type: object\n", + "Column: Care regimes assessment (procedure)\n", + "Data Type: object\n", + "Column: Ampicillin 100 MG/ML Injectable Solution\n", + "Data Type: object\n", + "Column: lapatinib 250 MG Oral Tablet\n", + "Data Type: object\n", + "Column: losartan potassium 25 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Attempted suicide - cut/stab\n", + "Data Type: object\n", + "Column: Influenza seasonal injectable preservative free\n", + "Data Type: object\n", + "Column: Drugs of abuse 5 panel - Urine by Screen method\n", + "Data Type: object\n", + "Column: Electrocardiographic procedure\n", + "Data Type: object\n", + "Column: Counseling for termination of pregnancy\n", + "Data Type: object\n", + "Column: Heart failure (disorder)\n", + "Data Type: object\n", + "Column: White oak IgE Ab in Serum\n", + "Data Type: object\n", + "Column: Echocardiography (procedure)\n", + "Data Type: object\n", + "Column: 10 ML oxaliplatin 5 MG/ML Injection\n", + "Data Type: object\n", + "Column: Injury of tendon of the rotator cuff 
of shoulder\n", + "Data Type: object\n", + "Column: Partial resection of colon\n", + "Data Type: object\n", + "Column: Non-low risk pregnancy\n", + "Data Type: object\n", + "Column: Ultrasonography of bilateral breasts (procedure)\n", + "Data Type: object\n", + "Column: Acetaminophen/Hydrocodone\n", + "Data Type: object\n", + "Column: Yaz 28 Day Pack\n", + "Data Type: object\n", + "Column: Admission to orthopedic department\n", + "Data Type: object\n", + "Column: History AND physical examination (procedure)\n", + "Data Type: object\n", + "Column: Bullet wound\n", + "Data Type: object\n", + "Column: Td (adult) preservative free\n", + "Data Type: object\n", + "Column: Chloride [Moles/volume] in Serum or Plasma\n", + "Data Type: object\n", + "Column: Interleukin 6 [Mass/volume] in Serum or Plasma\n", + "Data Type: float64\n", + "Column: Muscle pain (finding)\n", + "Data Type: object\n", + "Column: Tumor marker Cancer\n", + "Data Type: object\n", + "Column: Laceration of forearm\n", + "Data Type: object\n", + "Column: Sepsis caused by Staphylococcus aureus\n", + "Data Type: float64\n", + "Column: Myocardial Infarction\n", + "Data Type: object\n", + "Column: Sputum examination (procedure)\n", + "Data Type: object\n", + "Column: Otitis media\n", + "Data Type: object\n", + "Column: Bilirubin.total [Mass/volume] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Pain severity - 0-10 verbal numeric rating [Score] - Reported\n", + "Data Type: object\n", + "Column: Neoplasm of prostate\n", + "Data Type: object\n", + "Column: Gout\n", + "Data Type: object\n", + "Column: Vancomycin 50 MG/ML Injectable Solution\n", + "Data Type: float64\n", + "Column: Naproxen sodium 220 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Glucose [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Magnetic resonance imaging of breast (procedure)\n", + "Data Type: object\n", + "Column: 2 ML Ondansetron 2 MG/ML Injection\n", + "Data Type: object\n", + "Column: 
Social case work (regime/therapy)\n", + "Data Type: object\n", + "Column: Carbon dioxide [Partial pressure] in Arterial blood\n", + "Data Type: float64\n", + "Column: Fetus with unknown complication\n", + "Data Type: object\n", + "Column: US Guidance for biopsy of Prostate\n", + "Data Type: object\n", + "Column: Fracture of the vertebral column with spinal cord injury\n", + "Data Type: object\n", + "Column: Oral Glucose Tolerance Test\n", + "Data Type: float64\n", + "Column: Digoxin 0.125 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Metabolic syndrome X (disorder)\n", + "Data Type: object\n", + "Column: Neuropathy due to type 2 diabetes mellitus (disorder)\n", + "Data Type: object\n", + "Column: Sprain of wrist\n", + "Data Type: object\n", + "Column: 0.4 ML Enoxaparin sodium 100 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: 1 ML Enoxaparin sodium 150 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: Fracture of ankle\n", + "Data Type: object\n", + "Column: Donepezil hydrochloride 23 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Removal of intrauterine device\n", + "Data Type: object\n", + "Column: Eosinophils/100 leukocytes in Blood by Automated count\n", + "Data Type: float64\n", + "Column: Transplantation of heart (procedure)\n", + "Data Type: object\n", + "Column: vancomycin 1000 MG Injection\n", + "Data Type: object\n", + "Column: piperacillin 2000 MG / tazobactam 250 MG Injection\n", + "Data Type: object\n", + "Column: Opioid abuse (disorder)\n", + "Data Type: object\n", + "Column: amLODIPine 2.5 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Leronlimab 700 MG Injection\n", + "Data Type: object\n", + "Column: Estrogen+Progesterone receptor Ag [Presence] in Tissue by Immune stain\n", + "Data Type: object\n", + "Column: SARS-CoV-2 RNA Pnl Resp NAA+probe\n", + "Data Type: object\n", + "Column: remdesivir 100 MG Injection\n", + "Data Type: object\n", + "Column: 1 ML Epoetin Alfa 4000 UNT/ML Injection 
[Epogen]\n", + "Data Type: object\n", + "Column: Trinessa 28 Day Pack\n", + "Data Type: object\n", + "Column: 1 ML heparin sodium porcine 5000 UNT/ML Injection\n", + "Data Type: object\n", + "Column: Hemoglobin [Presence] in Urine by Test strip\n", + "Data Type: object\n", + "Column: Astemizole 10 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Sentinel lymph node biopsy (procedure)\n", + "Data Type: object\n", + "Column: 5 ML fulvestrant 50 MG/ML Prefilled Syringe\n", + "Data Type: object\n", + "Column: Hematocrit [Volume Fraction] of Blood\n", + "Data Type: object\n", + "Column: Acetaminophen 325 MG / Oxycodone Hydrochloride 10 MG Oral Tablet [Percocet]\n", + "Data Type: object\n", + "Column: Camila 28 Day Pack\n", + "Data Type: object\n", + "Column: History of appendectomy\n", + "Data Type: object\n", + "Column: remifentanil 2 MG Injection\n", + "Data Type: object\n", + "Column: Chlorpheniramine Maleate 4 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Fetal anatomy study\n", + "Data Type: object\n", + "Column: Bicarbonate [Moles/volume] in Arterial blood\n", + "Data Type: float64\n", + "Column: Screening mammography (procedure)\n", + "Data Type: object\n", + "Column: Sertraline 100 MG Oral Tablet\n", + "Data Type: object\n", + "Column: Transport problems (finding)\n", + "Data Type: object\n", + "Column: Sulfamethoxazole / Trimethoprim\n", + "Data Type: object\n", + "Column: Respiratory Disorders\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Heart and Cardiovascular Diseases\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Metabolic and Endocrine Disorders\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Neurological Disorders\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Orthopedic Injuries\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Mental Health\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Reproductive and Pregnancy\n", + "Data Type: 
object\n", + "Data Type: int64\n", + "Column: Pain Relievers and Analesics\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Cardiovascular and Blood Pressure Medications\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Injection Medications\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Oral Medications\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Other Medications\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Therapies and Regimes\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Diagnostic Procedures\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Surgical Interventions\n", + "Data Type: object\n", + "Data Type: int64\n", + "Column: Patient Care Management\n", + "Data Type: object\n", + "Data Type: int64\n" + ] + } + ], + "source": [ + "for col in df2.columns:\n", + " # print data type of column\n", + " print(f\"Column: {col}\") \n", + " print(f\"Data Type: {df2[col].dtype}\")\n", + " if col in categories:\n", + " # change data type to numeric\n", + " df2[col] = pd.to_numeric(df2[col], errors='coerce')\n", + " print(f\"Data Type: {df2[col].dtype}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dropping unnecessary columns" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Not important columns" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "unimportant_columns = ['race', 'ethnic', 'state', 'Social migrant (finding)', 'Lack of access to transportation (finding)', 'Transport problems (finding)', 'Full-time employment (finding)', 'Face mask (physical object)', 'Childbirth', 'Cesarean section', 'Normal pregnancy', 'Non-low risk pregnancy']\n", + "df2.drop(columns=unimportant_columns, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Columns used in new features" + ] + }, + { + "cell_type": 
"code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "used_columns = respiratory_disorders_columns + heart_and_cardiovascular_diseases_columns + metabolic_and_endocrine_disorders_columns + neurological_disorders_columns + orthopedic_injuries_columns + mental_health_columns + reproductive_and_pregancy_columns + pain_relievers_and_analesics_columns + cardiovascular_and_blood_pressure_medications_columns + injection_medications_columns + oral_medications_columns + other_medications_columns + therapies_and_regimes_columns + diagnostic_procedures_columns + surgerical_interventions_columns + patient_care_management_columns\n", + "df2.drop(columns=used_columns, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Columns with only null values" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Column Null Values Percentage\n", + "---------------------------------------------------------------------------------------------------- ------------------------\n", + "Take blood sample 100\n", + "Infection caused by Staphylococcus aureus 100\n", + "History of upper limb amputation (situation) 100\n", + "Cystic Fibrosis 100\n", + "Body mass index 40+ - severely obese (finding) 100\n", + "Sweat Test 100\n", + "Incision of trachea (procedure) 100\n", + "canagliflozin 100 MG Oral Tablet 100\n", + "History of disarticulation at wrist (situation) 100\n", + "Diabetes from Cystic Fibrosis 100\n", + "Sepsis caused by Pseudomonas (disorder) 100\n", + "Suicidal deliberate poisoning 100\n", + "Pancreatin 600 MG Oral Tablet 100\n", + "20 Gene mutation test 100\n", + "Total replacement of hip 100\n", + "Lung Transplant 100\n", + "Sepsis caused by Staphylococcus aureus 100\n", + "Vancomycin 50 MG/ML Injectable Solution 100\n", + "Oral Glucose Tolerance Test 100\n", + "Intravenous infusion (procedure) 100\n", + "0.67 ML 
anakinra 149 MG/ML Prefilled Syringe 100\n", + "Bee venom (substance) 100\n", + "Intravenous antibiotic therapy 100\n", + "Sputum Culture 100\n", + "Interleukin 6 [Mass/volume] in Serum or Plasma 99.9889\n", + "Tumor marker Cancer 99.7679\n", + "Thyroxine (T4) free [Mass/volume] in Serum or Plasma 99.4695\n", + "Thyrotropin [Units/volume] in Serum or Plasma 99.4695\n", + "Bicarbonate [Moles/volume] in Arterial blood 98.6848\n", + "Oxygen/Inspired gas setting [Volume Fraction] Ventilator 98.6848\n", + "Oxygen [Partial pressure] in Arterial blood 98.6848\n", + "pH of Arterial blood 98.6848\n", + "Carbon dioxide [Partial pressure] in Arterial blood 98.6848\n", + "Abuse Status [OMAHA] 98.5301\n", + "Housing status 98.5301\n", + "HIV status 98.5301\n", + "Are you covered by health insurance or some other kind of health care plan [PhenX] 98.5301\n", + "Total Bilirubin (Elevated) 98.3311\n", + "Red Blood Cell 98.3311\n", + "RBC Distribution Width 98.3311\n", + "MCV 98.3311\n", + "Platelet Count 98.3311\n", + "Anion Gap 98.3311\n", + "White Blood Cell (Elevated) 98.3311\n", + "Influenza virus B Ag [Presence] in Nasopharynx by Rapid immunoassay 98.3201\n", + "Influenza virus A Ag [Presence] in Nasopharynx by Rapid immunoassay 98.3201\n", + "Oxygen Saturation 98.2427\n", + "Gram positive blood culture panel by Probe in Positive blood culture 98.2427\n", + "Mean blood pressure 98.2427\n", + "Lactate [Mass/volume] in Blood 98.2427\n", + "Capillary refill [Time] of Nail bed 98.2427\n", + "Estrogen+Progesterone receptor Ag [Presence] in Tissue by Immune stain 97.8117\n", + "Prothrombin time (PT) 97.1706\n", + "Eosinophils/100 leukocytes in Blood by Automated count 97.1706\n", + "Procalcitonin [Mass/volume] in Serum or Plasma 97.1706\n", + "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma 97.1706\n", + "Basophils [#/volume] in Blood by Automated count 97.1706\n", + "INR in Platelet poor plasma by Coagulation assay 97.1706\n", + "Neutrophils/100 leukocytes in Blood 
by Automated count 97.1706\n", + "Monocytes/100 leukocytes in Blood by Automated count 97.1706\n", + "Monocytes [#/volume] in Blood by Automated count 97.1706\n", + "Lymphocytes/100 leukocytes in Blood by Automated count 97.1706\n", + "Lactate dehydrogenase [Enzymatic activity/volume] in Serum or Plasma by Lactate to pyruvate reaction 97.1706\n", + "Lymphocytes [#/volume] in Blood by Automated count 97.1706\n", + "Eosinophils [#/volume] in Blood by Automated count 97.1706\n", + "Neutrophils [#/volume] in Blood by Automated count 97.1706\n", + "Basophils/100 leukocytes in Blood by Automated count 97.1706\n", + "C reactive protein [Mass/volume] in Serum or Plasma 97.1706\n", + "Fibrin D-dimer FEU [Mass/volume] in Platelet poor plasma 97.1706\n", + "Treatment status Cancer 96.7949\n", + "Parainfluenza virus 2 RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Respiratory syncytial virus RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Human metapneumovirus RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Rhinovirus RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Parainfluenza virus 3 RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection 96.7396\n", + "American house dust mite IgE Ab in Serum 96.5517\n", + "Latex IgE Ab in Serum 96.5517\n", + "Peanut IgE Ab in Serum 96.5517\n", + "White oak IgE Ab in Serum 96.5517\n", + "Egg white IgE Ab in Serum 96.5517\n", + "Shrimp IgE Ab in Serum 96.5517\n", + "Codfish IgE Ab in Serum 
96.5517\n", + "Wheat IgE Ab in Serum 96.5517\n", + "Soybean IgE Ab in Serum 96.5517\n", + "Honey bee IgE Ab in Serum 96.5517\n", + "Cladosporium herbarum IgE Ab in Serum 96.5517\n", + "Cow milk IgE Ab in Serum 96.5517\n", + "Common Ragweed IgE Ab in Serum 96.5517\n", + "Cat dander IgE Ab in Serum 96.5517\n", + "Walnut IgE Ab in Serum 96.5517\n", + "Stage group.clinical Cancer 96.397\n", + "HER2 [Presence] in Breast cancer specimen by Immune stain 96.397\n", + "Progesterone receptor Ag [Presence] in Breast cancer specimen by Immune stain 96.397\n", + "Estrogen receptor Ag [Presence] in Breast cancer specimen by Immune stain 96.397\n", + "HER2 [Presence] in Breast cancer specimen by FISH 96.397\n", + "Response to cancer treatment 96.2312\n", + "Smokes tobacco daily 96.1096\n", + "SARS-CoV-2 RNA Pnl Resp NAA+probe 95.0597\n", + "FEV1/FVC 93.3245\n", + "Drugs of abuse 5 panel - Urine by Screen method 89.2352\n", + "DXA [T-score] Bone density 88.7821\n", + "Hematocrit [Volume Fraction] of Blood 87.2679\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method 85.1569\n", + "Polyp size greatest dimension by CAP cancer protocols 85.1569\n", + "US Guidance for biopsy of Prostate 81.1561\n", + "Clarity of Urine 81.0234\n", + "Bilirubin.total [Mass/volume] in Urine by Test strip 81.0234\n", + "Protein [Presence] in Urine by Test strip 81.0234\n", + "Ketones [Presence] in Urine by Test strip 81.0234\n", + "Color of Urine 81.0234\n", + "Leukocyte esterase [Presence] in Urine by Test strip 81.0234\n", + "Nitrite [Presence] in Urine by Test strip 81.0234\n", + "pH of Urine by Test strip 81.0234\n", + "Protein [Mass/volume] in Urine by Test strip 81.0234\n", + "Ketones [Mass/volume] in Urine by Test strip 81.0234\n", + "Glucose [Presence] in Urine by Test strip 81.0234\n", + "Specific gravity of Urine by Test strip 81.0234\n", + "Glucose [Mass/volume] in Urine by Test strip 81.0234\n", + "Appearance of Urine 81.0234\n", + "Hemoglobin [Presence] in Urine by Test 
strip 81.0234\n", + "Microalbumin Creatinine Ratio 77.542\n", + "Prostate specific Ag [Mass/volume] in Serum or Plasma 75.0663\n", + "Functional capacity NYHA 73.9943\n", + "Objective assessment of cardiovascular disease NYHA 73.9943\n", + "Left ventricular Ejection fraction 73.519\n", + "Iron saturation [Mass Fraction] in Serum or Plasma 73.4637\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma 73.4637\n", + "Magnesium [Mass/volume] in Serum or Plasma 73.4637\n", + "NT-proBNP 73.4637\n", + "Iron [Mass/volume] in Serum or Plasma 73.4637\n", + "Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method 70.6897\n", + "Ferritin [Mass/volume] in Serum or Plasma 70.6897\n", + "Erythrocyte distribution width [Ratio] by Automated count 70.6897\n", + "Oxygen saturation in Arterial blood 70.2697\n", + "Chloride [Moles/volume] in Serum or Plasma 69.1866\n", + "Glucose [Mass/volume] in Serum or Plasma 69.1866\n", + "Carbon Dioxide 69.1866\n", + "Urea nitrogen [Mass/volume] in Serum or Plasma 69.1866\n", + "Calcium [Mass/volume] in Serum or Plasma 69.1866\n", + "Globulin [Mass/volume] in Serum by calculation 61.0522\n", + "Body temperature 48.4195\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma 39.0141\n", + "Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma 38.141\n", + "Albumin 38.141\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma 38.141\n", + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma 38.141\n", + "Glomerular filtration rate/1.73 sq M.predicted 29.1888\n", + "Hemoglobin A1c/Hemoglobin.total in Blood 23.1101\n", + "Chloride 13.2294\n", + "Glucose 13.2294\n", + "Urea Nitrogen 13.2294\n", + "Calcium 13.2294\n", + "Sodium 12.1905\n", + "Creatinine 12.1905\n", + "Potassium 12.1905\n", + "Erythrocyte distribution width [Entitic volume] by Automated count 0.939434\n", + "Platelet mean volume [Entitic volume] in Blood by Automated count 0.939434\n", + 
"Hematocrit [Volume Fraction] of Blood by Automated count 0.862069\n", + "Platelets [#/volume] in Blood by Automated count 0.851017\n", + "MCV [Entitic volume] by Automated count 0.851017\n", + "MCHC [Mass/volume] by Automated count 0.851017\n", + "Erythrocytes [#/volume] in Blood by Automated count 0.851017\n", + "MCH [Entitic mass] by Automated count 0.851017\n", + "Leukocytes [#/volume] in Blood by Automated count 0.851017\n", + "Hemoglobin [Mass/volume] in Blood 0.806808\n", + "Heart rate 0.0552608\n", + "Diastolic Blood Pressure 0.0552608\n", + "Body Mass Index 0.0552608\n", + "Systolic Blood Pressure 0.0552608\n", + "Respiratory rate 0.0552608\n", + "Total Cholesterol 0.0552608\n", + "Low Density Lipoprotein Cholesterol 0.0552608\n", + "High Density Lipoprotein Cholesterol 0.0552608\n", + "Tobacco smoking status NHIS 0.0552608\n", + "Triglycerides 0.0552608\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported 0.0442087\n" + ] + } + ], + "source": [ + "null_values = df2.isnull().sum()\n", + "null_values = null_values[null_values > 0]\n", + "null_values = 100 * null_values / len(df2)\n", + "null_values = null_values.sort_values(ascending=False)\n", + "null_values_df = null_values.reset_index()\n", + "null_values_df.columns = ['Column', 'Null Values Percentage']\n", + "print(tabulate(null_values_df, headers='keys', tablefmt='simple', showindex=False))" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['Take blood sample', 'Incision of trachea (procedure)',\n", + " 'Intravenous antibiotic therapy', 'Sputum Culture',\n", + " 'Bee venom (substance)', 'Infection caused by Staphylococcus aureus',\n", + " 'Intravenous infusion (procedure)',\n", + " 'History of upper limb amputation (situation)', 'Cystic Fibrosis',\n", + " 'Body mass index 40+ - severely obese (finding)', 'Sweat Test',\n", + " '0.67 ML anakinra 149 MG/ML Prefilled 
Syringe',\n", + " 'canagliflozin 100 MG Oral Tablet',\n", + " 'History of disarticulation at wrist (situation)',\n", + " 'Diabetes from Cystic Fibrosis',\n", + " 'Sepsis caused by Pseudomonas (disorder)',\n", + " 'Suicidal deliberate poisoning', 'Pancreatin 600 MG Oral Tablet',\n", + " '20 Gene mutation test', 'Total replacement of hip', 'Lung Transplant',\n", + " 'Sepsis caused by Staphylococcus aureus',\n", + " 'Vancomycin 50 MG/ML Injectable Solution',\n", + " 'Oral Glucose Tolerance Test'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "# drop columns with only null values\n", + "null_columns = df2.columns[df2.isnull().all()]\n", + "print(null_columns)\n", + "df2.dropna(axis=1, how='all', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "177" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df2.columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Encoding" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Age " + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "df_one_hot = pd.get_dummies(df2['age'], prefix='age')\n", + "df2 = pd.concat([df2, df_one_hot], axis=1)\n", + "df2['age_30t50'] = df2['age_30t50'].astype(int)\n", + "df2['age_50t70'] = df2['age_50t70'].astype(int)\n", + "df2['age_gt70'] = df2['age_gt70'].astype(int)\n", + "df2.drop(columns=['age'], inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Columns with normal/abnormal values" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma\n", + "Albumin\n", + "Alkaline phosphatase [Enzymatic 
activity/volume] in Serum or Plasma\n", + "American house dust mite IgE Ab in Serum\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma\n", + "Bilirubin.total [Mass/volume] in Urine by Test strip\n", + "Body Mass Index\n", + "Body temperature\n", + "Calcium\n", + "Calcium [Mass/volume] in Serum or Plasma\n", + "Carbon Dioxide\n", + "Cat dander IgE Ab in Serum\n", + "Chloride\n", + "Chloride [Moles/volume] in Serum or Plasma\n", + "Cladosporium herbarum IgE Ab in Serum\n", + "Codfish IgE Ab in Serum\n", + "Common Ragweed IgE Ab in Serum\n", + "Cow milk IgE Ab in Serum\n", + "Creatinine\n", + "DXA [T-score] Bone density\n", + "Diastolic Blood Pressure\n", + "Egg white IgE Ab in Serum\n", + "Erythrocyte distribution width [Entitic volume] by Automated count\n", + "Erythrocyte distribution width [Ratio] by Automated count\n", + "Erythrocytes [#/volume] in Blood by Automated count\n", + "FEV1/FVC\n", + "Ferritin [Mass/volume] in Serum or Plasma\n", + "Globulin [Mass/volume] in Serum by calculation\n", + "Glomerular filtration rate/1.73 sq M.predicted\n", + "Glucose\n", + "Glucose [Mass/volume] in Serum or Plasma\n", + "Glucose [Mass/volume] in Urine by Test strip\n", + "Heart rate\n", + "Hematocrit [Volume Fraction] of Blood\n", + "Hematocrit [Volume Fraction] of Blood by Automated count\n", + "Hemoglobin A1c/Hemoglobin.total in Blood\n", + "Hemoglobin [Mass/volume] in Blood\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method\n", + "High Density Lipoprotein Cholesterol\n", + "Honey bee IgE Ab in Serum\n", + "Iron [Mass/volume] in Serum or Plasma\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma\n", + "Iron saturation [Mass Fraction] in Serum or Plasma\n", + "Lactate [Mass/volume] in Blood\n", + "Latex IgE Ab in Serum\n", + "Left ventricular Ejection fraction\n", + "Leukocytes [#/volume] in Blood by Automated count\n", + "Low Density Lipoprotein 
Cholesterol\n", + "MCH [Entitic mass] by Automated count\n", + "MCHC [Mass/volume] by Automated count\n", + "MCV [Entitic volume] by Automated count\n", + "Magnesium [Mass/volume] in Serum or Plasma\n", + "Mean blood pressure\n", + "Microalbumin Creatinine Ratio\n", + "NT-proBNP\n", + "Oxygen Saturation\n", + "Oxygen saturation in Arterial blood\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported\n", + "Peanut IgE Ab in Serum\n", + "Platelet mean volume [Entitic volume] in Blood by Automated count\n", + "Platelets [#/volume] in Blood by Automated count\n", + "Polyp size greatest dimension by CAP cancer protocols\n", + "Potassium\n", + "Prostate specific Ag [Mass/volume] in Serum or Plasma\n", + "Protein [Mass/volume] in Urine by Test strip\n", + "Respiratory rate\n", + "Shrimp IgE Ab in Serum\n", + "Sodium\n", + "Soybean IgE Ab in Serum\n", + "Specific gravity of Urine by Test strip\n", + "Systolic Blood Pressure\n", + "Thyrotropin [Units/volume] in Serum or Plasma\n", + "Thyroxine (T4) free [Mass/volume] in Serum or Plasma\n", + "Total Cholesterol\n", + "Triglycerides\n", + "Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method\n", + "US Guidance for biopsy of Prostate\n", + "Urea Nitrogen\n", + "Urea nitrogen [Mass/volume] in Serum or Plasma\n", + "Walnut IgE Ab in Serum\n", + "Wheat IgE Ab in Serum\n", + "White oak IgE Ab in Serum\n", + "pH of Urine by Test strip\n" + ] + } + ], + "source": [ + "n_abn_columns = []\n", + "for col in df2.columns:\n", + " if df2[col].isin(['normal', 'abnormal']).any():\n", + " n_abn_columns.append(col)\n", + "n_abn_columns.sort()\n", + "for col in n_abn_columns:\n", + " print(col)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "How many null values are in these columns?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Column Null Values Percentage\n", + "------------------------------------------------------------------------------ ------------------------\n", + "Thyroxine (T4) free [Mass/volume] in Serum or Plasma 99.4695\n", + "Thyrotropin [Units/volume] in Serum or Plasma 99.4695\n", + "Mean blood pressure 98.2427\n", + "Oxygen Saturation 98.2427\n", + "Lactate [Mass/volume] in Blood 98.2427\n", + "Honey bee IgE Ab in Serum 96.5517\n", + "Latex IgE Ab in Serum 96.5517\n", + "Peanut IgE Ab in Serum 96.5517\n", + "Egg white IgE Ab in Serum 96.5517\n", + "Cow milk IgE Ab in Serum 96.5517\n", + "Common Ragweed IgE Ab in Serum 96.5517\n", + "Codfish IgE Ab in Serum 96.5517\n", + "Cladosporium herbarum IgE Ab in Serum 96.5517\n", + "Shrimp IgE Ab in Serum 96.5517\n", + "Cat dander IgE Ab in Serum 96.5517\n", + "Soybean IgE Ab in Serum 96.5517\n", + "Walnut IgE Ab in Serum 96.5517\n", + "Wheat IgE Ab in Serum 96.5517\n", + "American house dust mite IgE Ab in Serum 96.5517\n", + "White oak IgE Ab in Serum 96.5517\n", + "FEV1/FVC 93.3245\n", + "DXA [T-score] Bone density 88.7821\n", + "Hematocrit [Volume Fraction] of Blood 87.2679\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method 85.1569\n", + "Polyp size greatest dimension by CAP cancer protocols 85.1569\n", + "US Guidance for biopsy of Prostate 81.1561\n", + "Glucose [Mass/volume] in Urine by Test strip 81.0234\n", + "pH of Urine by Test strip 81.0234\n", + "Protein [Mass/volume] in Urine by Test strip 81.0234\n", + "Specific gravity of Urine by Test strip 81.0234\n", + "Bilirubin.total [Mass/volume] in Urine by Test strip 81.0234\n", + "Microalbumin Creatinine Ratio 77.542\n", + "Prostate specific Ag [Mass/volume] in Serum or Plasma 75.0663\n", + "Left ventricular Ejection fraction 73.519\n", + "Magnesium [Mass/volume] in Serum or Plasma 
73.4637\n", + "NT-proBNP 73.4637\n", + "Iron saturation [Mass Fraction] in Serum or Plasma 73.4637\n", + "Iron [Mass/volume] in Serum or Plasma 73.4637\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma 73.4637\n", + "Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method 70.6897\n", + "Erythrocyte distribution width [Ratio] by Automated count 70.6897\n", + "Ferritin [Mass/volume] in Serum or Plasma 70.6897\n", + "Oxygen saturation in Arterial blood 70.2697\n", + "Chloride [Moles/volume] in Serum or Plasma 69.1866\n", + "Carbon Dioxide 69.1866\n", + "Calcium [Mass/volume] in Serum or Plasma 69.1866\n", + "Glucose [Mass/volume] in Serum or Plasma 69.1866\n", + "Urea nitrogen [Mass/volume] in Serum or Plasma 69.1866\n", + "Globulin [Mass/volume] in Serum by calculation 61.0522\n", + "Body temperature 48.4195\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma 39.0141\n", + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma 38.141\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma 38.141\n", + "Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma 38.141\n", + "Albumin 38.141\n", + "Glomerular filtration rate/1.73 sq M.predicted 29.1888\n", + "Hemoglobin A1c/Hemoglobin.total in Blood 23.1101\n", + "Chloride 13.2294\n", + "Urea Nitrogen 13.2294\n", + "Glucose 13.2294\n", + "Calcium 13.2294\n", + "Potassium 12.1905\n", + "Creatinine 12.1905\n", + "Sodium 12.1905\n", + "Erythrocyte distribution width [Entitic volume] by Automated count 0.939434\n", + "Platelet mean volume [Entitic volume] in Blood by Automated count 0.939434\n", + "Hematocrit [Volume Fraction] of Blood by Automated count 0.862069\n", + "Platelets [#/volume] in Blood by Automated count 0.851017\n", + "Erythrocytes [#/volume] in Blood by Automated count 0.851017\n", + "MCV [Entitic volume] by Automated count 0.851017\n", + "MCHC [Mass/volume] by Automated count 0.851017\n", + "MCH [Entitic mass] by 
Automated count 0.851017\n", + "Leukocytes [#/volume] in Blood by Automated count 0.851017\n", + "Hemoglobin [Mass/volume] in Blood 0.806808\n", + "Respiratory rate 0.0552608\n", + "Diastolic Blood Pressure 0.0552608\n", + "Systolic Blood Pressure 0.0552608\n", + "Total Cholesterol 0.0552608\n", + "Triglycerides 0.0552608\n", + "Body Mass Index 0.0552608\n", + "Low Density Lipoprotein Cholesterol 0.0552608\n", + "Heart rate 0.0552608\n", + "High Density Lipoprotein Cholesterol 0.0552608\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported 0.0442087\n" + ] + } + ], + "source": [ + "n_abn_df = df2[n_abn_columns]\n", + "null_values = n_abn_df.isnull().sum()\n", + "null_values = null_values[null_values > 0]\n", + "null_values = 100 * null_values / len(n_abn_df)\n", + "null_values = null_values.sort_values(ascending=False)\n", + "null_values_df = null_values.reset_index()\n", + "null_values_df.columns = ['Column', 'Null Values Percentage']\n", + "print(tabulate(null_values_df, headers='keys', tablefmt='simple', showindex=False))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Many of these columns have more than 90% null values. We will drop every column above that threshold."
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of columns to drop: 21\n" + ] + } + ], + "source": [ + "null_values_cols = []\n", + "for col in n_abn_columns:\n", + " if df2[col].isnull().sum()/len(df2) > 0.9:\n", + " null_values_cols.append(col)\n", + "print('Number of columns to drop:', len(null_values_cols))\n", + "df2.drop(columns=null_values_cols, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['American house dust mite IgE Ab in Serum',\n", + " 'Cat dander IgE Ab in Serum',\n", + " 'Cladosporium herbarum IgE Ab in Serum',\n", + " 'Codfish IgE Ab in Serum',\n", + " 'Common Ragweed IgE Ab in Serum',\n", + " 'Cow milk IgE Ab in Serum',\n", + " 'Egg white IgE Ab in Serum',\n", + " 'FEV1/FVC',\n", + " 'Honey bee IgE Ab in Serum',\n", + " 'Lactate [Mass/volume] in Blood',\n", + " 'Latex IgE Ab in Serum',\n", + " 'Mean blood pressure',\n", + " 'Oxygen Saturation',\n", + " 'Peanut IgE Ab in Serum',\n", + " 'Shrimp IgE Ab in Serum',\n", + " 'Soybean IgE Ab in Serum',\n", + " 'Thyrotropin [Units/volume] in Serum or Plasma',\n", + " 'Thyroxine (T4) free [Mass/volume] in Serum or Plasma',\n", + " 'Walnut IgE Ab in Serum',\n", + " 'Wheat IgE Ab in Serum',\n", + " 'White oak IgE Ab in Serum']" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "null_values_cols" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The remaining normal/abnormal columns will be one-hot encoded using get_dummies."
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "n_abn_columns = [col for col in n_abn_columns if col not in null_values_cols]\n", + "df2 = pd.get_dummies(df2, columns=n_abn_columns, prefix_sep='_', dtype=int)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Other non-numeric columns will be encoded using get_dummies." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Appearance of Urine\n", + "[nan 'cloudy']\n", + "Functional capacity NYHA\n", + "[nan 'classii' 'classiii' 'classi' 'classiv']\n", + "White Blood Cell (Elevated)\n", + "[ nan 11.6 12.9 13.3 14.1 14.5 12.3 11.4 14. 13.7 13.9 14.2 12.4 14.3\n", + " 12. 12.8 11.7 12.1 13.5 14.4 14.6 13.2 14.8 13.6 11.9 11.8 11.5 12.2\n", + " 13.8 13.4 11.3 15. 12.7 11.1 14.7 12.6 13. 11.2 14.9 12.5]\n", + "Color of Urine\n", + "[nan 'reddish' 'brown']\n", + "Objective assessment of cardiovascular disease NYHA\n", + "[nan 'minimal' 'severe' 'mod-severe']\n", + "Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "Abuse Status [OMAHA]\n", + "[nan 'no' 'severe']\n", + "Stage group.clinical Cancer\n", + "[nan 'earlystage' 'latestage']\n", + "Housing status\n", + "[nan 'homeless']\n", + "Capillary refill [Time] of Nail bed\n", + "[nan 'increased']\n", + "Smokes tobacco daily\n", + "[nan True]\n", + "Platelet Count\n", + "[ nan 162.1 291.7 444. 411. 368.2 250. 217.9 178.7 292.5 428.9 254.8\n", + " 423.7 427.7 306.4 421.5 340.9 227.8 322.3 192.7 323.4 440.8 349.8 429.9\n", + " 328.2 374.3 228.3 204.6 308.6 200.3 429.3 174.3 155.7 329.5 173.6 204.9\n", + " 260.9 420.7 356.7 449.9 387.3 171.6 440.4 384.8 417.1 186. 396.8 307.1\n", + " 239.1 175.1 234.4 268. 
331.7 326.5 324.6 391.3 336.6 210.8 348.6 407.5\n", + " 417.6 198.2 226.9 231.5 243.1 378.9 325.2 446.3 266.9 244.4 371. 442.4\n", + " 432.1 258.9 214.5 264.5 335. 338.2 444.9 278.5 211.3 385.1 350.5 290.1\n", + " 248. 421.3 448.4 304.1 265.8 184.5 281.8 434.9 291.9 297.8 205.6 191.5\n", + " 293.7 287. 327. 249.9 412.8 346.9 422.3 320.4 155.4 173.3 339.4 402.8\n", + " 394.8 182.9 185.9 299.4 252.4 265.1 155.3 299.9 212.9 344.8 428.8 405.8\n", + " 298.4 385.8 257.1 439.4 210.5 254.2 303.7 283.6 329.8 162.3 317. 365.6\n", + " 273.4 447.5 209.6 196.1 415.3 197.6 295. 357.9 342.9 266. 224.8 181.3\n", + " 424.9 265.2 303.6 162.8 367.1]\n", + "Nitrite [Presence] in Urine by Test strip\n", + "[nan 'negative']\n", + "Progesterone receptor Ag [Presence] in Breast cancer specimen by Immune stain\n", + "[nan 'positive' 'negative']\n", + "Anion Gap\n", + "[ nan 9.4 7.2 9.5 13.2 3.5 11.8 3.4 2.4 8.1 4.9 2.3 12.1 4.6\n", + " 7.6 7.7 5.6 10.7 4.1 10.2 11.4 12.2 7.5 13.9 6.7 6.4 6.9 13.6\n", + " 10.1 3.1 3.9 13.3 5.3 4.5 12. 6.2 11.3 8.9 10.5 10.6 11.9 5.8\n", + " 12.6 11.6 14.9 8.7 11.1 12.5 4.3 10. 2.1 5.5 14. 11. 14.7 8.5\n", + " 12.3 14.1 5.9 6.1 2. 7.1 3.7 5.4 13.8 8.8 10.3 2.7 11.7 2.8\n", + " 5. 10.8 9. 9.9 14.3 12.7 14.8 7. 9.1 9.8 8.2 5.2 8.3 4.8\n", + " 12.8 6.3 8.4 7.3 11.2 7.9 13.4 4. 6. 
3.3 3.6]\n", + "Influenza virus B Ag [Presence] in Nasopharynx by Rapid immunoassay\n", + "[nan False True]\n", + "C reactive protein [Mass/volume] in Serum or Plasma\n", + "[ nan 10.27 9.87 10.57 10.13 10.18 9.84 10.23 10.08 10.73 10.42 10.32\n", + " 12.84 9.42 10.14 9.77 10.01 9.89 14.09 9.98 13.15 10.7 9.9 13.43\n", + " 10.21 13.54 9.78 9.66 10.64 12.89 10.31 10.4 9.7 13.09 10.05 10.45\n", + " 9.94 10.24 10.38 10.06 10.26 10.37 10.17 13.56 13.03 9.6 10.6 9.51\n", + " 9.32 13.57 10.9 10.8 10.36 10.78 9.4 9.48 12.45 9.14 10.3 10.1\n", + " 9.86 9.88 13.16 10.44 13.64 10.02 9.93 9.68 10.04 9.8 9.76 10.11\n", + " 10.69 10.22 9.3 13.39 9.96 8.91 10.03 9.83 11.2 13.74 9.08 9.63\n", + " 10.49 9.52 10.07 10.58 12.49 9.91 10.43 10.34 9.65 10.52 9.62 9.54\n", + " 13.82 10.96 9.43 10.83 10.59 9.5 10.09 10.2 9.58 12.9 9.1 11.\n", + " 10.76 10.16 9.69 9.64 12.21 9.46 12.94 9.67 12.46 11.28 10.84 9.06\n", + " 10.85 10.72 9.82 10.63 15.4 9.56 9.27 9.47 9.72 9.11 10.91 9.24\n", + " 10.54 10.95 10.47 9.92 13.67 9.44 12.6 9.04 13.07 9.13 9.19 9.15\n", + " 9.81 9.73 9.97 13.1 12.3 13.14 10.39 11.14 14.45 10.12 13.11 12.82\n", + " 10.28 9.57 12.09]\n", + "Parainfluenza virus 2 RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "Fibrin D-dimer FEU [Mass/volume] in Platelet poor plasma\n", + "[ nan 0.42 0.47 0.43 0.38 0.45 0.4 0.34 11.01 0.56 5.2 11.35\n", + " 0.44 7.23 11.38 0.46 8.89 11.76 0.36 0.51 2.12 7.67 7.83 6.51\n", + " 0.49 6.52 0.53 10.17 0.41 10.02 0.39 0.37 13.81 13.34 0.5 7.94\n", + " 0.54 0.35 10.14 8.09 1.96 0.3 6.45 10.19 12.61 8.73 1.43 4.95\n", + " 1.73 1.37 0.32 9.84 8.56 1.5 9.88 11.7 14.05 5.89 0.55 7.09]\n", + "label\n", + "[0 1]\n", + "Oxygen/Inspired gas setting [Volume Fraction] Ventilator\n", + "[ nan 69.41 65.75 60.01 59.06 77.46 67.87 73.69 63.76 57.75 56.91 71.23\n", + " 68.25 63.79 61.4 60.19 69.44 65.89 61.81 70.96 68.06 61.94 62.55 60.05\n", + " 61.73 59.5 62.46 66.76 71.5 66.16 68.96 64.94 65.71 71.08 64.1 
59.46\n", + " 70.17 60.64 55.96 63.64 59.38 61.76 66.34 64.53 60.16 61.69 71.42 71.07\n", + " 54.98 65.87 59.32 66.7 72.12 63.82 68.18 63.61 60.77 70.26 59.45 64.42\n", + " 59.19 65.42 59.85 57.18 71.11 64.75 66.46 56.26 55.83 64.61 69.65 58.36\n", + " 68.37 68.27 75.45 58.97 62.9 64.13 66.98 61.06 62.16 69.7 70.94 69.67\n", + " 64.85 70.65 54.01 67.29 62.26 54.76 61.66 60.95 70.62 61.7 61.24 56.48\n", + " 65.92 70.51 56.17 72. 68.82 75.88 66.49 54.43 64.93 62.64 63.55 68.23\n", + " 66.55 66.3 68.28 73.22 65.82 61.49]\n", + "HER2 [Presence] in Breast cancer specimen by Immune stain\n", + "[nan 'positive' 'negative']\n", + "Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "INR in Platelet poor plasma by Coagulation assay\n", + "[ nan 2.05 1.95 2.17 2.12 1.82 1.77 2.25 1.39 3.9 1.97 1.84 2.38 2.63\n", + " 1.76 3.99 4.08 2.08 1.79 3.8 2.01 3.98 2.3 1.64 4. 1.94 2.04 2.\n", + " 4.2 1.62 2.23 2.1 2.02 1.88 3.82 1.34 2.24 1.65 4.1 4.04 1.81 1.89\n", + " 2.16 3.81 2.22 2.54 2.37 1.86 2.11 4.14 2.21 2.41 1.83 2.14 4.27 2.07\n", + " 3.91 2.18 1.87 1.63 1.72 3.97 2.2 1.61 1.7 1.55 2.06 1.59 3.73 1.68\n", + " 1.78 1.92 2.13 2.26 1.56 1.93 1.8 2.28 4.03 1.58 2.44 1.85 3.78 1.98\n", + " 1.66 1.96 3.89 2.27 1.71 3.94 1.73 1.9 1.6 3.83 2.15 2.32 2.36 2.33\n", + " 2.6 4.36 1.67 3.85 2.43 2.58 2.48 2.29 4.09 4.07 3.95 1.91 3.86 3.75\n", + " 4.16 3.79]\n", + "Protein [Presence] in Urine by Test strip\n", + "[nan '3+' '1+' '2+']\n", + "MCV\n", + "[ nan 92.6 94.3 93.5 82.1 91.4 90.9 88.4 88.2 89.2 92.7 91.5 89.1 83.1\n", + " 85.9 85.3 89.5 90.4 87.5 91.3 90.5 83.6 81.7 82.5 85.5 88. 93.4 81.5\n", + " 81.8 92.2 91.1 88.6 80.6 94. 87.1 82.2 88.5 82.8 85.2 81.3 81.2 83.7\n", + " 95.2 82.3 95.9 80.1 95.6 80.2 84.2 92.4 90.1 87.4 84.3 90.7 84.9 84.4\n", + " 85.4 91.7 94.1 91.9 80.7 83.8 88.8 94.5 91.8 89.6 84.5 95.8 95.3 91.2\n", + " 86.8 93.1 90.2 87.2 93.9 81.4 82.4 89.4 87. 93.3 82. 
80.9 90.3 93.2\n", + " 88.9 86.5 87.8 96. 83. 87.6 93.8 89. 84.7 88.1 80. 85.1 86.2 86.3]\n", + "HIV status\n", + "[nan 'negative' 'positive']\n", + "Estrogen receptor Ag [Presence] in Breast cancer specimen by Immune stain\n", + "[nan 'positive' 'negative']\n", + "Basophils [#/volume] in Blood by Automated count\n", + "[ nan 0.32 0.3 0.33 0.31 0.28 0.29 0.27 0.34 0.26 0.25 0.24 0.35]\n", + "Respiratory syncytial virus RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "Parainfluenza virus 3 RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "Clarity of Urine\n", + "[nan 'cloudy' 'translucent']\n", + "Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative' 'positive']\n", + "Response to cancer treatment\n", + "[nan 'improving' 'worsening']\n", + "Ketones [Mass/volume] in Urine by Test strip\n", + "[nan 'low' 'medium']\n", + "Human metapneumovirus RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "Procalcitonin [Mass/volume] in Serum or Plasma\n", + "[ nan 0.08 0.07 0.13 0.14 0.1 0.12 0.24 0.09 0.2 0.32 0.19 0.22 0.16\n", + " 0.26 0.17 0.06 0.28 0.04 0.3 0.21 0.11 0.05 0.25 0.15 0.02 0.27 0.29\n", + " 0.37 0.23 0.18]\n", + "Ketones [Presence] in Urine by Test strip\n", + "[nan '3+' 'trace' '1+' '2+']\n", + "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma\n", + "[ nan 27.88 38.47 38.32 38.02 36.5 36.83 25.42 36.82 38.51\n", + " 35.18 35.33 104.54 32.64 32.87 23.1 31.18 123.83 21. 124.27\n", + " 27. 35.97 26.14 161.34 31.8 99.11 33.87 38.3 32.81 123.23\n", + " 29.99 41.63 34.07 34.51 28.3 120.23 35. 38.64 26.73 25.94\n", + " 26.63 32.18 108.32 24.08 42.39 37.18 31.35 115.12 116.18 43.\n", + " 33.08 32.89 19.72 32.1 53.88 38.34 40. 
34.94 27.25 31.74\n", + " 34.04 27.6 32.73 93.33 31.36 34.87 29.13 26.27 32.3 107.05\n", + " 30.26 131.25 24.93 29.87 37.72 32.58 28.88 31.94 41.44 32.22\n", + " 39.07 38.54 30.7 36.6 28.14 111.28 39.2 30.39 109.73 25.8\n", + " 35.36 28.46 31.52 29.41 22.06 32.42 125.7 40.54 34.06 30.24\n", + " 31.66 37.37 35.38 27.78 35.93 148.09 28.72 28.93 34.83 30.29\n", + " 27.61 30.13 33.99 27.91 33.94 25.72 31.28 28.32 40.28 35.77\n", + " 29.05 33.63 125.73 26.94 20.26 32.96 32.5 42.57 37.21 30.76\n", + " 30.47 26.12 36.66 135.96 33.88 20.98 30.44 32.98 23.98 38.96\n", + " 41.26 29.7 33.14 36.42 118.72 33.06 33.32 43.54 22.91 29.56\n", + " 105.54 31.08 21.3 40.77 36. 37.04 38.74 33.37 128.96 118.76\n", + " 33.79 25.24 36.73 39.36 34.95 27.52 35.9 35.99 31.84 26.67\n", + " 105.27 34.86 36.09 29.28 38.12 30.55 30.3 98.22 34.67 38.36\n", + " 34.24 36.51 29.45 37.57 36.23 27.63 26.39 33.21 39.1 30.64\n", + " 145.97 28.58 37.44 132.17 27.64 28.96 38.53 27.86 113.06 29.4\n", + " 31.34 31.23 40.85 35.51 37.92 20.72 138.17 30.72 30.33 26.78\n", + " 30.58 33.49 34.44 27.31 35.85 30.2 25.44 37.77 32.47 38.22\n", + " 36.2 36.94 104.26 129.87 92.5 31.72 32.08 33.23 107.88 31.62\n", + " 97.99 102.82 35.46 22.35 40.66 35.7 113.65 41.33 35.91 114.33]\n", + "HER2 [Presence] in Breast cancer specimen by FISH\n", + "[nan 'positive' 'negative']\n", + "Treatment status Cancer\n", + "[nan 'changed']\n", + "Total Bilirubin (Elevated)\n", + "[nan 2.4 1.3 1.9 2.2 1.6 2.3 1.7 1.2 1.4 1.5 2.5 2.1 2. 
1.8]\n", + "Are you covered by health insurance or some other kind of health care plan [PhenX]\n", + "[nan 'yes' 'no']\n", + "Prothrombin time (PT)\n", + "[ nan 11.93 11.5 11.85 11.88 12.02 11.32 11.68 11.54 11.74 11.07 12.48\n", + " 11.27 11.78 11.86 11.57 12.46 12.39 11.14 10.93 11.61 12.83 10.87 12.65\n", + " 11.47 11.73 12.38 12.29 11.43 11.7 12.44 11.83 12.24 11.52 11.82 12.34\n", + " 12.12 13.21 11.94 11.49 10.83 11.44 12.71 12.1 11.34 12.06 11.9 11.17\n", + " 11.84 11.04 11.63 11.23 11.77 10.96 12.87 11.59 12.57 11.4 11.42 12.26\n", + " 10.84 11.48 11.37 12.6 12.59 11.81 11.99 12.07 11.26 11.66 12.28 11.21\n", + " 11.53 11.22 10.6 11.8 11.87 11.33 11.39 11.38 11.18 12. 11.55 11.65\n", + " 11.96 12.78 11.3 11.92 10.97 11.28 11.24 11.46 12.13 11.62 11.06 11.76\n", + " 11.36 12.7 11.2 11.6 12.73 11.98 10.86 11.29 11.67 12.18 12.55 11.79\n", + " 12.05 11.71 11.19 12.4 11.75 12.79 11.02 11.09 11.41 13. 11.13 12.8\n", + " 11.89 10.98 11.16 10.99 12.04 12.89 12.2 12.98 11.05 10.92 12.45 11.12\n", + " 12.19]\n", + "Monocytes/100 leukocytes in Blood by Automated count\n", + "[ nan 10.06 9.52 9.73 10.04 10.92 11.15 9.18 11.04 10.54 7.97 9.82\n", + " 9.13 9.89 8.93 10.32 10.97 8.59 10.18 10.11 10.26 9.35 8.73 11.22\n", + " 10.56 10.52 10.3 9.56 10.76 10.07 10.12 10.79 10.36 9.42 10.13 10.29\n", + " 11.27 10.5 9.87 9.34 9.07 11.43 9.41 10.82 10.34 9.75 10.03 11.16\n", + " 10.55 10.64 8.96 10.17 10.94 9.39 9.8 10.2 9.78 10.45 8.36 9.1\n", + " 9.74 11.3 11.34 9.9 9.99 9.94 9.48 10.91 11.39 10.42 10.58 10.37\n", + " 11.73 10.68 9.58 9.86 9.79 7.91 10.02 10.22 9.71 10.81 8.79 10.41\n", + " 9.62 11.91 10.75 9.43 9.36 9.61 9.84 11.44 10.47 10.08 9.98 10.1\n", + " 9.01 10.71 11.25 9.29 9.88 9.7 9.32 10.31 8.92 9.4 9.95 9.93\n", + " 10.83 11.29 9.3 11.2 8.97 9.02 9.66 10.27 10.46 11.08 10.72 10.51\n", + " 9.44 11.13 9.91 9.22 9.72 9.92 10.39 9.83 9.24 11.17 9.64 9.12\n", + " 8.94 8.55 10.62 8.06 10.44 9.31 10.14 11. 
8.81 8.71 9.27 9.26\n", + " 8.12 11.72 11.28 12.27 9.76 8.89 8.9 9.2 10.7 10.73 9.67 10.\n", + " 10.25 9.25 11.1 10.43 9.6 9.14 10.78 9.53 10.74 10.57]\n", + "Rhinovirus RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative']\n", + "Leukocyte esterase [Presence] in Urine by Test strip\n", + "[nan 'negative']\n", + "marital\n", + "['m' 's']\n", + "Monocytes [#/volume] in Blood by Automated count\n", + "[ nan 0.96 0.75 0.9 0.94 1.02 0.98 0.99 1. 0.8 0.93 1.03 0.87 1.05\n", + " 0.82 0.89 1.06 0.81 0.95 0.88 1.01 0.91 0.97 0.86 0.92 1.16 1.11 1.04\n", + " 1.08 0.85 1.09 0.76 0.84 1.07 0.78 1.1 1.12 0.83]\n", + "scc\n", + "[101 110 127 129 69 111 76 105 106 119 103 63 55 107 112 59 32 90\n", + " 83 123 66 117 116 46 141 86 100 113 102 108 115 124 109 104 99 150\n", + " 126 52 98 139 120 89 118 61 114 65 62 145 136 137 58 74 71 97\n", + " 68 96 87 122 128 54 130 60 133 73 121 132 138 53 149 72 51 57\n", + " 47 134 140 143 82 91 135 75 80 146 151 131 64 67 125 50 48 34\n", + " 93 43 142 153 156 70 78 77 160 170 49 88 81 174 158 84 95 79\n", + " 56 169 92 148 161 175 172 44 85 19 41 144 45 40 152 157 147 94\n", + " 35 165 20 177 37 154 21 155 167 166 181 184 42 39 164 190 38 14\n", + " 168 171 9 29 159 28]\n", + "gender\n", + "['m' 'f']\n", + "Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection\n", + "[nan 'negative' 'positive']\n", + "Neutrophils/100 leukocytes in Blood by Automated count\n", + "[ nan 27.85 26.78 30.78 27.69 28.91 27.15 25.12 28.56 26.64 28.4 28.97\n", + " 25.21 31.12 29.06 25.02 28.81 27.83 27.08 31.5 31.13 31.79 24.54 28.34\n", + " 26.81 25.62 30.44 27.99 27.51 26.36 28.83 28.12 30.79 30.1 31.76 23.98\n", + " 28.86 25.46 29.87 30.72 24.72 22.8 24.62 29.57 27.12 25.91 28.03 29.72\n", + " 27.49 24.13 27.52 34.85 18.61 29.81 26.97 32.32 25.65 27.28 28.28 24.21\n", + " 28.29 28.32 31.44 29.19 24.99 29.66 27.18 27.94 23.81 18.64 26.61 25.34\n", + " 33.41 26.1 28.01 29.07 31.42 32.06 31.99 
26.06 29.39 23.64 29.5 24.53\n", + " 28.39 28.02 26.49 29.86 23.38 22.51 33.83 29.04 23.37 32.04 27.07 30.24\n", + " 31.2 28.47 20.61 23.27 22.17 29.63 32.88 26.32 28.93 23.96 27.75 31.59\n", + " 22.96 28.16 30.71 22.11 28.61 23.6 27.45 27.79 25.17 27.71 29.1 24.7\n", + " 27.66 29.49 23.86 28.92 25.22 31.06 29.65 29.89 25.99 23.01 25.45 33.02\n", + " 32.29 26.09 24.16 20.5 26.77 24.66 28.2 30.37 30.21 30.19 22.86 27.56\n", + " 30.51 25.53 28.49 33.92 23.08 28.74 22.85 27.39 29.51 26.52 26.47 31.7\n", + " 28.69 25.33 26.14 29.16 25.01 31.38 22.66 31.86 25.69 26.08 26.93 28.05\n", + " 26.86 28.53 33.1 24.6 24.46 32.92 29.25 25.18 25.8 28.89 30.32 25.86\n", + " 26.79 27.93 28.68 21.16 33.05 27.47 21.27 26.31 27.2 23.66 25.49 34.77\n", + " 23.54 30.39 29.11 24.69 28.15 21.11 28.43 25.75 26.9 26.34 29.95 29.71\n", + " 22.26 24.79 24.77 24.55 27.14 23.67 25.82 24.47 30.47 29.38 29.94 29.43\n", + " 27.31 28.44 34.19 31.33 28.06 23.83 33.23 31.74 33.8 27.16 25.48]\n", + "Lymphocytes/100 leukocytes in Blood by Automated count\n", + "[ nan 15.09 14.29 15.67 15.57 15.17 16.88 16.07 12.23 14.82 13.94 13.37\n", + " 15.79 15.31 12.33 14.09 16.49 14.18 15.02 13.84 12.7 12.13 14.1 14.7\n", + " 18.52 17.23 17.09 15.84 13.36 15.46 15.12 14.21 17.22 18.47 16.57 16.59\n", + " 15.94 14.58 16.55 13.47 13.24 13.02 17.35 13.64 17.03 14.04 14.06 13.2\n", + " 13.61 16.52 16.46 15.4 14.67 19.64 11.5 15.62 13.91 14.4 14.31 14.03\n", + " 16.13 11.42 15.66 14.25 14.87 12.94 16.65 13.86 13.09 15.58 15.87 13.99\n", + " 16.34 16.38 16.18 15.22 14.26 16.44 14.78 11.04 12.66 17.01 15.95 13.74\n", + " 15.49 17.38 15.33 14.28 18.22 13.69 12.91 19.07 15.14 13.5 16.12 13.45\n", + " 15.47 13.92 15.68 15.63 16.22 15.89 14.46 14.72 13.41 18.5 13.22 13.62\n", + " 17.59 15.54 14.61 18.78 14.41 15.51 14.48 14.91 11.97 17.37 13.54 10.5\n", + " 17.25 16.99 16.48 15.21 16.54 14.53 14.49 15.37 16.79 16.2 17.07 16.43\n", + " 15.64 14.45 14.05 17.12 14.73 15.39 17.36 17.2 15.29 17.08 14.83 14.76\n", + " 14.92 14.44 
15.76 10.61 17.75 14.23 14.02 16.98 15.81 18.05 13.89 18.39\n", + " 15.16 20.15 15.41 11. 12.74 14.47 13.55 17.24 15.52 15.98 17.27 17.18\n", + " 15.59 16.32 18.28 16.76 13.49 15.56 10.33 14.5 16.58 15.82 14.66 10.32\n", + " 14.15 16.81 13.52 15.75 14.6 12.36 14.65 12.35 14.63 15.13 14.3 14.74\n", + " 17.05 15.72 15.88 12.77 13.98 14.95 13.38 12.89 13.93 14.77 16.83 17.11\n", + " 17.63 16.7 16.4 16.37 14.89 14.93 13.72 14.24]\n", + "Red Blood Cell\n", + "[nan 5.5 5.4 4.9 4.8 4.7 5.3 5. 5.7 4.6 5.8 5.2 5.9 4.5 5.6 5.1]\n", + "Eosinophils [#/volume] in Blood by Automated count\n", + "[ nan 0.41 0.36 0.4 0.38 0.42 0.32 0.47 0.37 0.43 0.39 0.46 0.34 0.44\n", + " 0.45 0.48 0.35 0.33 0.3 ]\n", + "Tobacco smoking status NHIS\n", + "['former' 'never' nan]\n", + "RBC Distribution Width\n", + "[ nan 12.4 14.4 13.6 12.1 13.5 11.8 13.2 14.5 14.3 12.2 14.6 14.2 12.8\n", + " 13.4 12. 13.9 13.8 12.3 11.6 11.9 12.6 11.7 13.3 13.1 12.9 14. 12.5\n", + " 14.1 12.7 13.7 13. ]\n", + "Basophils/100 leukocytes in Blood by Automated count\n", + "[ nan 3.2 3.05 3.01 2.9 2.95 3.09 2.89 3.16 2.99 3.11 2.57 2.84 2.96\n", + " 3.12 2.85 2.87 2.64 2.77 3.06 3.02 3.04 2.75 2.92 2.66 3.27 2.93 3.28\n", + " 3.13 3.1 3.22 3.14 2.94 3.21 3.19 2.83 3.43 2.76 3.3 3.23 3. 
2.91\n", + " 2.86 3.46 3.15 2.98 3.07 3.17 2.79 3.08 2.81 2.88 3.03 2.8 2.78 3.18\n", + " 2.63 3.25 2.68 2.97 2.56 2.74 3.49 2.73 3.34 3.26 3.38 2.65 3.24 2.82\n", + " 3.32 3.31 2.71 3.33 3.29 3.36]\n", + "Gram positive blood culture panel by Probe in Positive blood culture\n", + "[nan 'positive']\n", + "Oxygen [Partial pressure] in Arterial blood\n", + "[ nan 51.12 47.66 46.48 46.73 50.86 46.94 48.78 45.2 49.18 51.67 45.34\n", + " 49.25 45.85 45.67 50.47 48.48 50.48 47.68 49.42 50.88 49.85 45.5 52.16\n", + " 49.49 52.44 47.32 51.72 49.15 50.62 49.89 50.76 48.15 47.25 47.62 52.65\n", + " 50.46 49.28 45.59 50.74 46.21 51.1 46.07 47.76 47.8 48.39 51.85 47.72\n", + " 49.47 48.27 51.13 48.64 47.26 49.61 48.47 44.14 47.54 48.61 50.63 44.49\n", + " 46.43 50.29 46.88 49.34 47.93 50.36 49.38 50.79 49.99 46.85 50.82 48.83\n", + " 47.18 50.11 50.18 48.5 46.03 49.96 48.82 49.55 48.25 47.63 51.36 48.94\n", + " 46.91 48.05 49.56 49.94 48.97 48.12 52.19 47.19 49.6 45.09 47.38 51.84\n", + " 47.11 47.75 49.19 50.25 48.7 48.72 49.37 48.16 48.43 45.03 47.79 50.33\n", + " 47.89 49.95]\n", + "Influenza virus A Ag [Presence] in Nasopharynx by Rapid immunoassay\n", + "[nan False True]\n", + "pH of Arterial blood\n", + "[ nan 7.02 7.03 7.14 7.09 6.99 7.1 7.05 7.11 7.04 7.07 7.06 6.98 7.12\n", + " 7.15 7.01 7. 7.08 7.13 6.96 7.17 6.97]\n", + "Neutrophils [#/volume] in Blood by Automated count\n", + "[ nan 2.33 2.55 2.78 2.88 2.48 2.34 2.86 2.53 2.68 2.58 3.06 2.64 2.66\n", + " 3.26 2.72 2.61 2.47 2.75 2.31 2.69 2.98 2.49 2.74 2.83 2.62 2.08 2.59\n", + " 2.87 2.7 2.73 2.99 2.71 3.04 2.95 2.77 2.52 2.91 2.79 2.56 3.1 2.85\n", + " 2.63 2.18 2.38 2.43 3.08 2.4 2.67 2.93 2.54 2.76 2.89 2.82 2.28 2.25\n", + " 2.5 2.3 3.09 2.46 2.97 2.94 2.81 2.65 2.6 2.92 2.27 2.45 2.96 2.57\n", + " 2.36 2.32 2.26 2.22 2.8 2.84 2.24 2.2 2.39 2.9 2.14 2.51 3.01 3.16\n", + " 3. 3.23 3.24]\n", + "Lymphocytes [#/volume] in Blood by Automated count\n", + "[ nan 1. 
1.02 0.97 0.98 1.06 1.03 1.01 0.99 0.56 1.07 0.64 0.59 1.05\n", + " 0.61 1.09 0.58 0.63 0.96 1.04 1.08 0.52 0.6 0.57 0.53 0.65 0.55 0.5\n", + " 0.66 0.62 0.54]\n", + "Lactate dehydrogenase [Enzymatic activity/volume] in Serum or Plasma by Lactate to pyruvate reaction\n", + "[ nan 237.9 230.62 254.82 234.23 247.3 249.74 235.6 233.1 257.67\n", + " 226.88 243.15 362.44 239.37 232.82 239.5 247.96 261.87 339.76 235.82\n", + " 363.56 230.18 252.47 246.86 353.84 263.44 363.18 247.23 246.81 248.03\n", + " 359.35 246.96 251.64 242.28 244.4 240.56 366.12 236.07 250.07 248.62\n", + " 226.72 250.32 232.44 322.02 237.02 245.5 249.02 232.87 352.6 350.98\n", + " 238.16 251. 227.18 225.2 349.26 221.36 233.9 246.57 235.15 253.14\n", + " 236.02 257.38 348.04 247.74 243.54 261.2 246.94 225.9 363.01 238.34\n", + " 364.52 242.95 251.86 248.17 233.46 233.22 230.48 236.14 228.48 254.43\n", + " 246.79 246.3 255.97 232.2 378.97 222.32 381.57 245.2 238.56 230.25\n", + " 242.71 222.08 353.06 261.29 220.96 254.63 261.48 243.97 244.35 240.64\n", + " 237.2 256.73 366.77 229.57 254.61 263.59 249.15 264.43 240.76 242.73\n", + " 251.7 244.44 227.32 230.44 225.92 241.63 243.82 243.45 230.32 247.4\n", + " 249.13 232.36 251.4 360.48 227.62 220.56 233.34 259.76 248.08 249.24\n", + " 250.79 247.64 248.93 231.22 264.06 325.2 231.02 230.3 232.22 228.58\n", + " 233.58 234.74 228.52 234.22 256.61 227.64 347.1 251.53 222.22 260.76\n", + " 245.52 255.27 254.34 363.27 238.83 223.2 259.7 227.2 219.12 239.31\n", + " 240.7 367.89 358.92 257.89 224.98 240.84 251.8 226. 258.06 253.47\n", + " 243.6 248.66 228.74 337.47 228.38 251.06 228.26 241.62 231.58 242.02\n", + " 229. 336.5 259.14 240.35 249.7 227.76 246.19 239.67 243.5 251.51\n", + " 242.78 250.9 249.6 257.47 234.64 323.35 228.92 235.44 333.8 244.36\n", + " 233.8 227.74 230.2 236.54 365.58 227.92 246.91 243.27 249.59 226.14\n", + " 242.37 360.11 219.46 251.34 231. 232.9 262.3 230.24 256. 
250.25\n", + " 240.4 230.72 248.15 245.18 254.64 224.48 364.41 324.23 359.18 226.58\n", + " 235.23 366.9 221.02 231.66 380.42 342.6 231.46 244.15 229.06 372.57\n", + " 244.73 255.46 347.77]\n", + "Drugs of abuse 5 panel - Urine by Screen method\n", + "[nan 'negative' 'positive']\n", + "Interleukin 6 [Mass/volume] in Serum or Plasma\n", + "[ nan 5.33]\n", + "Tumor marker Cancer\n", + "[nan 'negative']\n", + "Glucose [Presence] in Urine by Test strip\n", + "[nan '2+']\n", + "Carbon dioxide [Partial pressure] in Arterial blood\n", + "[ nan 40.45 41.06 39.46 40.8 38.71 40.52 41.19 40.59 38.75 39.44 40.38\n", + " 41.44 40.08 40.79 39.95 39.98 40.49 39.58 40.63 42.71 39.61 37.45 38.77\n", + " 40.23 40.87 39. 40.41 38.6 39.26 39.23 40.77 40.55 40.01 39.83 40.68\n", + " 40.7 39.09 40.11 41.26 41.61 40.34 41.56 41.38 41.49 39.51 39.77 40.26\n", + " 40.74 39.02 40.64 39.15 38.81 40.14 40.56 40.19 40.51 39.38 39.5 41.22\n", + " 40.07 39.57 38.36 39.85 40.48 40.54 40.44 39.14 38.56 40.31 42.62 40.46\n", + " 38.83 42.07 39.74 38.98 42.94 39.89 41.8 39.88 40.36 38.96 41.69 40.04\n", + " 38.23 40.06 40.88 39.08 39.59 40.18 40.47 40.53 40.22 40.96 40.16 39.91\n", + " 39.49 37.46 40.24 39.06 39.79 39.41 40.82 39.78]\n", + "Eosinophils/100 leukocytes in Blood by Automated count\n", + "[ nan 4.45 4.4 4.59 3.74 4.35 4.61 4.46 4.51 5.05 4.73 4.72 4.65 4.15\n", + " 4.22 4.81 4.63 4.34 4.37 4.16 4.23 4.04 3.98 4.86 4.66 4.56 4.68 4.12\n", + " 4.83 4.25 4.92 4.75 4.32 4.58 4.11 4.71 4.41 4.21 4.17 4.89 4.19 4.43\n", + " 4.57 4.39 4.5 4.42 4.54 3.91 4.7 4.28 4.31 4.69 4.38 4.55 4.76 4.6\n", + " 4.78 4.29 4.74 4.98 4.93 4.88 4.33 4.94 4.24 4.49 4.48 4.36 4.47 5.08\n", + " 4.03 4.87 4.52 4.62 4.44 4.8 4.64 5.19 4.06 3.95 4.97 4.96 4.53 4.85\n", + " 4.08 3.87 3.92 4.18 4.84 3.84 4.27 3.99 5.06 5.24 4.82 4.79]\n", + "Estrogen+Progesterone receptor Ag [Presence] in Tissue by Immune stain\n", + "[nan 'positive' 'negative']\n", + "SARS-CoV-2 RNA Pnl Resp NAA+probe\n", + "[nan False True]\n", + 
"Hemoglobin [Presence] in Urine by Test strip\n", + "[nan 'positive' 'negative']\n", + "Bicarbonate [Moles/volume] in Arterial blood\n", + "[ nan 24.1 25.27 24.3 24.34 24.36 25.05 24.41 24.58 24.82 24.48 24.25\n", + " 24.22 23.65 24.33 24.69 23.9 24.62 24.89 24.71 24.7 24.09 24.56 24.08\n", + " 24.47 24.55 24.21 23.95 24.72 24.53 23.87 23.8 24.75 24.38 24.57 23.81\n", + " 24.06 24.93 25.07 24.64 23.67 24.23 23.94 24.28 24.44 24.17 24.45 24.37\n", + " 24.54 24.46 24.52 24.49 24.43 24.94 23.68 24.02 25.06 24.19 24.29 24.\n", + " 23.78 24.84 23.85 24.67 24.91 24.63 25.14 25.37 23.5 24.13 24.61 24.81\n", + " 24.15 24.32 24.85 23.55 25.21 23.18 24.76 24.07 24.92 25.15 24.86 24.12\n", + " 23.7 23.71 23.82]\n", + "Respiratory Disorders\n", + "[ 5 1 2 6 0 4 3 13 12 8 7 11 17 16 10 9 15 14]\n", + "Heart and Cardiovascular Diseases\n", + "[0 3 2 5 1 4 6 7 9 8]\n", + "Metabolic and Endocrine Disorders\n", + "[ 4 3 6 2 1 5 9 7 8 0 10 11 12]\n", + "Neurological Disorders\n", + "[2 0 3 4 1 6 5 8 7 9]\n", + "Orthopedic Injuries\n", + "[4 0 2 3 5 1 6 7]\n", + "Mental Health\n", + "[11 8 10 12 7 9 6 5 13 15 14 4 3 16 0 2 17 1]\n", + "Reproductive and Pregnancy\n", + "[ 0 1 15 18 14 23 19 12 2 13 5 16 20 21 24 11 17 8 9 10 3 4 22 6\n", + " 7 25 26]\n", + "Pain Relievers and Analesics\n", + "[3 2 1 4 0 5 7 6 8 9]\n", + "Cardiovascular and Blood Pressure Medications\n", + "[ 2 4 7 5 1 0 8 6 3 11 9 10 12 16 13 15 14 20]\n", + "Injection Medications\n", + "[ 3 1 2 4 6 7 5 8 0 10 9]\n", + "Oral Medications\n", + "[ 2 3 1 8 0 4 5 6 7 9 10 12]\n", + "Other Medications\n", + "[ 5 1 3 2 7 8 0 4 12 9 10 6 11 13 14 15]\n", + "Therapies and Regimes\n", + "[ 9 2 8 7 3 4 6 5 10 11 13 1 12 14 15 0]\n", + "Diagnostic Procedures\n", + "[ 8 7 9 5 10 14 16 1 3 6 15 13 4 19 12 18 2 11 17 20 24 21 25 23\n", + " 22 0]\n", + "Surgical Interventions\n", + "[1 2 0 3 4 5 6 7]\n", + "Patient Care Management\n", + "[ 8 10 13 12 3 6 2 5 9 7 4 11 14 15 1 16 19 0 17]\n", + "age_30t50\n", + "[0 1]\n", + 
"age_50t70\n", + "[1 0]\n", + "age_gt70\n", + "[0 1]\n", + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Albumin_abnormal\n", + "[0 1]\n", + "Albumin_normal\n", + "[0 1]\n", + "Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Urine by Test strip_abnormal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Urine by Test strip_normal\n", + "[0 1]\n", + "Body Mass Index_abnormal\n", + "[1 0]\n", + "Body Mass Index_normal\n", + "[0 1]\n", + "Body temperature_abnormal\n", + "[0 1]\n", + "Body temperature_normal\n", + "[1 0]\n", + "Calcium_normal\n", + "[1 0]\n", + "Calcium [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Carbon Dioxide_abnormal\n", + "[0 1]\n", + "Carbon Dioxide_normal\n", + "[0 1]\n", + "Chloride_abnormal\n", + "[0 1]\n", + "Chloride_normal\n", + "[1 0]\n", + "Chloride [Moles/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Chloride [Moles/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Creatinine_abnormal\n", + "[0 1]\n", + "Creatinine_normal\n", + "[1 0]\n", + "DXA [T-score] Bone density_abnormal\n", + "[0 1]\n", + "DXA [T-score] Bone density_normal\n", + "[1 0]\n", + "Diastolic Blood Pressure_abnormal\n", + "[1 0]\n", + "Diastolic Blood Pressure_normal\n", + "[0 1]\n", + "Erythrocyte distribution width [Entitic volume] by Automated 
count_abnormal\n", + "[0 1]\n", + "Erythrocyte distribution width [Entitic volume] by Automated count_normal\n", + "[1 0]\n", + "Erythrocyte distribution width [Ratio] by Automated count_abnormal\n", + "[0 1]\n", + "Erythrocyte distribution width [Ratio] by Automated count_normal\n", + "[0 1]\n", + "Erythrocytes [#/volume] in Blood by Automated count_abnormal\n", + "[0 1]\n", + "Erythrocytes [#/volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Ferritin [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Ferritin [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Globulin [Mass/volume] in Serum by calculation_abnormal\n", + "[0 1]\n", + "Globulin [Mass/volume] in Serum by calculation_normal\n", + "[0 1]\n", + "Glomerular filtration rate/1.73 sq M.predicted_abnormal\n", + "[0 1]\n", + "Glomerular filtration rate/1.73 sq M.predicted_normal\n", + "[0 1]\n", + "Glucose_abnormal\n", + "[0 1]\n", + "Glucose_normal\n", + "[1 0]\n", + "Glucose [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Glucose [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Glucose [Mass/volume] in Urine by Test strip_normal\n", + "[0 1]\n", + "Heart rate_abnormal\n", + "[0 1]\n", + "Heart rate_normal\n", + "[1 0]\n", + "Hematocrit [Volume Fraction] of Blood_abnormal\n", + "[0 1]\n", + "Hematocrit [Volume Fraction] of Blood_normal\n", + "[0 1]\n", + "Hematocrit [Volume Fraction] of Blood by Automated count_abnormal\n", + "[0 1]\n", + "Hematocrit [Volume Fraction] of Blood by Automated count_normal\n", + "[1 0]\n", + "Hemoglobin A1c/Hemoglobin.total in Blood_abnormal\n", + "[1 0]\n", + "Hemoglobin A1c/Hemoglobin.total in Blood_normal\n", + "[0 1]\n", + "Hemoglobin [Mass/volume] in Blood_abnormal\n", + "[0 1]\n", + "Hemoglobin [Mass/volume] in Blood_normal\n", + "[1 0]\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method_abnormal\n", + "[0 1]\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic 
method_normal\n", + "[0 1]\n", + "High Density Lipoprotein Cholesterol_abnormal\n", + "[1 0]\n", + "High Density Lipoprotein Cholesterol_normal\n", + "[0 1]\n", + "Iron [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Iron [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Iron saturation [Mass Fraction] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Iron saturation [Mass Fraction] in Serum or Plasma_normal\n", + "[0 1]\n", + "Left ventricular Ejection fraction_abnormal\n", + "[0 1]\n", + "Left ventricular Ejection fraction_normal\n", + "[0 1]\n", + "Leukocytes [#/volume] in Blood by Automated count_abnormal\n", + "[0 1]\n", + "Leukocytes [#/volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Low Density Lipoprotein Cholesterol_abnormal\n", + "[0 1]\n", + "Low Density Lipoprotein Cholesterol_normal\n", + "[1 0]\n", + "MCH [Entitic mass] by Automated count_abnormal\n", + "[0 1]\n", + "MCH [Entitic mass] by Automated count_normal\n", + "[1 0]\n", + "MCHC [Mass/volume] by Automated count_normal\n", + "[1 0]\n", + "MCV [Entitic volume] by Automated count_normal\n", + "[1 0]\n", + "Magnesium [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Magnesium [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Microalbumin Creatinine Ratio_abnormal\n", + "[0 1]\n", + "Microalbumin Creatinine Ratio_normal\n", + "[0 1]\n", + "NT-proBNP_abnormal\n", + "[0 1]\n", + "Oxygen saturation in Arterial blood_abnormal\n", + "[0 1]\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported_abnormal\n", + "[1 0]\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported_normal\n", + "[0 1]\n", + "Platelet mean volume [Entitic volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Platelets [#/volume] in Blood by Automated count_abnormal\n", + "[0 
# %%
# Which columns are still not numeric?
# Only non-numeric columns are inspected: printing the unique values of every
# (already numeric) column buries the handful of columns that still need work.
non_numeric_columns = df2.select_dtypes(exclude='number').columns
for col in non_numeric_columns:
    print(col)
    print(df2[col].unique())

# %%
# Categorical columns that are replaced by one-hot (0/1) indicator columns.
# NOTE(review): 'Interleukin 6 [Mass/volume] in Serum or Plasma' looks like a
# measurement rather than a category (the encoded frame ends up with a column
# named '..._5.33') -- confirm that one-hot encoding it is intended.
columns_to_encode = [
    'HER2 [Presence] in Breast cancer specimen by FISH',
    'Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Estrogen+Progesterone receptor Ag [Presence] in Tissue by Immune stain',
    'Ketones [Mass/volume] in Urine by Test strip',
    'marital',
    'Abuse Status [OMAHA]',
    'Interleukin 6 [Mass/volume] in Serum or Plasma',
    'Progesterone receptor Ag [Presence] in Breast cancer specimen by Immune stain',
    'Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Parainfluenza virus 2 RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Response to cancer treatment',
    'Estrogen receptor Ag [Presence] in Breast cancer specimen by Immune stain',
    'Hemoglobin [Presence] in Urine by Test strip',
    'Housing status',
    'Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Objective assessment of cardiovascular disease NYHA',
    'Drugs of abuse 5 panel - Urine by Screen method',
    'Leukocyte esterase [Presence] in Urine by Test strip',
    'Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Protein [Presence] in Urine by Test strip',
    'Appearance of Urine',
    'Capillary refill [Time] of Nail bed',
    'Treatment status Cancer',
    'Gram positive blood culture panel by Probe in Positive blood culture',
    'Glucose [Presence] in Urine by Test strip',
    'Respiratory syncytial virus RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Functional capacity NYHA',
    'Color of Urine',
    'Nitrite [Presence] in Urine by Test strip',
    'Tumor marker Cancer',
    'Tobacco smoking status NHIS',
    'gender',
    'HIV status',
    'Are you covered by health insurance or some other kind of health care plan [PhenX]',
    'Human metapneumovirus RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'Ketones [Presence] in Urine by Test strip',
    'Clarity of Urine',
    'Stage group.clinical Cancer',
    'Rhinovirus RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'HER2 [Presence] in Breast cancer specimen by Immune stain',
    'Smokes tobacco daily',
    'Parainfluenza virus 3 RNA [Presence] in Respiratory specimen by NAA with probe detection',
    'SARS-CoV-2 RNA Pnl Resp NAA+probe',
    'Influenza virus A Ag [Presence] in Nasopharynx by Rapid immunoassay',
    'Influenza virus B Ag [Presence] in Nasopharynx by Rapid immunoassay'
]


def _is_already_encoded(frame, column, prefix_sep='_'):
    """Return True if `frame` has at least one dummy column derived from `column`.

    A dummy column is recognised by its name starting with
    ``column + prefix_sep``, which is how ``pd.get_dummies`` names its output.
    """
    prefix = column + prefix_sep
    return any(str(name).startswith(prefix) for name in frame.columns)


# Encode only what is still present so that re-running this cell is a no-op
# instead of a KeyError (get_dummies removes the source columns it encodes).
pending_columns = [name for name in columns_to_encode if name in df2.columns]

# A name that is neither present nor already encoded is most likely a typo:
# keep failing loudly with the same exception type get_dummies would raise.
# (A column that held only NaN produces no dummy columns, so it is still
# reported here on a re-run.)
unknown_columns = [
    name for name in columns_to_encode
    if name not in df2.columns and not _is_already_encoded(df2, name)
]
if unknown_columns:
    raise KeyError(f"Columns to encode not found in df2: {unknown_columns}")

if pending_columns:
    df2 = pd.get_dummies(df2, columns=pending_columns, prefix_sep='_', dtype=int)

# %% [markdown]
# Let's see one more time if we have any null values.

# %%
def null_percentages(frame):
    """Percentage of null values per column of `frame`.

    Only columns that contain at least one null are returned, sorted from the
    highest percentage to the lowest.

    Parameters
    ----------
    frame : pd.DataFrame
        Frame to inspect.

    Returns
    -------
    pd.Series
        Indexed by column name; values are percentages of rows that are null.
    """
    null_counts = frame.isnull().sum()
    null_counts = null_counts[null_counts > 0]
    return (100 * null_counts / len(frame)).sort_values(ascending=False)


null_values = null_percentages(df2)
null_values_df = null_values.reset_index()
null_values_df.columns = ['Column', 'Null Values Percentage']
print(tabulate(null_values_df, headers='keys', tablefmt='simple', showindex=False))
217.9 178.7 292.5 428.9 254.8\n", + " 423.7 427.7 306.4 421.5 340.9 227.8 322.3 192.7 323.4 440.8 349.8 429.9\n", + " 328.2 374.3 228.3 204.6 308.6 200.3 429.3 174.3 155.7 329.5 173.6 204.9\n", + " 260.9 420.7 356.7 449.9 387.3 171.6 440.4 384.8 417.1 186. 396.8 307.1\n", + " 239.1 175.1 234.4 268. 331.7 326.5 324.6 391.3 336.6 210.8 348.6 407.5\n", + " 417.6 198.2 226.9 231.5 243.1 378.9 325.2 446.3 266.9 244.4 371. 442.4\n", + " 432.1 258.9 214.5 264.5 335. 338.2 444.9 278.5 211.3 385.1 350.5 290.1\n", + " 248. 421.3 448.4 304.1 265.8 184.5 281.8 434.9 291.9 297.8 205.6 191.5\n", + " 293.7 287. 327. 249.9 412.8 346.9 422.3 320.4 155.4 173.3 339.4 402.8\n", + " 394.8 182.9 185.9 299.4 252.4 265.1 155.3 299.9 212.9 344.8 428.8 405.8\n", + " 298.4 385.8 257.1 439.4 210.5 254.2 303.7 283.6 329.8 162.3 317. 365.6\n", + " 273.4 447.5 209.6 196.1 415.3 197.6 295. 357.9 342.9 266. 224.8 181.3\n", + " 424.9 265.2 303.6 162.8 367.1]\n", + "Anion Gap\n", + "[ nan 9.4 7.2 9.5 13.2 3.5 11.8 3.4 2.4 8.1 4.9 2.3 12.1 4.6\n", + " 7.6 7.7 5.6 10.7 4.1 10.2 11.4 12.2 7.5 13.9 6.7 6.4 6.9 13.6\n", + " 10.1 3.1 3.9 13.3 5.3 4.5 12. 6.2 11.3 8.9 10.5 10.6 11.9 5.8\n", + " 12.6 11.6 14.9 8.7 11.1 12.5 4.3 10. 2.1 5.5 14. 11. 14.7 8.5\n", + " 12.3 14.1 5.9 6.1 2. 7.1 3.7 5.4 13.8 8.8 10.3 2.7 11.7 2.8\n", + " 5. 10.8 9. 9.9 14.3 12.7 14.8 7. 9.1 9.8 8.2 5.2 8.3 4.8\n", + " 12.8 6.3 8.4 7.3 11.2 7.9 13.4 4. 6. 
3.3 3.6]\n", + "C reactive protein [Mass/volume] in Serum or Plasma\n", + "[ nan 10.27 9.87 10.57 10.13 10.18 9.84 10.23 10.08 10.73 10.42 10.32\n", + " 12.84 9.42 10.14 9.77 10.01 9.89 14.09 9.98 13.15 10.7 9.9 13.43\n", + " 10.21 13.54 9.78 9.66 10.64 12.89 10.31 10.4 9.7 13.09 10.05 10.45\n", + " 9.94 10.24 10.38 10.06 10.26 10.37 10.17 13.56 13.03 9.6 10.6 9.51\n", + " 9.32 13.57 10.9 10.8 10.36 10.78 9.4 9.48 12.45 9.14 10.3 10.1\n", + " 9.86 9.88 13.16 10.44 13.64 10.02 9.93 9.68 10.04 9.8 9.76 10.11\n", + " 10.69 10.22 9.3 13.39 9.96 8.91 10.03 9.83 11.2 13.74 9.08 9.63\n", + " 10.49 9.52 10.07 10.58 12.49 9.91 10.43 10.34 9.65 10.52 9.62 9.54\n", + " 13.82 10.96 9.43 10.83 10.59 9.5 10.09 10.2 9.58 12.9 9.1 11.\n", + " 10.76 10.16 9.69 9.64 12.21 9.46 12.94 9.67 12.46 11.28 10.84 9.06\n", + " 10.85 10.72 9.82 10.63 15.4 9.56 9.27 9.47 9.72 9.11 10.91 9.24\n", + " 10.54 10.95 10.47 9.92 13.67 9.44 12.6 9.04 13.07 9.13 9.19 9.15\n", + " 9.81 9.73 9.97 13.1 12.3 13.14 10.39 11.14 14.45 10.12 13.11 12.82\n", + " 10.28 9.57 12.09]\n", + "Fibrin D-dimer FEU [Mass/volume] in Platelet poor plasma\n", + "[ nan 0.42 0.47 0.43 0.38 0.45 0.4 0.34 11.01 0.56 5.2 11.35\n", + " 0.44 7.23 11.38 0.46 8.89 11.76 0.36 0.51 2.12 7.67 7.83 6.51\n", + " 0.49 6.52 0.53 10.17 0.41 10.02 0.39 0.37 13.81 13.34 0.5 7.94\n", + " 0.54 0.35 10.14 8.09 1.96 0.3 6.45 10.19 12.61 8.73 1.43 4.95\n", + " 1.73 1.37 0.32 9.84 8.56 1.5 9.88 11.7 14.05 5.89 0.55 7.09]\n", + "label\n", + "[0 1]\n", + "Oxygen/Inspired gas setting [Volume Fraction] Ventilator\n", + "[ nan 69.41 65.75 60.01 59.06 77.46 67.87 73.69 63.76 57.75 56.91 71.23\n", + " 68.25 63.79 61.4 60.19 69.44 65.89 61.81 70.96 68.06 61.94 62.55 60.05\n", + " 61.73 59.5 62.46 66.76 71.5 66.16 68.96 64.94 65.71 71.08 64.1 59.46\n", + " 70.17 60.64 55.96 63.64 59.38 61.76 66.34 64.53 60.16 61.69 71.42 71.07\n", + " 54.98 65.87 59.32 66.7 72.12 63.82 68.18 63.61 60.77 70.26 59.45 64.42\n", + " 59.19 65.42 59.85 57.18 71.11 64.75 66.46 
56.26 55.83 64.61 69.65 58.36\n", + " 68.37 68.27 75.45 58.97 62.9 64.13 66.98 61.06 62.16 69.7 70.94 69.67\n", + " 64.85 70.65 54.01 67.29 62.26 54.76 61.66 60.95 70.62 61.7 61.24 56.48\n", + " 65.92 70.51 56.17 72. 68.82 75.88 66.49 54.43 64.93 62.64 63.55 68.23\n", + " 66.55 66.3 68.28 73.22 65.82 61.49]\n", + "INR in Platelet poor plasma by Coagulation assay\n", + "[ nan 2.05 1.95 2.17 2.12 1.82 1.77 2.25 1.39 3.9 1.97 1.84 2.38 2.63\n", + " 1.76 3.99 4.08 2.08 1.79 3.8 2.01 3.98 2.3 1.64 4. 1.94 2.04 2.\n", + " 4.2 1.62 2.23 2.1 2.02 1.88 3.82 1.34 2.24 1.65 4.1 4.04 1.81 1.89\n", + " 2.16 3.81 2.22 2.54 2.37 1.86 2.11 4.14 2.21 2.41 1.83 2.14 4.27 2.07\n", + " 3.91 2.18 1.87 1.63 1.72 3.97 2.2 1.61 1.7 1.55 2.06 1.59 3.73 1.68\n", + " 1.78 1.92 2.13 2.26 1.56 1.93 1.8 2.28 4.03 1.58 2.44 1.85 3.78 1.98\n", + " 1.66 1.96 3.89 2.27 1.71 3.94 1.73 1.9 1.6 3.83 2.15 2.32 2.36 2.33\n", + " 2.6 4.36 1.67 3.85 2.43 2.58 2.48 2.29 4.09 4.07 3.95 1.91 3.86 3.75\n", + " 4.16 3.79]\n", + "MCV\n", + "[ nan 92.6 94.3 93.5 82.1 91.4 90.9 88.4 88.2 89.2 92.7 91.5 89.1 83.1\n", + " 85.9 85.3 89.5 90.4 87.5 91.3 90.5 83.6 81.7 82.5 85.5 88. 93.4 81.5\n", + " 81.8 92.2 91.1 88.6 80.6 94. 87.1 82.2 88.5 82.8 85.2 81.3 81.2 83.7\n", + " 95.2 82.3 95.9 80.1 95.6 80.2 84.2 92.4 90.1 87.4 84.3 90.7 84.9 84.4\n", + " 85.4 91.7 94.1 91.9 80.7 83.8 88.8 94.5 91.8 89.6 84.5 95.8 95.3 91.2\n", + " 86.8 93.1 90.2 87.2 93.9 81.4 82.4 89.4 87. 93.3 82. 80.9 90.3 93.2\n", + " 88.9 86.5 87.8 96. 83. 87.6 93.8 89. 84.7 88.1 80. 
85.1 86.2 86.3]\n", + "Basophils [#/volume] in Blood by Automated count\n", + "[ nan 0.32 0.3 0.33 0.31 0.28 0.29 0.27 0.34 0.26 0.25 0.24 0.35]\n", + "Procalcitonin [Mass/volume] in Serum or Plasma\n", + "[ nan 0.08 0.07 0.13 0.14 0.1 0.12 0.24 0.09 0.2 0.32 0.19 0.22 0.16\n", + " 0.26 0.17 0.06 0.28 0.04 0.3 0.21 0.11 0.05 0.25 0.15 0.02 0.27 0.29\n", + " 0.37 0.23 0.18]\n", + "Creatine kinase [Enzymatic activity/volume] in Serum or Plasma\n", + "[ nan 27.88 38.47 38.32 38.02 36.5 36.83 25.42 36.82 38.51\n", + " 35.18 35.33 104.54 32.64 32.87 23.1 31.18 123.83 21. 124.27\n", + " 27. 35.97 26.14 161.34 31.8 99.11 33.87 38.3 32.81 123.23\n", + " 29.99 41.63 34.07 34.51 28.3 120.23 35. 38.64 26.73 25.94\n", + " 26.63 32.18 108.32 24.08 42.39 37.18 31.35 115.12 116.18 43.\n", + " 33.08 32.89 19.72 32.1 53.88 38.34 40. 34.94 27.25 31.74\n", + " 34.04 27.6 32.73 93.33 31.36 34.87 29.13 26.27 32.3 107.05\n", + " 30.26 131.25 24.93 29.87 37.72 32.58 28.88 31.94 41.44 32.22\n", + " 39.07 38.54 30.7 36.6 28.14 111.28 39.2 30.39 109.73 25.8\n", + " 35.36 28.46 31.52 29.41 22.06 32.42 125.7 40.54 34.06 30.24\n", + " 31.66 37.37 35.38 27.78 35.93 148.09 28.72 28.93 34.83 30.29\n", + " 27.61 30.13 33.99 27.91 33.94 25.72 31.28 28.32 40.28 35.77\n", + " 29.05 33.63 125.73 26.94 20.26 32.96 32.5 42.57 37.21 30.76\n", + " 30.47 26.12 36.66 135.96 33.88 20.98 30.44 32.98 23.98 38.96\n", + " 41.26 29.7 33.14 36.42 118.72 33.06 33.32 43.54 22.91 29.56\n", + " 105.54 31.08 21.3 40.77 36. 
37.04 38.74 33.37 128.96 118.76\n", + " 33.79 25.24 36.73 39.36 34.95 27.52 35.9 35.99 31.84 26.67\n", + " 105.27 34.86 36.09 29.28 38.12 30.55 30.3 98.22 34.67 38.36\n", + " 34.24 36.51 29.45 37.57 36.23 27.63 26.39 33.21 39.1 30.64\n", + " 145.97 28.58 37.44 132.17 27.64 28.96 38.53 27.86 113.06 29.4\n", + " 31.34 31.23 40.85 35.51 37.92 20.72 138.17 30.72 30.33 26.78\n", + " 30.58 33.49 34.44 27.31 35.85 30.2 25.44 37.77 32.47 38.22\n", + " 36.2 36.94 104.26 129.87 92.5 31.72 32.08 33.23 107.88 31.62\n", + " 97.99 102.82 35.46 22.35 40.66 35.7 113.65 41.33 35.91 114.33]\n", + "Total Bilirubin (Elevated)\n", + "[nan 2.4 1.3 1.9 2.2 1.6 2.3 1.7 1.2 1.4 1.5 2.5 2.1 2. 1.8]\n", + "Prothrombin time (PT)\n", + "[ nan 11.93 11.5 11.85 11.88 12.02 11.32 11.68 11.54 11.74 11.07 12.48\n", + " 11.27 11.78 11.86 11.57 12.46 12.39 11.14 10.93 11.61 12.83 10.87 12.65\n", + " 11.47 11.73 12.38 12.29 11.43 11.7 12.44 11.83 12.24 11.52 11.82 12.34\n", + " 12.12 13.21 11.94 11.49 10.83 11.44 12.71 12.1 11.34 12.06 11.9 11.17\n", + " 11.84 11.04 11.63 11.23 11.77 10.96 12.87 11.59 12.57 11.4 11.42 12.26\n", + " 10.84 11.48 11.37 12.6 12.59 11.81 11.99 12.07 11.26 11.66 12.28 11.21\n", + " 11.53 11.22 10.6 11.8 11.87 11.33 11.39 11.38 11.18 12. 11.55 11.65\n", + " 11.96 12.78 11.3 11.92 10.97 11.28 11.24 11.46 12.13 11.62 11.06 11.76\n", + " 11.36 12.7 11.2 11.6 12.73 11.98 10.86 11.29 11.67 12.18 12.55 11.79\n", + " 12.05 11.71 11.19 12.4 11.75 12.79 11.02 11.09 11.41 13. 
11.13 12.8\n", + " 11.89 10.98 11.16 10.99 12.04 12.89 12.2 12.98 11.05 10.92 12.45 11.12\n", + " 12.19]\n", + "Monocytes/100 leukocytes in Blood by Automated count\n", + "[ nan 10.06 9.52 9.73 10.04 10.92 11.15 9.18 11.04 10.54 7.97 9.82\n", + " 9.13 9.89 8.93 10.32 10.97 8.59 10.18 10.11 10.26 9.35 8.73 11.22\n", + " 10.56 10.52 10.3 9.56 10.76 10.07 10.12 10.79 10.36 9.42 10.13 10.29\n", + " 11.27 10.5 9.87 9.34 9.07 11.43 9.41 10.82 10.34 9.75 10.03 11.16\n", + " 10.55 10.64 8.96 10.17 10.94 9.39 9.8 10.2 9.78 10.45 8.36 9.1\n", + " 9.74 11.3 11.34 9.9 9.99 9.94 9.48 10.91 11.39 10.42 10.58 10.37\n", + " 11.73 10.68 9.58 9.86 9.79 7.91 10.02 10.22 9.71 10.81 8.79 10.41\n", + " 9.62 11.91 10.75 9.43 9.36 9.61 9.84 11.44 10.47 10.08 9.98 10.1\n", + " 9.01 10.71 11.25 9.29 9.88 9.7 9.32 10.31 8.92 9.4 9.95 9.93\n", + " 10.83 11.29 9.3 11.2 8.97 9.02 9.66 10.27 10.46 11.08 10.72 10.51\n", + " 9.44 11.13 9.91 9.22 9.72 9.92 10.39 9.83 9.24 11.17 9.64 9.12\n", + " 8.94 8.55 10.62 8.06 10.44 9.31 10.14 11. 8.81 8.71 9.27 9.26\n", + " 8.12 11.72 11.28 12.27 9.76 8.89 8.9 9.2 10.7 10.73 9.67 10.\n", + " 10.25 9.25 11.1 10.43 9.6 9.14 10.78 9.53 10.74 10.57]\n", + "Monocytes [#/volume] in Blood by Automated count\n", + "[ nan 0.96 0.75 0.9 0.94 1.02 0.98 0.99 1. 
0.8 0.93 1.03 0.87 1.05\n", + " 0.82 0.89 1.06 0.81 0.95 0.88 1.01 0.91 0.97 0.86 0.92 1.16 1.11 1.04\n", + " 1.08 0.85 1.09 0.76 0.84 1.07 0.78 1.1 1.12 0.83]\n", + "scc\n", + "[101 110 127 129 69 111 76 105 106 119 103 63 55 107 112 59 32 90\n", + " 83 123 66 117 116 46 141 86 100 113 102 108 115 124 109 104 99 150\n", + " 126 52 98 139 120 89 118 61 114 65 62 145 136 137 58 74 71 97\n", + " 68 96 87 122 128 54 130 60 133 73 121 132 138 53 149 72 51 57\n", + " 47 134 140 143 82 91 135 75 80 146 151 131 64 67 125 50 48 34\n", + " 93 43 142 153 156 70 78 77 160 170 49 88 81 174 158 84 95 79\n", + " 56 169 92 148 161 175 172 44 85 19 41 144 45 40 152 157 147 94\n", + " 35 165 20 177 37 154 21 155 167 166 181 184 42 39 164 190 38 14\n", + " 168 171 9 29 159 28]\n", + "Neutrophils/100 leukocytes in Blood by Automated count\n", + "[ nan 27.85 26.78 30.78 27.69 28.91 27.15 25.12 28.56 26.64 28.4 28.97\n", + " 25.21 31.12 29.06 25.02 28.81 27.83 27.08 31.5 31.13 31.79 24.54 28.34\n", + " 26.81 25.62 30.44 27.99 27.51 26.36 28.83 28.12 30.79 30.1 31.76 23.98\n", + " 28.86 25.46 29.87 30.72 24.72 22.8 24.62 29.57 27.12 25.91 28.03 29.72\n", + " 27.49 24.13 27.52 34.85 18.61 29.81 26.97 32.32 25.65 27.28 28.28 24.21\n", + " 28.29 28.32 31.44 29.19 24.99 29.66 27.18 27.94 23.81 18.64 26.61 25.34\n", + " 33.41 26.1 28.01 29.07 31.42 32.06 31.99 26.06 29.39 23.64 29.5 24.53\n", + " 28.39 28.02 26.49 29.86 23.38 22.51 33.83 29.04 23.37 32.04 27.07 30.24\n", + " 31.2 28.47 20.61 23.27 22.17 29.63 32.88 26.32 28.93 23.96 27.75 31.59\n", + " 22.96 28.16 30.71 22.11 28.61 23.6 27.45 27.79 25.17 27.71 29.1 24.7\n", + " 27.66 29.49 23.86 28.92 25.22 31.06 29.65 29.89 25.99 23.01 25.45 33.02\n", + " 32.29 26.09 24.16 20.5 26.77 24.66 28.2 30.37 30.21 30.19 22.86 27.56\n", + " 30.51 25.53 28.49 33.92 23.08 28.74 22.85 27.39 29.51 26.52 26.47 31.7\n", + " 28.69 25.33 26.14 29.16 25.01 31.38 22.66 31.86 25.69 26.08 26.93 28.05\n", + " 26.86 28.53 33.1 24.6 24.46 32.92 29.25 25.18 25.8 
28.89 30.32 25.86\n", + " 26.79 27.93 28.68 21.16 33.05 27.47 21.27 26.31 27.2 23.66 25.49 34.77\n", + " 23.54 30.39 29.11 24.69 28.15 21.11 28.43 25.75 26.9 26.34 29.95 29.71\n", + " 22.26 24.79 24.77 24.55 27.14 23.67 25.82 24.47 30.47 29.38 29.94 29.43\n", + " 27.31 28.44 34.19 31.33 28.06 23.83 33.23 31.74 33.8 27.16 25.48]\n", + "Lymphocytes/100 leukocytes in Blood by Automated count\n", + "[ nan 15.09 14.29 15.67 15.57 15.17 16.88 16.07 12.23 14.82 13.94 13.37\n", + " 15.79 15.31 12.33 14.09 16.49 14.18 15.02 13.84 12.7 12.13 14.1 14.7\n", + " 18.52 17.23 17.09 15.84 13.36 15.46 15.12 14.21 17.22 18.47 16.57 16.59\n", + " 15.94 14.58 16.55 13.47 13.24 13.02 17.35 13.64 17.03 14.04 14.06 13.2\n", + " 13.61 16.52 16.46 15.4 14.67 19.64 11.5 15.62 13.91 14.4 14.31 14.03\n", + " 16.13 11.42 15.66 14.25 14.87 12.94 16.65 13.86 13.09 15.58 15.87 13.99\n", + " 16.34 16.38 16.18 15.22 14.26 16.44 14.78 11.04 12.66 17.01 15.95 13.74\n", + " 15.49 17.38 15.33 14.28 18.22 13.69 12.91 19.07 15.14 13.5 16.12 13.45\n", + " 15.47 13.92 15.68 15.63 16.22 15.89 14.46 14.72 13.41 18.5 13.22 13.62\n", + " 17.59 15.54 14.61 18.78 14.41 15.51 14.48 14.91 11.97 17.37 13.54 10.5\n", + " 17.25 16.99 16.48 15.21 16.54 14.53 14.49 15.37 16.79 16.2 17.07 16.43\n", + " 15.64 14.45 14.05 17.12 14.73 15.39 17.36 17.2 15.29 17.08 14.83 14.76\n", + " 14.92 14.44 15.76 10.61 17.75 14.23 14.02 16.98 15.81 18.05 13.89 18.39\n", + " 15.16 20.15 15.41 11. 12.74 14.47 13.55 17.24 15.52 15.98 17.27 17.18\n", + " 15.59 16.32 18.28 16.76 13.49 15.56 10.33 14.5 16.58 15.82 14.66 10.32\n", + " 14.15 16.81 13.52 15.75 14.6 12.36 14.65 12.35 14.63 15.13 14.3 14.74\n", + " 17.05 15.72 15.88 12.77 13.98 14.95 13.38 12.89 13.93 14.77 16.83 17.11\n", + " 17.63 16.7 16.4 16.37 14.89 14.93 13.72 14.24]\n", + "Red Blood Cell\n", + "[nan 5.5 5.4 4.9 4.8 4.7 5.3 5. 
5.7 4.6 5.8 5.2 5.9 4.5 5.6 5.1]\n", + "Eosinophils [#/volume] in Blood by Automated count\n", + "[ nan 0.41 0.36 0.4 0.38 0.42 0.32 0.47 0.37 0.43 0.39 0.46 0.34 0.44\n", + " 0.45 0.48 0.35 0.33 0.3 ]\n", + "RBC Distribution Width\n", + "[ nan 12.4 14.4 13.6 12.1 13.5 11.8 13.2 14.5 14.3 12.2 14.6 14.2 12.8\n", + " 13.4 12. 13.9 13.8 12.3 11.6 11.9 12.6 11.7 13.3 13.1 12.9 14. 12.5\n", + " 14.1 12.7 13.7 13. ]\n", + "Basophils/100 leukocytes in Blood by Automated count\n", + "[ nan 3.2 3.05 3.01 2.9 2.95 3.09 2.89 3.16 2.99 3.11 2.57 2.84 2.96\n", + " 3.12 2.85 2.87 2.64 2.77 3.06 3.02 3.04 2.75 2.92 2.66 3.27 2.93 3.28\n", + " 3.13 3.1 3.22 3.14 2.94 3.21 3.19 2.83 3.43 2.76 3.3 3.23 3. 2.91\n", + " 2.86 3.46 3.15 2.98 3.07 3.17 2.79 3.08 2.81 2.88 3.03 2.8 2.78 3.18\n", + " 2.63 3.25 2.68 2.97 2.56 2.74 3.49 2.73 3.34 3.26 3.38 2.65 3.24 2.82\n", + " 3.32 3.31 2.71 3.33 3.29 3.36]\n", + "Oxygen [Partial pressure] in Arterial blood\n", + "[ nan 51.12 47.66 46.48 46.73 50.86 46.94 48.78 45.2 49.18 51.67 45.34\n", + " 49.25 45.85 45.67 50.47 48.48 50.48 47.68 49.42 50.88 49.85 45.5 52.16\n", + " 49.49 52.44 47.32 51.72 49.15 50.62 49.89 50.76 48.15 47.25 47.62 52.65\n", + " 50.46 49.28 45.59 50.74 46.21 51.1 46.07 47.76 47.8 48.39 51.85 47.72\n", + " 49.47 48.27 51.13 48.64 47.26 49.61 48.47 44.14 47.54 48.61 50.63 44.49\n", + " 46.43 50.29 46.88 49.34 47.93 50.36 49.38 50.79 49.99 46.85 50.82 48.83\n", + " 47.18 50.11 50.18 48.5 46.03 49.96 48.82 49.55 48.25 47.63 51.36 48.94\n", + " 46.91 48.05 49.56 49.94 48.97 48.12 52.19 47.19 49.6 45.09 47.38 51.84\n", + " 47.11 47.75 49.19 50.25 48.7 48.72 49.37 48.16 48.43 45.03 47.79 50.33\n", + " 47.89 49.95]\n", + "pH of Arterial blood\n", + "[ nan 7.02 7.03 7.14 7.09 6.99 7.1 7.05 7.11 7.04 7.07 7.06 6.98 7.12\n", + " 7.15 7.01 7. 
7.08 7.13 6.96 7.17 6.97]\n", + "Neutrophils [#/volume] in Blood by Automated count\n", + "[ nan 2.33 2.55 2.78 2.88 2.48 2.34 2.86 2.53 2.68 2.58 3.06 2.64 2.66\n", + " 3.26 2.72 2.61 2.47 2.75 2.31 2.69 2.98 2.49 2.74 2.83 2.62 2.08 2.59\n", + " 2.87 2.7 2.73 2.99 2.71 3.04 2.95 2.77 2.52 2.91 2.79 2.56 3.1 2.85\n", + " 2.63 2.18 2.38 2.43 3.08 2.4 2.67 2.93 2.54 2.76 2.89 2.82 2.28 2.25\n", + " 2.5 2.3 3.09 2.46 2.97 2.94 2.81 2.65 2.6 2.92 2.27 2.45 2.96 2.57\n", + " 2.36 2.32 2.26 2.22 2.8 2.84 2.24 2.2 2.39 2.9 2.14 2.51 3.01 3.16\n", + " 3. 3.23 3.24]\n", + "Lymphocytes [#/volume] in Blood by Automated count\n", + "[ nan 1. 1.02 0.97 0.98 1.06 1.03 1.01 0.99 0.56 1.07 0.64 0.59 1.05\n", + " 0.61 1.09 0.58 0.63 0.96 1.04 1.08 0.52 0.6 0.57 0.53 0.65 0.55 0.5\n", + " 0.66 0.62 0.54]\n", + "Lactate dehydrogenase [Enzymatic activity/volume] in Serum or Plasma by Lactate to pyruvate reaction\n", + "[ nan 237.9 230.62 254.82 234.23 247.3 249.74 235.6 233.1 257.67\n", + " 226.88 243.15 362.44 239.37 232.82 239.5 247.96 261.87 339.76 235.82\n", + " 363.56 230.18 252.47 246.86 353.84 263.44 363.18 247.23 246.81 248.03\n", + " 359.35 246.96 251.64 242.28 244.4 240.56 366.12 236.07 250.07 248.62\n", + " 226.72 250.32 232.44 322.02 237.02 245.5 249.02 232.87 352.6 350.98\n", + " 238.16 251. 
227.18 225.2 349.26 221.36 233.9 246.57 235.15 253.14\n", + " 236.02 257.38 348.04 247.74 243.54 261.2 246.94 225.9 363.01 238.34\n", + " 364.52 242.95 251.86 248.17 233.46 233.22 230.48 236.14 228.48 254.43\n", + " 246.79 246.3 255.97 232.2 378.97 222.32 381.57 245.2 238.56 230.25\n", + " 242.71 222.08 353.06 261.29 220.96 254.63 261.48 243.97 244.35 240.64\n", + " 237.2 256.73 366.77 229.57 254.61 263.59 249.15 264.43 240.76 242.73\n", + " 251.7 244.44 227.32 230.44 225.92 241.63 243.82 243.45 230.32 247.4\n", + " 249.13 232.36 251.4 360.48 227.62 220.56 233.34 259.76 248.08 249.24\n", + " 250.79 247.64 248.93 231.22 264.06 325.2 231.02 230.3 232.22 228.58\n", + " 233.58 234.74 228.52 234.22 256.61 227.64 347.1 251.53 222.22 260.76\n", + " 245.52 255.27 254.34 363.27 238.83 223.2 259.7 227.2 219.12 239.31\n", + " 240.7 367.89 358.92 257.89 224.98 240.84 251.8 226. 258.06 253.47\n", + " 243.6 248.66 228.74 337.47 228.38 251.06 228.26 241.62 231.58 242.02\n", + " 229. 336.5 259.14 240.35 249.7 227.76 246.19 239.67 243.5 251.51\n", + " 242.78 250.9 249.6 257.47 234.64 323.35 228.92 235.44 333.8 244.36\n", + " 233.8 227.74 230.2 236.54 365.58 227.92 246.91 243.27 249.59 226.14\n", + " 242.37 360.11 219.46 251.34 231. 232.9 262.3 230.24 256. 250.25\n", + " 240.4 230.72 248.15 245.18 254.64 224.48 364.41 324.23 359.18 226.58\n", + " 235.23 366.9 221.02 231.66 380.42 342.6 231.46 244.15 229.06 372.57\n", + " 244.73 255.46 347.77]\n", + "Carbon dioxide [Partial pressure] in Arterial blood\n", + "[ nan 40.45 41.06 39.46 40.8 38.71 40.52 41.19 40.59 38.75 39.44 40.38\n", + " 41.44 40.08 40.79 39.95 39.98 40.49 39.58 40.63 42.71 39.61 37.45 38.77\n", + " 40.23 40.87 39. 
40.41 38.6 39.26 39.23 40.77 40.55 40.01 39.83 40.68\n", + " 40.7 39.09 40.11 41.26 41.61 40.34 41.56 41.38 41.49 39.51 39.77 40.26\n", + " 40.74 39.02 40.64 39.15 38.81 40.14 40.56 40.19 40.51 39.38 39.5 41.22\n", + " 40.07 39.57 38.36 39.85 40.48 40.54 40.44 39.14 38.56 40.31 42.62 40.46\n", + " 38.83 42.07 39.74 38.98 42.94 39.89 41.8 39.88 40.36 38.96 41.69 40.04\n", + " 38.23 40.06 40.88 39.08 39.59 40.18 40.47 40.53 40.22 40.96 40.16 39.91\n", + " 39.49 37.46 40.24 39.06 39.79 39.41 40.82 39.78]\n", + "Eosinophils/100 leukocytes in Blood by Automated count\n", + "[ nan 4.45 4.4 4.59 3.74 4.35 4.61 4.46 4.51 5.05 4.73 4.72 4.65 4.15\n", + " 4.22 4.81 4.63 4.34 4.37 4.16 4.23 4.04 3.98 4.86 4.66 4.56 4.68 4.12\n", + " 4.83 4.25 4.92 4.75 4.32 4.58 4.11 4.71 4.41 4.21 4.17 4.89 4.19 4.43\n", + " 4.57 4.39 4.5 4.42 4.54 3.91 4.7 4.28 4.31 4.69 4.38 4.55 4.76 4.6\n", + " 4.78 4.29 4.74 4.98 4.93 4.88 4.33 4.94 4.24 4.49 4.48 4.36 4.47 5.08\n", + " 4.03 4.87 4.52 4.62 4.44 4.8 4.64 5.19 4.06 3.95 4.97 4.96 4.53 4.85\n", + " 4.08 3.87 3.92 4.18 4.84 3.84 4.27 3.99 5.06 5.24 4.82 4.79]\n", + "Bicarbonate [Moles/volume] in Arterial blood\n", + "[ nan 24.1 25.27 24.3 24.34 24.36 25.05 24.41 24.58 24.82 24.48 24.25\n", + " 24.22 23.65 24.33 24.69 23.9 24.62 24.89 24.71 24.7 24.09 24.56 24.08\n", + " 24.47 24.55 24.21 23.95 24.72 24.53 23.87 23.8 24.75 24.38 24.57 23.81\n", + " 24.06 24.93 25.07 24.64 23.67 24.23 23.94 24.28 24.44 24.17 24.45 24.37\n", + " 24.54 24.46 24.52 24.49 24.43 24.94 23.68 24.02 25.06 24.19 24.29 24.\n", + " 23.78 24.84 23.85 24.67 24.91 24.63 25.14 25.37 23.5 24.13 24.61 24.81\n", + " 24.15 24.32 24.85 23.55 25.21 23.18 24.76 24.07 24.92 25.15 24.86 24.12\n", + " 23.7 23.71 23.82]\n", + "Respiratory Disorders\n", + "[ 5 1 2 6 0 4 3 13 12 8 7 11 17 16 10 9 15 14]\n", + "Heart and Cardiovascular Diseases\n", + "[0 3 2 5 1 4 6 7 9 8]\n", + "Metabolic and Endocrine Disorders\n", + "[ 4 3 6 2 1 5 9 7 8 0 10 11 12]\n", + "Neurological Disorders\n", + 
"[2 0 3 4 1 6 5 8 7 9]\n", + "Orthopedic Injuries\n", + "[4 0 2 3 5 1 6 7]\n", + "Mental Health\n", + "[11 8 10 12 7 9 6 5 13 15 14 4 3 16 0 2 17 1]\n", + "Reproductive and Pregnancy\n", + "[ 0 1 15 18 14 23 19 12 2 13 5 16 20 21 24 11 17 8 9 10 3 4 22 6\n", + " 7 25 26]\n", + "Pain Relievers and Analesics\n", + "[3 2 1 4 0 5 7 6 8 9]\n", + "Cardiovascular and Blood Pressure Medications\n", + "[ 2 4 7 5 1 0 8 6 3 11 9 10 12 16 13 15 14 20]\n", + "Injection Medications\n", + "[ 3 1 2 4 6 7 5 8 0 10 9]\n", + "Oral Medications\n", + "[ 2 3 1 8 0 4 5 6 7 9 10 12]\n", + "Other Medications\n", + "[ 5 1 3 2 7 8 0 4 12 9 10 6 11 13 14 15]\n", + "Therapies and Regimes\n", + "[ 9 2 8 7 3 4 6 5 10 11 13 1 12 14 15 0]\n", + "Diagnostic Procedures\n", + "[ 8 7 9 5 10 14 16 1 3 6 15 13 4 19 12 18 2 11 17 20 24 21 25 23\n", + " 22 0]\n", + "Surgical Interventions\n", + "[1 2 0 3 4 5 6 7]\n", + "Patient Care Management\n", + "[ 8 10 13 12 3 6 2 5 9 7 4 11 14 15 1 16 19 0 17]\n", + "age_30t50\n", + "[0 1]\n", + "age_50t70\n", + "[1 0]\n", + "age_gt70\n", + "[0 1]\n", + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Albumin_abnormal\n", + "[0 1]\n", + "Albumin_normal\n", + "[0 1]\n", + "Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Bilirubin.total [Mass/volume] in Urine by Test strip_abnormal\n", + "[0 1]\n", + "Bilirubin.total 
[Mass/volume] in Urine by Test strip_normal\n", + "[0 1]\n", + "Body Mass Index_abnormal\n", + "[1 0]\n", + "Body Mass Index_normal\n", + "[0 1]\n", + "Body temperature_abnormal\n", + "[0 1]\n", + "Body temperature_normal\n", + "[1 0]\n", + "Calcium_normal\n", + "[1 0]\n", + "Calcium [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Carbon Dioxide_abnormal\n", + "[0 1]\n", + "Carbon Dioxide_normal\n", + "[0 1]\n", + "Chloride_abnormal\n", + "[0 1]\n", + "Chloride_normal\n", + "[1 0]\n", + "Chloride [Moles/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Chloride [Moles/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Creatinine_abnormal\n", + "[0 1]\n", + "Creatinine_normal\n", + "[1 0]\n", + "DXA [T-score] Bone density_abnormal\n", + "[0 1]\n", + "DXA [T-score] Bone density_normal\n", + "[1 0]\n", + "Diastolic Blood Pressure_abnormal\n", + "[1 0]\n", + "Diastolic Blood Pressure_normal\n", + "[0 1]\n", + "Erythrocyte distribution width [Entitic volume] by Automated count_abnormal\n", + "[0 1]\n", + "Erythrocyte distribution width [Entitic volume] by Automated count_normal\n", + "[1 0]\n", + "Erythrocyte distribution width [Ratio] by Automated count_abnormal\n", + "[0 1]\n", + "Erythrocyte distribution width [Ratio] by Automated count_normal\n", + "[0 1]\n", + "Erythrocytes [#/volume] in Blood by Automated count_abnormal\n", + "[0 1]\n", + "Erythrocytes [#/volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Ferritin [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Ferritin [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Globulin [Mass/volume] in Serum by calculation_abnormal\n", + "[0 1]\n", + "Globulin [Mass/volume] in Serum by calculation_normal\n", + "[0 1]\n", + "Glomerular filtration rate/1.73 sq M.predicted_abnormal\n", + "[0 1]\n", + "Glomerular filtration rate/1.73 sq M.predicted_normal\n", + "[0 1]\n", + "Glucose_abnormal\n", + "[0 1]\n", + "Glucose_normal\n", + "[1 0]\n", + "Glucose [Mass/volume] in 
Serum or Plasma_abnormal\n", + "[0 1]\n", + "Glucose [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Glucose [Mass/volume] in Urine by Test strip_normal\n", + "[0 1]\n", + "Heart rate_abnormal\n", + "[0 1]\n", + "Heart rate_normal\n", + "[1 0]\n", + "Hematocrit [Volume Fraction] of Blood_abnormal\n", + "[0 1]\n", + "Hematocrit [Volume Fraction] of Blood_normal\n", + "[0 1]\n", + "Hematocrit [Volume Fraction] of Blood by Automated count_abnormal\n", + "[0 1]\n", + "Hematocrit [Volume Fraction] of Blood by Automated count_normal\n", + "[1 0]\n", + "Hemoglobin A1c/Hemoglobin.total in Blood_abnormal\n", + "[1 0]\n", + "Hemoglobin A1c/Hemoglobin.total in Blood_normal\n", + "[0 1]\n", + "Hemoglobin [Mass/volume] in Blood_abnormal\n", + "[0 1]\n", + "Hemoglobin [Mass/volume] in Blood_normal\n", + "[1 0]\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method_abnormal\n", + "[0 1]\n", + "Hemoglobin.gastrointestinal [Presence] in Stool by Immunologic method_normal\n", + "[0 1]\n", + "High Density Lipoprotein Cholesterol_abnormal\n", + "[1 0]\n", + "High Density Lipoprotein Cholesterol_normal\n", + "[0 1]\n", + "Iron [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Iron [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Iron binding capacity [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Iron saturation [Mass Fraction] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Iron saturation [Mass Fraction] in Serum or Plasma_normal\n", + "[0 1]\n", + "Left ventricular Ejection fraction_abnormal\n", + "[0 1]\n", + "Left ventricular Ejection fraction_normal\n", + "[0 1]\n", + "Leukocytes [#/volume] in Blood by Automated count_abnormal\n", + "[0 1]\n", + "Leukocytes [#/volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Low Density Lipoprotein Cholesterol_abnormal\n", + "[0 1]\n", + "Low Density Lipoprotein 
Cholesterol_normal\n", + "[1 0]\n", + "MCH [Entitic mass] by Automated count_abnormal\n", + "[0 1]\n", + "MCH [Entitic mass] by Automated count_normal\n", + "[1 0]\n", + "MCHC [Mass/volume] by Automated count_normal\n", + "[1 0]\n", + "MCV [Entitic volume] by Automated count_normal\n", + "[1 0]\n", + "Magnesium [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Magnesium [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Microalbumin Creatinine Ratio_abnormal\n", + "[0 1]\n", + "Microalbumin Creatinine Ratio_normal\n", + "[0 1]\n", + "NT-proBNP_abnormal\n", + "[0 1]\n", + "Oxygen saturation in Arterial blood_abnormal\n", + "[0 1]\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported_abnormal\n", + "[1 0]\n", + "Pain severity - 0-10 verbal numeric rating [Score] - Reported_normal\n", + "[0 1]\n", + "Platelet mean volume [Entitic volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Platelets [#/volume] in Blood by Automated count_abnormal\n", + "[0 1]\n", + "Platelets [#/volume] in Blood by Automated count_normal\n", + "[1 0]\n", + "Polyp size greatest dimension by CAP cancer protocols_abnormal\n", + "[0 1]\n", + "Polyp size greatest dimension by CAP cancer protocols_normal\n", + "[0 1]\n", + "Potassium_normal\n", + "[1 0]\n", + "Prostate specific Ag [Mass/volume] in Serum or Plasma_abnormal\n", + "[0 1]\n", + "Prostate specific Ag [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "Protein [Mass/volume] in Urine by Test strip_abnormal\n", + "[0 1]\n", + "Protein [Mass/volume] in Urine by Test strip_normal\n", + "[0 1]\n", + "Respiratory rate_abnormal\n", + "[0 1]\n", + "Respiratory rate_normal\n", + "[1 0]\n", + "Sodium_normal\n", + "[1 0]\n", + "Specific gravity of Urine by Test strip_abnormal\n", + "[0 1]\n", + "Systolic Blood Pressure_abnormal\n", + "[1 0]\n", + "Systolic Blood Pressure_normal\n", + "[0 1]\n", + "Total Cholesterol_abnormal\n", + "[0 1]\n", + "Total Cholesterol_normal\n", + "[1 0]\n", + 
"Triglycerides_abnormal\n", + "[0 1]\n", + "Triglycerides_normal\n", + "[1 0]\n", + "Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method_abnormal\n", + "[0 1]\n", + "Troponin I.cardiac [Mass/volume] in Serum or Plasma by High sensitivity method_normal\n", + "[0 1]\n", + "US Guidance for biopsy of Prostate_abnormal\n", + "[0 1]\n", + "US Guidance for biopsy of Prostate_normal\n", + "[0 1]\n", + "Urea Nitrogen_normal\n", + "[1 0]\n", + "Urea nitrogen [Mass/volume] in Serum or Plasma_normal\n", + "[0 1]\n", + "pH of Urine by Test strip_normal\n", + "[0 1]\n", + "HER2 [Presence] in Breast cancer specimen by FISH_negative\n", + "[0 1]\n", + "HER2 [Presence] in Breast cancer specimen by FISH_positive\n", + "[0 1]\n", + "Parainfluenza virus 1 RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "Estrogen+Progesterone receptor Ag [Presence] in Tissue by Immune stain_negative\n", + "[0 1]\n", + "Estrogen+Progesterone receptor Ag [Presence] in Tissue by Immune stain_positive\n", + "[0 1]\n", + "Ketones [Mass/volume] in Urine by Test strip_low\n", + "[0 1]\n", + "Ketones [Mass/volume] in Urine by Test strip_medium\n", + "[0 1]\n", + "marital_m\n", + "[1 0]\n", + "marital_s\n", + "[0 1]\n", + "Abuse Status [OMAHA]_no\n", + "[0 1]\n", + "Abuse Status [OMAHA]_severe\n", + "[0 1]\n", + "Interleukin 6 [Mass/volume] in Serum or Plasma_5.33\n", + "[0 1]\n", + "Progesterone receptor Ag [Presence] in Breast cancer specimen by Immune stain_negative\n", + "[0 1]\n", + "Progesterone receptor Ag [Presence] in Breast cancer specimen by Immune stain_positive\n", + "[0 1]\n", + "Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "Influenza virus A RNA [Presence] in Respiratory specimen by NAA with probe detection_positive\n", + "[0 1]\n", + "Parainfluenza virus 2 RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + 
"Response to cancer treatment_improving\n", + "[0 1]\n", + "Response to cancer treatment_worsening\n", + "[0 1]\n", + "Estrogen receptor Ag [Presence] in Breast cancer specimen by Immune stain_negative\n", + "[0 1]\n", + "Estrogen receptor Ag [Presence] in Breast cancer specimen by Immune stain_positive\n", + "[0 1]\n", + "Hemoglobin [Presence] in Urine by Test strip_negative\n", + "[0 1]\n", + "Hemoglobin [Presence] in Urine by Test strip_positive\n", + "[0 1]\n", + "Housing status_homeless\n", + "[0 1]\n", + "Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "Influenza virus B RNA [Presence] in Respiratory specimen by NAA with probe detection_positive\n", + "[0 1]\n", + "Objective assessment of cardiovascular disease NYHA_minimal\n", + "[0 1]\n", + "Objective assessment of cardiovascular disease NYHA_mod-severe\n", + "[0 1]\n", + "Objective assessment of cardiovascular disease NYHA_severe\n", + "[0 1]\n", + "Drugs of abuse 5 panel - Urine by Screen method_negative\n", + "[0 1]\n", + "Drugs of abuse 5 panel - Urine by Screen method_positive\n", + "[0 1]\n", + "Leukocyte esterase [Presence] in Urine by Test strip_negative\n", + "[0 1]\n", + "Adenovirus A+B+C+D+E DNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "Protein [Presence] in Urine by Test strip_1+\n", + "[0 1]\n", + "Protein [Presence] in Urine by Test strip_2+\n", + "[0 1]\n", + "Protein [Presence] in Urine by Test strip_3+\n", + "[0 1]\n", + "Appearance of Urine_cloudy\n", + "[0 1]\n", + "Capillary refill [Time] of Nail bed_increased\n", + "[0 1]\n", + "Treatment status Cancer_changed\n", + "[0 1]\n", + "Gram positive blood culture panel by Probe in Positive blood culture_positive\n", + "[0 1]\n", + "Glucose [Presence] in Urine by Test strip_2+\n", + "[0 1]\n", + "Respiratory syncytial virus RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "Functional 
capacity NYHA_classi\n", + "[0 1]\n", + "Functional capacity NYHA_classii\n", + "[0 1]\n", + "Functional capacity NYHA_classiii\n", + "[0 1]\n", + "Functional capacity NYHA_classiv\n", + "[0 1]\n", + "Color of Urine_brown\n", + "[0 1]\n", + "Color of Urine_reddish\n", + "[0 1]\n", + "Nitrite [Presence] in Urine by Test strip_negative\n", + "[0 1]\n", + "Tumor marker Cancer_negative\n", + "[0 1]\n", + "Tobacco smoking status NHIS_former\n", + "[1 0]\n", + "Tobacco smoking status NHIS_never\n", + "[0 1]\n", + "gender_f\n", + "[0 1]\n", + "gender_m\n", + "[1 0]\n", + "HIV status_negative\n", + "[0 1]\n", + "HIV status_positive\n", + "[0 1]\n", + "Are you covered by health insurance or some other kind of health care plan [PhenX]_no\n", + "[0 1]\n", + "Are you covered by health insurance or some other kind of health care plan [PhenX]_yes\n", + "[0 1]\n", + "Human metapneumovirus RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "Ketones [Presence] in Urine by Test strip_1+\n", + "[0 1]\n", + "Ketones [Presence] in Urine by Test strip_2+\n", + "[0 1]\n", + "Ketones [Presence] in Urine by Test strip_3+\n", + "[0 1]\n", + "Ketones [Presence] in Urine by Test strip_trace\n", + "[0 1]\n", + "Clarity of Urine_cloudy\n", + "[0 1]\n", + "Clarity of Urine_translucent\n", + "[0 1]\n", + "Stage group.clinical Cancer_earlystage\n", + "[0 1]\n", + "Stage group.clinical Cancer_latestage\n", + "[0 1]\n", + "Rhinovirus RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "HER2 [Presence] in Breast cancer specimen by Immune stain_negative\n", + "[0 1]\n", + "HER2 [Presence] in Breast cancer specimen by Immune stain_positive\n", + "[0 1]\n", + "Smokes tobacco daily_True\n", + "[0 1]\n", + "Parainfluenza virus 3 RNA [Presence] in Respiratory specimen by NAA with probe detection_negative\n", + "[0 1]\n", + "SARS-CoV-2 RNA Pnl Resp NAA+probe_False\n", + "[0 1]\n", + "SARS-CoV-2 RNA Pnl Resp 
# Sanity check: list the distinct values of every column of the encoded frame.
for col in df2.columns:
    print(col)
    print(df2[col].unique())

# Remaining NaN values are filled with the per-column median.
df2 = df2.fillna(df2.median())

# ### Correlations
X_train = df2.drop('label', axis=1)
y_train = df2['label']

# #### Correlations between features
CORR_THRESHOLD = .98    # |Pearson r| above which two features are treated as redundant
MAX_PAIRS_SHOWN = 10    # how many offending pairs to preview below

X_corr = X_train.corr().abs()
# Pull the matrix out once; scalar `.iloc` lookups inside the double loop are
# far slower than indexing a plain array.
corr_values = X_corr.to_numpy()
corr_columns = X_corr.columns
n_features = len(corr_columns)

high_corr = []      # (col_1, col_2, |r|) for every pair above the threshold
to_drop = set()     # second member of each offending pair

# Walk the strict upper triangle so each unordered pair is visited once.
for i in range(n_features):
    for j in range(i + 1, n_features):
        corr = corr_values[i, j]
        # NaN correlations (e.g. constant columns) compare False and are skipped.
        if corr > CORR_THRESHOLD:
            col_1 = corr_columns[i]
            col_2 = corr_columns[j]
            high_corr.append((col_1, col_2, corr))
            # NOTE: greedy rule - col_2 is marked even when col_1 was itself
            # marked by an earlier pair.
            to_drop.add(col_2)

print(f"Columns with correlation > {CORR_THRESHOLD}:")
# Slice instead of indexing 0..9 so fewer than MAX_PAIRS_SHOWN pairs
# no longer raises IndexError.
for col_1, col_2, corr in high_corr[:MAX_PAIRS_SHOWN]:
    print(col_1, "<->", col_2, ":", corr)

len(to_drop)
def get_sorted_correlations(X, cols, target):
    """Rank columns by the strength of their Pearson correlation with ``target``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame. It is only read; the caller's frame is never modified.
    cols : iterable of str
        Column names to evaluate. Names missing from ``X`` are reported with a
        printed message and skipped.
    target : pandas.Series
        Target values, aligned with ``X`` on the index.

    Returns
    -------
    list of (str, float)
        ``(column, correlation rounded to 2 decimals)`` pairs, ordered by
        descending absolute correlation. Columns with at most one distinct
        non-missing value are omitted. Undefined (NaN) correlations are kept
        but placed last.
    """
    correlations = {}
    for col in cols:
        if col not in X.columns:
            print(f"Column '{col}' not found in the DataFrame")
            continue

        values = X[col]
        if values.dropna().nunique() <= 1:
            # A constant or all-missing column has no defined correlation.
            continue

        if not pd.api.types.is_numeric_dtype(values):
            # Encode categories as integer codes on a local copy only.
            # `cat.codes` marks missing entries with -1; turn those back into
            # NaN so they are excluded instead of acting as an extra level.
            codes = values.astype('category').cat.codes
            values = codes.where(codes >= 0)

        # Series.corr aligns on the index and ignores pairs containing NaN.
        correlations[col] = round(target.corr(values), 2)

    # NaN cannot be ordered reliably, so give it a key below every real |r|.
    return sorted(
        correlations.items(),
        key=lambda item: -1.0 if pd.isna(item[1]) else abs(item[1]),
        reverse=True,
    )
from tabulate import tabulate
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score


def _positive_class_scores(model, X):
    """Return a continuous positive-class score for each row of ``X``.

    Uses ``predict_proba`` when the fitted model exposes it and falls back to
    ``decision_function`` otherwise (ROC AUC only needs a ranking score).
    """
    if hasattr(model, "predict_proba"):
        return model.predict_proba(X)[:, 1]
    return model.decision_function(X)


def _model_display_name(model):
    """Return the class name of ``model``, unwrapping a Pipeline to its final step."""
    name = model.__class__.__name__
    if name == 'Pipeline':
        name = model.steps[-1][1].__class__.__name__
    return name


def calculate_metrics(model, X_train, y_train, X_val, y_val):
    """Fit ``model`` on the training data and score it on the evaluation data.

    Note that ``model`` is fitted in place.

    Returns
    -------
    tuple
        ``(accuracy, recall, precision, f1, auc, gini)`` where
        ``gini = 2 * auc - 1``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_val)
    accuracy = accuracy_score(y_val, predictions)
    recall = recall_score(y_val, predictions)
    precision = precision_score(y_val, predictions)
    f1 = f1_score(y_val, predictions)
    auc = roc_auc_score(y_val, _positive_class_scores(model, X_val))
    gini = 2 * auc - 1
    return accuracy, recall, precision, f1, auc, gini


def print_metrics(models, X_train, y_train, X_val, y_val):
    """Fit every model and print one table row of metrics per model.

    Each entry of ``models`` is fitted in place via ``calculate_metrics``.
    Nothing is returned; the table is written to stdout.
    """
    columns = ['Model', 'Accuracy', 'Recall', 'Precision', 'F1 Score', 'AUC', 'Gini']
    # Collect plain records and build the frame once instead of enlarging a
    # DataFrame row by row.
    records = []
    for model in models:
        accuracy, recall, precision, f1, auc, gini = calculate_metrics(model, X_train, y_train, X_val, y_val)
        records.append({'Model': _model_display_name(model),
                        'Accuracy': accuracy,
                        'Recall': recall,
                        'Precision': precision,
                        'F1 Score': f1,
                        'AUC': auc,
                        'Gini': gini})
    results = pd.DataFrame(records, columns=columns)
    print(tabulate(results, headers='keys', tablefmt='simple'))
from matplotlib import pyplot as plt
from sklearn import metrics


def roc_curve_plot(models, X_train, y_train, X_val, y_val,
                   title="ROC Curve for Different Models (Training Data)"):
    """Fit each model and draw its ROC curve on one shared set of axes.

    Parameters
    ----------
    models : iterable
        Estimators to compare. Each one is fitted in place on
        ``(X_train, y_train)``.
    X_train, y_train
        Data used to fit every model.
    X_val, y_val
        Data the ROC curves and AUC values are computed on.
    title : str, optional
        Figure title. The default keeps the original caption; pass a different
        one when plotting on held-out data.
    """
    # A single explicit figure. The previous `plt.figure(0).clf()` followed by
    # `plt.figure(...)` left a second, empty figure in the notebook output.
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.set_title(title)
    ax.set_xlabel("False positive rate")
    ax.set_ylabel("True positive rate")

    for model in models:
        model.fit(X_train, y_train)
        # Any monotone score works for ROC; fall back for models that do not
        # expose probabilities.
        if hasattr(model, "predict_proba"):
            y_score = model.predict_proba(X_val)[:, 1]
        else:
            y_score = model.decision_function(X_val)
        fpr, tpr, _ = metrics.roc_curve(y_val, y_score)
        auc = round(metrics.roc_auc_score(y_val, y_score), 4)

        model_name = model.__class__.__name__
        if model_name == 'Pipeline':
            # Label a pipeline by its final estimator rather than "Pipeline".
            model_name = model.steps[-1][1].__class__.__name__
        ax.plot(fpr, tpr, label=f"{model_name}, AUC={auc}")

    ax.legend(framealpha=0.0)
"iVBORw0KGgoAAAANSUhEUgAAAeMAAAHiCAYAAADbK6SdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAACrZElEQVR4nOzdd3gU1dfA8e9NoST03muoaUsvUqVKFaQq0gTpWBGxoPLDzmsjFGmiggkCgiiISBNRpIcQQgkl9F5CQiAk2fv+McmangUSJlnO53lwd6ee2ax79t6ZuUdprRFCCCGEeZzMDkAIIYR41EkyFkIIIUwmyVgIIYQwmSRjIYQQwmSSjIUQQgiTSTIWQgghTCbJWDgkpVRepdQvSqlwpdTSh7jfSKVUlbRiUEpNVUpdUUpdeFgxmU0ptVApNdXOZcOUUm3vYx+5lVIhSqlS9x5hutt9Qyk1L7OXze7i389DSqkSZsfyqJBk7ADiv8BuxyeCC/FffvmSLdNUKbVRKRURnxx+UUrVTrZMAaXUF0qpU/HbOhr/ulga+1VKqfFKqWCl1C2l1Bml1FKllHdWHq+degElgaJa694PujGlVCullDX+fYmMP9YflVINEi+ntc6ntT6eWgxKqfLAK0BtrXWmJg17KKW0UsojnfmD45f5LNn0J+OnL8zyIO/f88AWrfUFpdRvif5OMUqpu4lez76XjWqtP9BaD8vsZe9V/Pt/K/4YriqlNiil+t7D+q2UUmfsXV5rHQ0sACbeT7zi3kkydhxdtdb5AAtQB5iUMEMp1QRYB/wMlAEqA/uAvxO14nIBGwBPoCNQAGgKXAUaprHPL4EXgPFAEaA6sBLofK/BK6Vc7nWdDFQEjmitYzMxlnPx73F+oDFwCPhLKdXGzhgqAle11pfuIyallHoY/78eA/omew8GAkcewr4fxAjgewCt9RPxP4ryAYuBTxJea61HJqyQBZ+5rOYbf0w1gIWAn1LqnSzc3w/AIKVU7izch0igtZZ/OfwfEAa0TfT6E2B1otd/ATNTWe834Lv458OAi0A+O/dZDYgDGqazzGZgWKLXg4GtiV5rYAwQCpwAZgPTkm3jZ+Dl+OdlgOXA5fjlx6ex3/eAu0AMEAk8h/HD8y3gJHAJ+A4oGL98pfhYngNOYbSwkm+zFXAmlel+wK5kx+SRSgwjgNuANf71wvjlGwP/ADcwfiC1Svb+vQ/8Hb+uB1AT+AO4BhwG+iRafiEwA1gNRADbgarx87bEx3Yrfv99UzmWwcBWYC3QOX5aEeAC8GlCzPHTuwEH4uPeDNRKNK8OsCc+hiVAADA10fwuQGD8uv8APql9ljF+BO4CbmJ8Nj9L4+9dIf79cUll3sJk+07ymYuf9iVwOn4/u4HmiZZ/F1iU7HMyKP5zcgV48z6XzQt8C1wHDgKvkcrnK/nnKtm0XsAdjJ4XgCHx24oAjgMj4qe7k/SzF4nx/1JDYFv83+E8xmc5V7J9hAIts/L7S/7Fv9dmByD/MuGPmPQLrBywH/gy/rUbRtJsncp6Q4Dz8c8DgG/vYZ8jgZMZLLOZjJPxHxhf+HmBFvFfiip+fuH4L5EyGMl0NzAZyAVUif/C6ZDGvm1fjPGvhwJH49fLB/wEfB8/L+GL87v4L668qWyvVWpflsDj8V9y7omOySONGJJsAyiL0fPQKf742sW/Lp7o/TuF0VvhAhSMf3+GxL+ui/El7xm//EKMJN0wfv5iICDZ++2R2vuV+O8DPA0siZ82GvgamMp/PyCqYyT1doArRiI5Gv93yYXxg+el+Hm9MH6QTI1fty7Gj6FGgDNGsgoDcqfyWd4GPBv/PB/QOI24OwMH0pi3kJTJ2PaZi582ACga/569gvHjI0/yv2Giz8lcjM+rLxBN/A+Re1z2I+BPjM94OSCIe0/GrkAs8ESi96EqoICWQBRQN63PL1AP48egS3y8B4EXky2zijR+9Mq/zP0n3dSOY6VSKgLjy/oSkNB9VQTji/5
8KuucBxLOBxdNY5m03OvyaflQa31Na30bowWvgebx83oB27TW54AGGElqitb6rjbOy84F+tm5n2cwWlbHtdaRGN34/ZJ1Vb6rtb4VH4u9zmF8+RW6h3USDADWaK3XaK2tWus/MFqCnRIts1BrfUAbXd0dgTCt9Tda61it9R6MnoJeiZb/SWu9I375xRinLe7VCqCVUqogRhf1d8nm98XoeflDax0DTMNIOE0xvtxdgS+01jFa62XAzkTrDge+1lpv11rHaa2/xUhSjVOJIwbwUEoV01pHaq3/TSPeQhitQXsl/syhtV6ktb4a/57+H5Aboys4Le9prW9rrfdh9Gb43seyfYAPtNbXtdZngK/uIX7i447B+DFWJP71aq31MW34E+PUVPN01t+ttf43/rjDMH50tUy2WAT399kW90iSseN4UmudH+MXcE3+S7LXMVpupVNZpzTG/8xgtMhSWyYt97p8Wk4nPNFaa4wWev/4SU9jJBQwzreWUUrdSPgHvIFxgZQ9ymC02BKcxGgRJF7/NPeuLMYPiBv3sW5FoHeyY2pG0vf1dLLlGyVb/hkg8cVgia/SjsJoUd6T+CS1GqNbv5jW+u9kiyR5L7XW1vg4y8bPOxv/t0yQ+H2vCLyS7BjKx6+X3HMYrfBDSqmdSqkuaYR8HeM8vr2S/J2VUq8opQ7GX9h4A6MHItWLFuPdy3uc1rJlksVxz589pZQrUByjNwSl1BNKqX+VUtfij6MT6RyHUqq6UurX+Is+bwIfpLJ8fu7vsy3ukSRjBxP/i3ghRmsFrfUtjO6+1K4o7oNx0RbAeqCDUsrdzl1tAMoppeqns8wtjG7yBKldQayTvfYHeimlKmJ0ZS6Pn34a4xxfoUT/8mutO2GfcxiJIEEFjC6+i+nEYo8ewJ749/lencboKk98TO5a64/SiOk08Gey5fNprUfdx74z8h1Gl+33qcxL8l4qpRRGQj2L0VtSNn5aggqJnp8G3k92DG5aa//kO9Fah2qt+wMlgI+BZWl8PoOAKvdwQZbtPVVKNce4YrgPUFhrXQgIx+jtyErnMbqnE5S/j210x/gM74i/yGo5xv/3JeOPYw3/HUdqn+1ZGBchVtNaF8D4cZv8uGthtOhFFpNk7Ji+ANoppSzxr1/HuCpyvFIqv1KqsDLu+2yCcaERGF+6p4HlSqmaSiknpVRRZdw7mSLhaa1DgZmAf/xtE7mUUnmUUv2UUq/HLxYI9FRKucXfUvNcRoFrrfdiXKA1D/hda30jftYO4KZSaqIy7t91Vkp5Jb+1KB3+wEtKqcrKuO3rA4zzovdztbVSSpWNv5J1GMaX2P1YBHRVSnWIP5488e9luTSW/xWorpR6VinlGv+vgVKqlp37u4hxztwef2KcE56eyrwfgc5KqTbxrbNXMLqa/8H44RcLjFdKuSilepL0avy5wEilVKP499FdKdVZKZWiZauUGqCUKh7f8r4RPzku+XLx3byhpH3Vf3ryx8d7GXBRSk3GuJMgq/0ITIr/f7EsMNbeFZVSRZRSz2BcrPex1voqxrn63BjHEauUegJon2i1i0DR+FMPCfJjXLQWqZSqCST5URcfVxEgrdMDIhNJMnZAWuvLGC2bt+NfbwU6AD0xfpGfxLjitVl8UkUb9xW2xfil/AfG/6Q7MLqttqexq/EYV2DOwPiyPIbRUvwlfv7nGFcUX8S4cnRxyk2kyj8+lh8SHVMc0BXjHOgJjO71eRhdivZYgPGDY0v8+neAcXaum6CMUirhatSdgDfG1c/r7nE7AGitT2O0bt7A+BI9DUwgjf8vtdYRGF+w/TBapxcwWoz23nryLvBtfPdwnwxi01rrDVrra6nMO4xxvns6xt+hK8atdXe11ncxPmeDMbqP+2JcLJew7i6M88Z+8fOPxi+bmo7Agfj3/Eugn9b6ThrLfg08m94xpeF3jLsKjmD8f3GH+ztdca+mAGcwPovrgWUYP2jSsy/+vTiK8SPwJa31ZLB9NsZ
jJPnrGKd4ViWsqLU+hPH/1fH4v38Z4NX45SIwfiQtSba/pzEu6swoLpEJEq5aFUKIHCu+m3Yv0EZrnRkXFj5USqlRGD82kl9AZYr493Mf0ELfx33x4t5JMhZCiIdMKVUa45TBNox79lcDflrrL8yMS5gnp41AI4QQjiAXRtd6ZYxTPAEY12CIR5S0jIUQQgiTyQVcQgghhMkkGQshhBAmM+2ccbFixXSlSpXM2r0QQgjx0O3evfuK1rp48ummJeNKlSqxa9cus3YvhBBCPHRKqZOpTZduaiGEEMJkkoyFEEIIk0kyFkIIIUwmyVgIIYQwmSRjIYQQwmSSjIUQQgiTSTIWQgghTCbJWAghhDCZJGMhhBDCZJKMhRBCCJNJMhZCCCFMJslYCCGEMJkkYyGEEMJkkoyFEEIIk2WYjJVSC5RSl5RSwWnMV0qpr5RSR5VSQUqpupkfphBCCOG47GkZLwQ6pjP/CaBa/L/ngVkPHpYQQgjx6HDJaAGt9RalVKV0FukOfKe11sC/SqlCSqnSWuvzmRWkvSLuxGDV97my1sTGRqY5OyY6FuMQhcgacVFRIJ+xnE9rVEza3yUi+9JaY9VxWLUVqzWOOGsclWo2wDVXrizfd4bJ2A5lgdOJXp+Jn/bQkvHzAavZFAYxNx7WHoUQ2YEzcThjJRcxVFIXACioblFOXSEOJxqqQ1wnX6bsq65TKHVVKFcpSFw6nYql1bVM2Z/IHk70/oPKng2zfD+ZkYxVKtNS/XmvlHoeoyubChUqZMKuDf/eMRKxa0Fwif//TmmN0lacrNb4AHX8f53QTs7oRGG75o3GyTWOuLsuaOt/01Xqh5ExTervykPkpJxxUpnx501nH/FfSFasWbofIdCa1nd2MjzyJ8rGXeK6UwHy6SgKWyPsWv22yv3AIbjqWJzR3HTJT7CrR7rLuunbBOWq9sD7NIMG9P1+9933Po39KbO/OJPJGxfN08VLP5R9Zca39RmgfKLX5YBzqS2otZ4DzAGoX79+pv21dXwumNOzAU1dIjjeuUuKZeJqVOZIocc5nzfx9WVWfJtOIbrcWaqcLUrZG4WgxhM4t3ge53zumRVepovcfp6owMvpLnP3RDgAuSoXzNJY3CzFydfo4XxYxSPs3USfY1d38pXxgPyl4U44FK8BbkXAaoVSXsYyLrmhSBVwzgUFypA32ebCo8OJtKMr+eKti0TcTZnwC9sRcoNEz4+FH8OqU//Ruv/yfs5EnuHI9SMAOKn0L+VJazsAlW9Wpvyt/76OFQqlVNLH1KbFPzopJ3LfNH64xBSMSTEPSHPdjLabfDoYP+gTplWtVRVPiycuygUXJ+Ofq5Or7XlG70tOlxnJeBUwVikVADQCwh/6+eL4tO4UF8vx7l1sky62fAJrsQqciahE5N0CtsUtbcvT9CkPjoS+x5kzZ6lc+UUqPz4uU0OyJ2HeL3sSba7KBSVRihwn1hrLjegbgJEwt57dSsVzwbSKn/9b29c4ka8wx24cI3+u/PzXDogAZ+Dq9v82dnFLiu3vvribsJthWRb//ap8szItb7XE3dU9/rjS56ScyO2cG40mr0teWy/VxasXAahYseL9B1MYvL29qV+//v1vQ9yzDJOxUsofaAUUU0qdAd4BXAG01rOBNUAn4CgQBQzJqmDTknDNy4Xnn6cEcK1YdfZ5jUXjDFchb4Fc5M/vRLUGJWnUrQpOTopjx/6PM2e+p0L556hcaew97c/slqkkWpEl4mIh9naKyXfjYoiKieKvs1u4fuc6J26Gkc/1/nuOdl7YSdG8RVNMzxMTzcULewFwQvPytRt4oagXHQ1AnzKlOHgswLZ8Xpe85HfNOHElFqfjcHVypVbRWnSo2IECuQuku7zWmgK5C1DKrZRt2vHg45w6cuqe9qvR5HbOjbOTc6rzT101tlex5AMkUYwkLIk0Z7Lnaur+GczXwJhMi+gBKK2xKhcCvV4AwMlFMeTjZuRxd02yXNjJrwk7OZM
yZfri4THJ1mWSXFpJV1qmItvSGqxxcO0YnN0DCV17Z3eBa3xnbVwM/DsT8peB+OQQp6043zyb6iZzxf/rmvXRpyqmjAVrSW/mdnzfNi23c27yuOR5oO3u2rWL/Tv2Z7jcDW5wiv+S78mTJ4EHbH0mI0lUZO0VPia43qw5ALWbl6H1MzVTzD9zZhHHjn1CyZJdqVnjf0kScfLkm1bSlUQrso3rJyHuLlw6CNtmwOl/01081skFJ2v8tcAR59hTphanI06jtcbF3Y3bTk6EuSb9WnB1cqVOCQvurvmoXrg6uZxzkcf5wS+ISpWTCxSLv/DJ1Q3KN8bVyfhBkbDHXbt2sX9/xkk0I/ebVCVxiqzgUMn4ZoGKXHbuBUCLPtVTzD9/fgWHj7xDsWJtqF3rU5QyWgUJSTh58pWkK7LUlVC4diL+hYbAH4yWqpNrymWProeoK8bzhPnWmNS3W7kFVGoOeQtD1ce5eucag9YO5pSzQqfoBboFuYsAMNoymuqFq1MpVwHK5StH6Xzmfe7/S7iHU8zLrJapJFWRnThUMj5apQfl48C7dTmcXZNeeXfp8u8cPDSRwoWbUDnqDa7MPWiblzgJS/IVdouOhNhoiLwIUVdTzr+wH87sgDyJelbiYuHEn0Z38s0zqW+3UCpJxtXNeGw08r/nAHduQIWmoBQUqgDlGhjPE5n1rz+nXV2Y1WYmTco0SfX2kbRO1Tyo+23FppdwJYkKR+QYyTjxTVIKWvRN2iq+evUvgoNfxF3VpPTOcdw8boxRIi1gkaHoSKML+OpR2DXfaJVeDYVb93ilfL6SxqM1zuhWzlMQiteEBsOgTPztds4uUMLTeMwkZyPPsjx0Ob2q9eKxso9l2naTSyvpSlewEPZxjGScSMfnvZK8vnFjF0H7R+LuXoUK+17Hei5Okq8wREfC7GZGqzZPoRQtSgBunEw5rWx9cMkD5epDSS/Ilc9IrgXLJVtQQ7HqkL9Uym08JF/v+xonnBjuMzzDZR/kXGxaSVeSqhD2cbhkXLVOCdvzmxHBBO57jly6OOWCJmI9C66l81FihI+JEYqH5k44HNtkXBSUwBoLZ3Yard1jG/6bXr4RuKW83YbyDY3WsFdPI6mW8s76uO9RWkn0Ttwdrl25Rne37vz2428ZbudBzsVK0hXiwThUMnbnvzFhb906SmDgEJyt7pTZ/CI6WpGrcj7cLMVNjFBkqZjbELwcfr6HO+0qt4BnloNL1g8Ef6/sbammlUTPRZ5DKUUpd/ta5pJQhTCPQyTjhM5FN+sNAG7fPs3evQMhVlF2y8u4RhelUA8P6ZZ2BOFnYU5LKOqB7S9/6h+jq/huouENS3qDx+PGhVLevZNuw9UNClcy7sF1yl5D7CVOwPa2VFNLokevH+XzVZ8z1Gsoz9d7PusCFkJkCodIxgnyud5E6zgC9w0h9m4U5f95jVy3S0oizoluXTG6ki8dhOOb4OY5o4v5YnD8/MvG7TsAFZtBXDSU9jXO5VbvCJWbmxd7BtJr8SZOwA/SUp25byburu4M8XroA+IJIe6DYyTj+Kuprbhw+/YZoqJOUObSCHJHlpdEnFPcPGdcSHUuEHZ8bdwWlFzhSlCwPFRrB50/S/2CK5PZ07Wc1bfthFwN4Y+TfzDadzQFc2dtoRAhROZwiGRs66Z2uklU1DEAXE4UI1flgpKIs6Orx4xBLK4cgfAzRuJNbShGr15g6Q+lfCFf9jvXn1ritadrOavPzc4InEHB3AUZUHtAlmxfCJH5HCIZJxQQtsYpbt0KBSDXrTK4PZ79vsAfSREXYZ+/kXSDl6Wc7x5/BXz7940rlis0TuU2oYfjXm7vSS3xmn0RVOClQLac2cKLdV+0q/qPECJ7cJBkbHDLc5tbt47hEluYvOXLSKs4O4iOhP9LOTQp3WdAldZG8k2jkk1Wu9+WbQKzE29q/AL9KJKnCP1rplvfRQiRzThUMlZobl4KIVe4eYMsPLKscXD5MJzbayTXuLu
w53tjOMgEE08ag2Nkk6EXs2PL9kHsOL+D7ee3M7HBRNwSD5kphMj2HCoZazS3Y8IocKup3E/8sMTehXVvGRddpaVqG+j7PeS69xq4D9ptnJ6cnHiT01rjF+hHCbcS9K7RO+MVhBDZimMl41wxWJ1v45ZHrqDOErF34dhGo2V79xb88iJEh/83v0prqD8EitUwBtFwdYf8Je3evCN2Gz8sf5/7m72X9vJ247fJnVXlDYUQWcahknFs3jsA5L5rzsU/DinqGpz8G0LXwZ7vUl+mWgd4cha4pzKcZDL23mOb4FFOsPbSWjN973TK5itLD48eZocjhLgPDpWM4+KTcR5JxvazWiEmyjjfu+dbOL0dLh8yauHevp76OsM3Go+58kPxVC7OSkVCEpbSeJlv4+mNhFwN4X+P/Q9X51RqIQshsj2HS8bOcflwjpOBDtJ08QCcD4KQn41Rq45tTLmMSx4oUA48exgt43L1jTq5ZetBGl/2GZ3bTZyEJeFmHqu24rfXj0oFKtGlShezwxFC3CeHSsbWvFZy3SyTavF0Aax7G/75Kum0Ut7gkheqtDIqFFVqDq55kiyya9cu9m/YDxxMc9MZnduVJJw1fg/7naM3jvJJi09wcXKo/52FeKQ41P+9sXmjyBVZRq6kBji9E/YvhegIQMOh1RB905jX8nXw6WMMLZlKtaLkrdzsMKqUSCnWGsvMwJl4FPKgQ6UOZocjhHgADpWMrS53cFNVHs0rqe/eMlq+xzbA9bCk8wqUIwZnYp3y8U/RvpwKKwVhW9LcVPLkK4k2e/r1+K+E3Qzji9Zf4KSyV/UpIcS9cahkDJD7bnmzQ3j4Dq2BgEQjLhUsT6RTAba6tuRy7grEOuX5L8G6yS1CjiAmLobZ+2ZTu2htHi//uNnhCCEekMMl40fmSupbVyFwMfzxtm1SpHMhfir7JnHKNVHrNk/8oyRYR7Li6ArORp7lrcZvobJh9SohxL1xqGTsFJcLl9iM73XNSZKfv3WxRlPnxm94RWxOstxyOnKzXEfba0m+jutO7B2+3vc1dUrU4bEyj5kdjhAiEzhUMna+WwB3Swmzw7gvad0adPLkSZyJ5WXn78kbdxMnrLZ5t5wL8nPpCUQ755PE+whZemQpl25f4qMWH0mrWAgH4VDJWFlVjrp4K3ECTn7RlNJxVIvcwRCWGAvHxa9UygcqNIFWr+PuVoSnH3bQwlRRMVHM2z+PRqUb0aBUA7PDEUJkEodKxljNKcWX4EGqBtm6lWtWgHlt4MappAv7Pg3d/UwrNyiyhx8O/cC1O9cYaxlrdihCiEzkUMlYWR/+4aTXus1IivO6K0fDr4v/W6BGZ2j0vDEgh3jkRdyN4Jvgb2hRrgWWEhazwxFCZCJJxnZK75wuJGvd3su52/3L4MMe/w3IAWB5BrrPyLK6vyJn+j7ke27evckYyxizQxFCZDLHSsY68w7H3lGoHuiq5VP/wvLnjOf5y0Dl5tDqdShS5b7jFo7pxp0bfBfyHe0qtqN20dpmhyOEyGQOkYyNK4ydUXEPfj41repCmXqrkNZGOcJfxhuvW00ykrAQafjmwDdExUQx2ne02aEIIbKAQyRjlZCMSTnO8r3av38/Fy5cyLr7dMP+hoWd/ntdrYMkYpGuK7ev8MPBH+hUpRMehT3MDkcIkQUcIhknUCrvfa2XuEv6woULlCpViiFDhmRmaAar9b9E7FYUBiyHMnUyfz/CoczfP58YawyjfEeZHYoQIos4VjK+x3PGqXVJlypVCm9v78wPbv+y/84PA7x2PPP3IRzOhVsXWHJ4Cd09ulOxgH1X6Qshch4HScYayPhq6vQuysqSLunoCDi7G5YOgdvX/pv+2onM3Y9wWHOC5qDRjPAZYXYoQogs5BDJOOEGIGV1TXX+Q7koK7E74fD7G7B3UdLpI7dCqSxodQuHdDriNCtCV9Crei/K5CtjdjhCiCzkEMk4IRun1jLetWsXv/76K/AQiyd8VOG/5/l
KQb/FUKYuOEnNWWG/2ftm4+zkzHCf4WaHIoTIYg6VjIk/Z5zaqFhdunTJ+iQcHQHTavz3evI1Gb5S3Jfj4cf59fivPFvrWUq45cziJ0II+zlGMo53zXqLb7755sFHxbofWsOHiWopj9sjiVjct9mBs8ntnJuh3kPNDkUI8RA4RjKObxlf11FEZOU9wmmxWuHr5v+9fueGDGUp7tvha4f5Lew3hnsPp0ieImaHI4R4CBwjGceLJJqapapmzT3CabkTnvQc8eunJBGLBzIzcCb5XfMzyHOQ2aEIIR4Sh0jGttSnVdbcI5ya8LOwfBic+ue/aeP3Qp6CD2f/wiEduHKAjac3MtYyloK55bMkxKPCIZJxAnfyZn3XdNBSWDkSrLHGa+UMpX3g+c1Zu1/xSJgeOJ1CuQsxoPYAs0MRQjxEDpGMrc7xF0rpLO4ePvI7/DTsv9d1BhilDoXIBHsu7uHvs3/zSr1XcHd1NzscIcRD5BDJWCvj/t0i5Mu6nax+BXbOM54/vRSqt8+6fYlHjtaa6XunUyxvMfrW7Gt2OEKIh8wxRqGIbxAXV4WyZvund/yXiDtNg2rtsmY/4pG1/cJ2dl3cxXDv4eR1ub+CJ0KInMshWsY2Ogt+W1zYD/Pjk2+f76F2t8zfh3ikJbSKS7mXolf1XmaHI4QwgWO0jOMLRSS6rjpzbJkGs5sZz51zSyIWWeKvs38RdDmIET4jyOX84DW5hRA5j0O1jFVmXsClNWz8n/G8+wzjYi0hMplVW/Hb60e5fOXo7tHd7HCEECZxkJZxgkxKxtY4mF7PeO7ZQxKxyDIbTm3g4LWDjLaMxtUp9apjQgjH51At40wzJdEQhD3nmReHcGhx1jhm7J1B5YKV6VS5k9nhCCFM5GAt40ywYcp/z984B87ye0Vkjd/CfuNY+DHGWMbgLEVFhHikSTJO7Oox+Ov/jOfDN0EuGXhBZI0YawyzAmdRo3AN2lWUW+WEeNRJMk5sel3jsVJzKFvX3FiEQ/vl2C+cijjF2DpjcVLyv6EQjzr5FkhgjTMelTMM/tXcWIRDuxt3l9n7ZuNdzJuW5VqaHY4QIhuQZJxgdnw94sajzI1DOLzlocs5f+s8Y+uMRUm5TSEEkowNq8bDpQPG86bjzY1FOLTbsbeZEzSHeiXr0aR0E7PDEUJkE5KMtYY93xrPn98M+UuaGo5wbD8e/pErt68w1iKtYiHEfyQZrxhpPBauDGXqmBuLcGi3Ym4xf/98mpZpSv1SWVx3WwiRozzayfjyEQgKMJ6P+NPcWITDW3xwMdejrzPWMtbsUIQQ2cyjm4y1hhkNjOf1h0KegubGIxxaeHQ4C4MX0qp8K7yLe5sdjhAim3l0k/G06sZjserQ5XNzYxEO77uQ74iIiZBWsRAiVQ6VjO2+HCY6Em5dMp4P35hV4QgBwLU711gUsogOlTpQo0gNs8MRQmRDDpWM7bago/HY9l3Ind/UUITj+yb4G+7E3WG072izQxFCZFOPXjLWGi7uN543GGZuLMLhXY66jP8hf7pU6UKVQlXMDkcIkU09esn4x2eNxyZjpVUsstzc/XOJs8Yx0mek2aEIIbKxRy8ZH/zFeGw6ztw4hMM7F3mOpUeW8mS1JylfoLzZ4QghsrFHKxmHnzEeC1WA/KXMjUU4vDlBc1AoRviMMDsUIUQ292gl400fGI+PvWBuHMLhnbp5ipVHV9KnRh9KucsPPyFE+uxKxkqpjkqpw0qpo0qp11OZX1Ap9YtSap9S6oBSakjmh5oJAhcbjzW7mhuHcHiz9s3C1cmVYd5ykaAQImMZJmOllDMwA3gCqA30V0rVTrbYGCBEa+0LtAL+TymVK5NjfTAXQ4zHWl2lGITIUsduHGP18dX0r9WfYnmLmR2OECIHsKdl3BA4qrU+rrW+CwQA3ZMto4H8yihDkw+4BsRmaqQPascc49Gzh7lxCIc3M3Ambq5uDPUcanYoQogcwp5kXBY4nej1mfhpifkBtYB
zwH7gBa21NVMizCwn/zYevZ4yNw7h0A5dO8S6k+t4tvazFMpTyOxwhBA5hD3JOLVRJnWy1x2AQKAMYAH8lFIFUmxIqeeVUruUUrsuX758j6E+gLhYuHIE8kn3tMhaM/bOoECuAjxb+1mzQxFC5CD2JOMzQOKbJMthtIATGwL8pA1HgRNAzeQb0lrP0VrX11rXL168+P3GfO9CVhqPJb0e3j7FIyfochCbz2xmiNcQCuRK8VtUCCHSZE8y3glUU0pVjr8oqx+wKtkyp4A2AEqpkkAN4HhmBpoeldFh/DPdeGz3XtYHIx5Zfnv9KJKnCE/XfNrsUIQQOYxLRgtorWOVUmOB3wFnYIHW+oBSamT8/NnA/4CFSqn9GN3aE7XWV7Iw7iSM68Y0cW7Je8/j3TwHuQtAKakjK7LGzgs72XZ+GxPqT8DN1c3scIQQOUyGyRhAa70GWJNs2uxEz88B7TM3tHtndU8jGd+6JIlYZBmtNX57/SiRtwR9avQxOxwhRA7k+CNwRcbXLS5W3dw4hMPadm4bey7tYbjPcPK45DE7HCFEDuT4yXjHXOOxTF1z4xAOSWvN9L3TKeNehp7VepodjhAih3L8ZBx7x3isO9DcOIRD2nx6M8FXgxnpO5Jcztlr0DkhRM7h+Mk45rbxKLWLRSazait+gX5UyF+BrlVlvHMhxP1z7GRstcLO+G5qldrYJULcv3Un13Hk+hFGW0bj4mTXtZBCCJEqx07GJzabHYFwULHWWGbsnYFHIQ86VupodjhCiBzOsZPxmd3G4/ObTQ1DOJ41J9YQdjOMMZYxODs5mx2OECKHc+xk7BzfdVishrlxCIcSY41hZuBMahWpRZsKbcwORwjhABw7GQf/ZDy65DY3DuFQVh5dydnIs4ytMzZ+9DchhHgwjpWMk38vRpw3HqUbUWSS6Lhovt73Nb7FfWletrnZ4QghHIRjJePk7t6CQhXNjkI4kGVHlnEx6iLj6oyTVrEQItM4bjKOuAAxUVCogtmRCAcRFRPF3KC5NCzVkEalG5kdjhDCgThuMj74i/FYq5u5cQiHEXA4gKt3rjK2zlizQxFCOBjHTcaBi43HWjIyknhwkXcjWRC8gGZlm1GnRB2zwxFCOBjHTcbn9hqPBUqbG4dwCN8f/J7w6HBpFQshsoRjJuOb54zHcg3MjUM4hPDocL478B1tKrTBs6in2eEIIRyQYybj8DPGY73BpoYhHMPCAwu5FXOL0ZbRZocihHBQjpmMEy7eyl/K3DhEjnf19lUWH1xMx8odqV64utnhCCEclGMm47u3jMdKMiiDeDDzg+cTHRfNaF9pFQshso6DJuNI41GGwRQP4OKtiyw5tIRuVbtRqWAls8MRQjgwx0zGUdcgfxmzoxA53Nz9c7FiZaTvSLNDEUI4OMdMxkf/AG01OwqRg52JOMPy0OU8Ve0pyuYra3Y4QggH55jJGKCAtIzF/fs66GuccGK493CzQxFCPAIcLxlrbTxWa2duHCLHCgsPY9WxVfSt2ZeS7iXNDkcI8QhwvGR886zxGB1hbhwix5q5bya5nXPznNdzZocihHhEOF4yjrtrPJbyMTcOkSOFXg9l7Ym1PFPrGYrmLWp2OEKIR4TjJeNbV4xHa4y5cYgcaWbgTNxd3RnsOdjsUIQQjxDHS8YXgozH/FIgQtybA1cPsP7UegZ6DqRg7oJmhyOEeIQ4XjJ2cjUeS9QyNw6R48zYO4OCuQvybK1nzQ5FCPGIcbxknHB/sXI2Nw6RowReCuSvs38x1Gso+XLlMzscIcQjxoGTseMdmsg6fnv9KJqnKP1q9DM7FCHEI8jxMpYkY3GPtp/fzvYL2xnuMxw3VzezwxFCPIIcL2MlDPohyVjYQWuN314/SrqVpFf1XmaHI4R4RDlexrp93XhUytw4RI6w9exWAi8H8rzP8+R2lipfQghzOFgyVkaRCADpbhQZ0Fozfe90yuYrSw+PHmaHI4R4hDlYMua/7mnXPObGIbK
9jac2cvDaQUb5jsLV2dXscIQQjzDHS8ant0OFpmZHIbK5OGscfoF+VCpQic5VOpsdjhDiEedQydjZGj8udcJ5YyHS8HvY7xy9cZQxljG4OLmYHY4Q4hHnUMk4X2z8uNQ+vc0NRGRrsdZYZu6bSbXC1Whfqb3Z4QghhGMl41zW28aTghXMDURka78c+4WTN08y1jIWJ7kFTgiRDTjUN1H+mMvxT0qZG4jItmLiYpi9bzaeRT1pXb612eEIIQTgYMnYmjAetVRsEmn4KfQnzt06x7g641ByL7oQIptwqGTspOOMJ3KbikjFndg7zAmaQ90SdWlaRq64F0JkHw6VjPPHxndTO+cyNxCRLf14+Ecu3b7E2DpjpVUshMhWHCoZx6r4JOxW1NxARLYTFRPF/OD5NC7dmAalGpgdjhBCJOFQybhgzAXjiXRTi2R+OPQD1+5cY2ydsWaHIoQQKThUMr7rlNd4Il2QIpGbd2+yIHgBLcu1xLe4r9nhCCFECg6VjJXW4F7C7DBENvN9yPdE3I1gjGWM2aEIIUSqHCsZo6WOsUji+p3rfB/yPe0qtqNW0VpmhyOEEKlyqMylsEoyFkl8c+AbomKipFUshMjWHCxzaXByNjsIkU1cuX0F/4P+dK7SmaqFqpodjhBCpMmhkrFtbGohgHn75xFjjWGU7yizQxFCiHQ5VO24AjGXwBptdhgiG7hw6wI/Hv6RJz2epEIBKRwihMjeHKplXDDmogz4IQD4OuhrAEb4jDA5EiGEyJhDJeMo54KSjAWnb55mZehKelXvRel8UjRECJH9OVQy1jhBsWpmhyFMNjtoNs5Ozgz3Hm52KEIIYReHSsbucdfNDkGY7Hj4cX49/iv9a/anuFtxs8MRQgi7OEwydiXWeHL3lrmBCFPNCpxFHuc8DPEaYnYoQghhN4dJxvmJMp4Uq25uIMI0h68dZm3YWgbUHkCRPEXMDkcIIezmMMm4mAo3nhQoY24gwjQzAmeQP1d+BnkOMjsUIYS4Jw6TjIuoCOOJSx5zAxGmCL4SzKbTmxjsOZgCuQqYHY4QQtwTh0nGTliNJ4UrmhuIMIXfXj8K5y7MM7WeMTsUIYS4Zw6TjF2IM54oGZv6UbP74m7+Pvc3z3k/h7uru9nhCCHEPXOYZFxC3TCeODnUCJ8iA1prpu+dTvG8xelTo4/Z4QghxH1xmGRs4yrnjB8l/57/l90XdzPcZzh5XfKaHY4QQtwXh0nGtm7qPIVMjUM8PFpr/Pb6Ucq9FE9Ve8rscIQQ4r45TDKuqs4ZT+Rq6kfGljNbCLoSxEifkeRyzmV2OEIIcd8cJhlH6vguSjcZ7OFRYNVW/AL9KJ+/PN08upkdjhBCPBCHScaFE+4zVsrcQMRDsf7keg5dO8Qo31G4OrmaHY4QQjwQh0nGxdRNs0MQD0mcNY4ZgTOoUrAKnSp3MjscIYR4YHYlY6VUR6XUYaXUUaXU62ks00opFaiUOqCU+jNzw8xYlM7NXSe5mvZRsObEGo6HH2eMZQzOTnJfuRAi58vwplyllDMwA2gHnAF2KqVWaa1DEi1TCJgJdNRan1JKlciieNPkrOK46+SGXMbj2GKsMczaN4uaRWrStmJbs8MRQohMYU/LuCFwVGt9XGt9FwgAuidb5mngJ631KQCt9aXMDTNjFdQltOP0uos0rDq6itMRpxlrGYuTkr+3EMIx2PNtVhY4nej1mfhpiVUHCiulNiuldiulBmZWgPaK0S7kjQt/2LsVD9HduLvMDpqNTzEfWpRrYXY4QgiRaexJxqldnqyTvXYB6gGdgQ7A20qpFIWFlVLPK6V2KaV2Xb58+Z6DTU8cTlzPJeUTHdmyI8u4cOsCY+uMRclV80IIB2JPMj4DlE/0uhxwLpVl1mqtb2mtrwBbAN/kG9Jaz9Fa19da1y9evPj9xpwqhZZuagd2O/Y2c/fPpX7J+jQu3djscIQQIlPZk712AtWUUpWVUrmAfsCqZMv8DDRXSrkopdy
ARsDBzA01fU4pGuvCkSw5tIQrt69Iq1gI4ZAyvJpaax2rlBoL/A44Awu01geUUiPj58/WWh9USq0FggArME9rHZyVgSenAJ1qj7rI6W7F3GJ+8HweK/MY9UrWMzscIYTIdHbVG9RarwHWJJs2O9nrT4FPMy+0e+OkrKR+elvkdItCFnEj+gZj64w1OxQhhMgSDnWSVUv3pcMJjw7n2wPf0rp8a7yKeZkdjhBCZAmHScZOSMvYEX174FsiYiIYYxljdihCCJFlHCYZyzljx3PtzjUWHVxEx0odqVGkhtnhCCFElnGYZFxdnc54IZGjLNi/gOi4aEZZRpkdihBCZCmHScZXdEFyWyPNDkNkkktRlwg4HECXKl2oUrCK2eEIIUSWcphkDHDTtaTZIYhMMjdoLnHWOEb6jjQ7FCGEyHIOk4wVGgc6nEfauchzLAtdRo9qPSifv3zGKwghRA7nMNnLCS1jcDmIr4O+xgknnvd53uxQhBDioXCYZGy0jOVq6pzu5M2T/Hz0Z/rU6EMp91JmhyOEEA+FwyRjJ6Xl1iYHMGvfLHI55+I57+fMDkUIIR4ah0nGICNw5XTHbhxjzfE19K/Zn2J5i5kdjhBCPDQOk4xlBK6cb0bgDNxc3RjiOcTsUIQQ4qFymGQsaThnO3j1IH+c/IOBtQdSKE8hs8MRQoiHymGScUFuyTnjHGxG4AwK5CrAs7WfNTsUIYR46BwmGbupaHJbb5kdhrgP+y7v488zfzLEawj5c+U3OxwhhHjoHCYZx2hnopwLmR2GuA9+e/0okqcIT9d82uxQhBDCFA6TjK04EadymR2GuEc7L+zk3/P/Msx7GG6ubmaHI4QQpnCYZIyMwJXjaK3x2+tHCbcS9KnRx+xwhBDCNA6TjBUa5D7jHOWfc/+w59Ienvd+ntzOuc0ORwghTONAyVjkJFprpu+dThn3MvSs1tPscIQQwlQOk4xFzrLp9CYOXD3ASN+RuDq7mh2OEEKYymGSsULGps4prNqKX6AfFQtUpGvVrmaHI4QQpnOoZCyd1TnDurB1hF4PZbTvaFycXMwORwghTOdAyVjkBLHWWGYEzsCjkAcdK3c0OxwhhMgWHCYZSwnFnGH18dWE3QxjrGUsTsphPn5CCPFA5NtQPDQxcTHM2jeLWkVq8XiFx80ORwghsg3HSMY6YbgPaRlnZyuOruBs5FnG1RmHknvChRDCxiGSsYofe0vL93u2FR0XzddBX2MpbqFZ2WZmhyOEENmKgyTjlM9E9rL08FIuRV2SVrEQQqTCQZKxdFNnZ1ExUczdP5dGpRrRsHRDs8MRQohsxyGScQIpFJE9+R/y59qda4ytM9bsUIQQIltyiGSsJA1nWxF3I1gQvIDmZZtjKWExOxwhhMiWHCIZ57NGAZAr/lFkH4tCFnHz7k3G1BljdihCCJFtOUQyThDpWszsEEQiN+7c4LuQ72hboS2eRT3NDkcIIbIth0rGIntZeGAht2JuMdoy2uxQhBAiW5NkLLLEldtX+OHQDzxR+QmqFa5mdjhCCJGtOUQyVlou4Mpu5u+fz924u4zyHWV2KEIIke05RDIW2cuFWxf48fCPdKvajUoFK5kdjhBCZHuSjEWmmxs0FytWRviOMDsUIYTIESQZi0x1JuIMP4X+xFPVnqJsvrJmhyOEEDmCQyRjGfQj+5i9bzbOTs487/O82aEIIUSO4RDJOIGWsalNdSL8BL8c/4W+NfpSwq2E2eEIIUSO4VDJWJhr1r5Z5HbOzVCvoWaHIoQQOYokY5Epjlw/wtoTaxlQawBF8xY1OxwhhMhRHCIZS+e0+WYGziSfaz4GeQ4yOxQhhMhxHCIZ/0fSshkOXD3AhlMbGOg5kIK5C5odjhBC5DgOloyFGfz2+lEodyEG1BpgdihCCJEjSTIWD2Tvpb1sPbuVoV5DyZcrn9nhCCFEjuQQyVjuMzaP314/iuYpSr+a/cwORQghciyHSMbCHNvPb2fHhR0M9xlOXpe
8ZocjhBA5liRjcV+01kzfO52SbiXpVb2X2eEIIUSO5iDJWLqpH7a/zv7Fvsv7GOE7gtzOuc0ORwghcjQHScYGGQ7z4dBa47fXj3L5yvGkx5NmhyOEEDmeQyVj8XBsOLWBg9cOMsoyClcnV7PDEUKIHE+SsbgncdY4ZgTOoHLBynSu3NnscIQQwiE4RDKWzumHZ23YWo7eOMpoy2icnZzNDkcIIRyCQyTjBEqycpaKtcYyM3Am1QtXp33F9maHI4QQDsOhkrHIWr8c+4VTEacYaxmLk5KPjhBCZBb5RhV2uRt3l1n7ZuFV1ItW5VuZHY4QQjgUh0jGMhxm1vsp9CfO3zrPuDrjUHI+QAghMpVDJOMEcp9x1rgTe4c5QXOoW6IuTco0MTscIYRwOA6VjEXWWHJ4CZdvX5ZWsRBCZBFJxiJdUTFRzN8/nyalm1C/VH2zwxFCCIfkEMlY2mpZZ/HBxVyPvs7YOmPNDkUIIRyWQyTj/0hazkw3797kmwPf0KpcK3yK+5gdjhBCOCwHS8YiM3134Dsi7kYwps4Ys0MRQgiHJslYpOr6net8H/I97Su2p2aRmmaHI4QQDs0hkrHScp9xZvsm+BvuxN1hjEVaxUIIkdUcIhmLzHU56jL+h/zpXLkzVQpVMTscYSJnZ2csFguenp74+vry2WefYbVa72tbkydPZv369WnOnz17Nt999909b/f333/HYrFgsVjIly8fNWrUwGKxMHDgwPuKM7Fp06ZRs2ZNvLy88PX1tcXXqlUrdu3a9cDbB9i1axfjx48HIDo6mrZt22KxWFiyZAnDhg0jJCQkU/aToHv37jRpknS8gMGDB7Ns2bIk0/Lly2d7fuTIETp16oSHhwe1atWiT58+XLx40a79vfnmm5QvXz7J9lLz4Ycf4uHhQY0aNfj999/tPBoHorU25V+9evV0Znli/rdav1NA/zt9ZKZt81H2wb8faN9vffWp8FNmhyJM5u7ubnt+8eJF3aZNGz158mQTI0pfy5Yt9c6dO1NMj42NvedtzZo1S7dv316Hh4drrbW+ceOGXrhwYbr7eVDbtm3TLVq0uO/1MzrO69ev63LlyumaNWvq48eP26YPGjRIL126NMmyCX/727dvaw8PD71q1SrbvI0bN+r9+/fbFdO2bdv0uXPnknyWkjtw4ID28fHRd+7c0cePH9dVqlS5r79ZTgDs0qnkRLtaxkqpjkqpw0qpo0qp19NZroFSKk4p1SvTfi2Ih+p85HmWHlnKkx5PUr5AebPDEdlIiRIlmDNnDn5+fmitiYuLY8KECTRo0AAfHx++/vpr27KffPIJ3t7e+Pr68vrrxldG4tbX66+/Tu3atfHx8eHVV18F4N1332XatGkABAYG0rhxY3x8fOjRowfXr18HjBbpxIkTadiwIdWrV+evv/5KM95KlSoxZcoUmjVrxtKlS1m3bh1NmjShbt269O7dm8jISAB2795Ny5YtqVevHh06dOD8+fMAfPDBB8ycOZMCBQoAULBgQQYNGpRiP6NGjaJ+/fp4enryzjvv2KandoxLly61tbJbtGgBwObNm+nSpQuXLl1iwIABBAYGYrFYOHbsWJIWeFrxJz/O9CxfvpyuXbvSr18/AgIC0l02wQ8//ECTJk3o2rWrbVrr1q3x8vKya/3GjRtTunTpdJf5+eef6devH7lz56Zy5cp4eHiwY8cOu7bvKFwyWkAp5QzMANoBZ4CdSqlVWuuQVJb7GHjo/QsyNnXm+TrI+EId4TPC5EhEYu/9coCQczczdZu1yxTgna6e97ROlSpVsFqtXLp0iZ9//pmCBQuyc+dOoqOjeeyxx2jfvj2HDh1i5cqVbN++HTc3N65du5ZkG9euXWPFihUcOnQIpRQ3btxIsZ+BAwcyffp0WrZsyeTJk3nvvff44osvAIiNjWXHjh2sWbOG9957L92u7zx58rB161auXLlCz549Wb9+Pe7u7nz88cd89tlnTJo0iXHjxvHzzz9
TvHhxlixZwptvvsmXX35JREQEVatWzfA9ef/99ylSpAhxcXG0adOGoKAgypUrl+oxTpkyhd9//52yZcumOO4SJUowb948pk2bxq+//ppk3pUrV5g6dWqK+CdPnpzkODPi7+/PO++8Q8mSJenVqxeTJk3KcJ3g4GDq1auX6rzDhw/Tt2/fVOdt3ryZQoUKZbh9gLNnz9K4cWPb63LlynH27Fm71nUUGSZjoCFwVGt9HEApFQB0B5KfyBgHLAcaZGqE90DLUI0P5NTNU6w8upK+NfpSOl/6v2TFo0vHXzC5bt06goKCbK3d8PBwQkNDWb9+PUOGDMHNzQ2AIkWKJFm/QIEC5MmTh2HDhtG5c2e6dOmSZH54eDg3btygZcuWAAwaNIjevXvb5vfs2ROAevXqERYWlm6sCYni33//JSQkhMceewyAu3fv0qRJEw4fPkxwcDDt2rUDIC4ujtKlS6O1tnvo1x9//JE5c+YQGxvL+fPnCQkJoXbt2qke42OPPcbgwYPp06eP7TjskVb8yY8zPRcvXuTo0aM0a9YMpRQuLi4EBwfj5eWV6rHac/w1atQgMDDQ7uNIS8Jn6l7370jsScZlgdOJXp8BGiVeQClVFugBPI6JyVg8mNn7ZuPq5Mow72FmhyKSudcWbFY5fvw4zs7OlChRAq0106dPp0OHDkmWWbt2bbpfpC4uLuzYsYMNGzYQEBCAn58fGzdutDuG3LlzA8bFZbGxseku6+7uDhhf9u3atcPf3z/J/P379+Pp6cm2bdtSXff48eNUqZL2RYwnTpxg2rRp7Ny5k8KFCzN48GDu3LmT5jHOnj2b7du3s3r1aiwWi92JLK34kx9nepYsWcL169epXLkyADdv3iQgIICpU6dStGhR26kAMHovihUrBoCnpyd//vlnqtvMrJZxuXLlOH36vzRz5swZypQpY9e6jsKec8ap/V+V/GfMF8BErXVcuhtS6nml1C6l1K7Lly/bGaJ4GI7fOM7qE6vpX7M/xd2Kmx2OyIYuX77MyJEjGTt2LEopOnTowKxZs4iJiQGMK25v3bpF+/btWbBgAVFRUQApuqkjIyMJDw+nU6dOfPHFFykSUsGCBSlcuLDtfPD3339vayXfr8aNG/P3339z9OhRAKKiojhy5Ag1atTg8uXLtmQcExPDgQMHAJg0aRJjxozh5k3j9MDNmzeZM2dOku3evHkTd3d3ChYsyMWLF/ntt9/SPcZjx47RqFEjpkyZQrFixZIkoPuJPzV+fn74+fmlmO7v78/atWsJCwsjLCyM3bt3284bt2rViiVLlnD37l0AFi5cSOvWrQF4+umn+eeff1i9erVtW2vXrmX//v22lnFq/+xNxADdunUjICCA6OhoTpw4QWhoKA0bNrR7fUdgT8v4DJD4Sp5ywLlky9QHAuJ/DRcDOimlYrXWKxMvpLWeA8wBqF+/vpzozUZm7ptJHuc8DPEaYnYoIhu5ffs2FouFmJgYXFxcePbZZ3n55ZcBGDZsGGFhYdStWxetNcWLF2flypV07NiRwMBA6tevT65cuejUqRMffPCBbZsRERF0796dO3fuoLXm888/T7Hfb7/9lpEjRxIVFUWVKlX45ptvHug4ihcvzsKFC+nfvz/R0dEATJ06lerVq7Ns2TLGjx9PeHg4sbGxvPjii3h6ejJq1CgiIyNp0KABrq6uuLq68sorryTZrq+vL3Xq1MHT05MqVarYupHTOsYJEyYQGhqK1po2bdrg6+ubZqvT3viTO3TokC2OBGFhYZw6dSrJednKlStToEABtm/fTpcuXdi9ezf16tXD2dmZqlWrMnv2bADy5s3Lr7/+yosvvsiLL76Iq6srPj4+fPnll3a996+99ho//PADUVFRlCtXjmHDhvHuu++yatUqdu3axZQpU/D09KRPnz7Url0bFxcXZsyYgbOzs13bdxQqtb76JAso5QIcAdoAZ4GdwNNa6wNpLL8Q+FVrvSy1+Qnq16+vM+s+vc4
LvmX1qfH8W/wZGo+ZmSnbfJQcvnaYXr/0YoTPCCkIIUQO16VLF3766Sdy5cpldigiFUqp3VrrFCXwMmwZa61jlVJjMa6SdgYWaK0PKKVGxs+fnenRiofKL9CP/LnyM9DzwQdJEEKYK/mV2CJnsKebGq31GmBNsmmpJmGt9eAHD0s8LPsv72fz6c2MrzOeArkKmB2OEEI8khxiOEy5z/j++QX6UTh3YZ6p9YzZoQghxCPLIZLxfx6t+9Ie1K4Lu/jn3D885/0cbq5uZocjhBCPLAdLxsJeWmum751O8bzF6Vsj4wEDhBBCZB1Jxo+obee3sefSHob7DCePSx6zwxFCiEeaQyRjOWd8b7TW+O31o7R7aZ6q9pTZ4YhsTCnFs88+a3sdGxtL8eLFbcM7Lly4kLFjU94OV6lSJVuhiPbt23PhwgXAGAxjxIgRVK1aFU9PT1q0aMH27dsBMiyxdy8Sl2M8dOgQFouFOnXqcOzYMZo2bZpp+wHjPSlWrFiKcZ4rVarElStXbK8TCkIk+O2336hfvz61atWiZs2atmIS9vj222+pVq0a1apV49tvv011mZMnT9KmTRt8fHxo1aoVZ86csc2bOHEiXl5eeHl5sWTJEtv0jRs3UrduXby8vBg0aFCSEc42b95sK6f5oIOwiJQcIhkn0HLK2C5/nvmT/Vf2M9J3JLmc5V5EkTZ3d3eCg4O5ffs2AH/88Qdly5a1a91Nmzaxb98+6tevbxv0Y9iwYRQpUoTQ0FAOHDjAwoULkySszDJy5EhbPeOVK1fSvXt39u7dS9WqVfnnn3/s3o7WOsP6zevWraNGjRr8+OOPqY6xnJrg4GDGjh3LokWLOHjwIMHBwekOu5nYtWvXeO+999i+fTs7duzgvffeSzKUZYJXX32VgQMHEhQUxOTJk20/FlavXs2ePXsIDAxk+/btfPrpp9y8eROr1cqgQYMICAggODiYihUr2hL9jRs3GD16NKtWreLAgQMZVocS986hkrHImFVb8dvrR4X8FehatWvGK4hH3hNPPGEbCtHf35/+/fvf0/otWrTg6NGjHDt2jO3btzN16lScnIyvnipVqtC5c+cky0dGRtKmTRvq1q2Lt7c3P//8MwC3bt2ic+fO+Pr6JmnRpVeOcc2aNXzxxRfMmzfPNrxj4hb4p59+aisBmVD+MCwsjFq1ajF69Gjq1q2b4ZCV/v7+vPDCC1SoUIF///3Xrvfkk08+4c0336RmzZqAMV736NGj7Vr3999/p127dhQpUoTChQvTrl071q5dm2K5kJAQ2rRpAxglDxPex5CQEFq2bImLiwvu7u74+vqydu1arl69Su7cuW2jerVr147ly5cDRhnFnj17UqFCBcCoMCUyl133GQvH8cfJPzh8/TAfNv8QVydXs8MR9vrtdbiwP3O3Wcobnvgow8X69evHlClT6NKlC0FBQQwdOjTdOsLJ/frrr3h7e3PgwAEsFkuGwxzmyZOHFStWUKBAAa5cuULjxo3p1q0ba9eupUyZMrYfBuHh4RmWY+zUqRMjR44kX758KbqB161bR2hoKDt27EBrTbdu3diyZQsVKlTg8OHDfPPNN8ycmf6Ifrdv32bDhg18/fXX3LhxA39//yTVlNISHBycYmjNBIsXL+bTTz9NMd3Dw4Nly5Zx9uxZypf/b4TitMoN+vr6snz5cl544QVWrFhBREQEV69exdfXl/fee4+XX36ZqKgoNm3aRO3atSlWrBgxMTHs2rWL+vXrs2zZMtsPkSNHjhATE0OrVq2IiIjghRdesPU8iMzhEMlYeqftE2eNY0bgDKoWrMoTlZ4wOxyRQ/j4+BAWFoa/vz+dOnWye73WrVvj7OyMj48PU6dOZcuWLXatp7XmjTfeYMuWLTg5OXH27FkuXryIt7c3r776KhMnTqRLly40b96c2NjYdMsxpmfdunWsW7eOOnXqAEaLPDQ0lAoVKlCxYsUk4zin5ddff6V169a4ubnx1FNP8b/
//Y/PP/8cZ2fn+y5L+Mwzz/DMM2nf929vucFp06YxduxYFi5cSIsWLShbtiwuLi60b9+enTt30rRpU4oXL06TJk1wcXFBKUVAQAAvvfQS0dHRtG/fHhcXI0XExsaye/duNmzYwO3bt2nSpAmNGzdOdWxscX8cIhn/R9JyetacWMOJ8BN81uoznJ0erUHYczw7WrBZqVu3brz66qts3ryZq1ev2rXOpk2bbGX4wCjFt2/fPqxWq62bOjWLFy/m8uXL7N69G1dXVypVqsSdO3eoXr06u3fvZs2aNUyaNIn27dszefLk+y7HqLVm0qRJjBgxIsn0sLAwu0oSgtFF/ffff1OpUiUArl69yqZNm2jbtq2tLGHCe5C8LOHu3bvx9fVN9fjTaxmXK1eOzZs326afOXOGVq1apVi+TJky/PTTT4DxQ2P58uUULFgQgDfffJM333wTMKoyVatWDYAmTZrYej3WrVtnqwxVrlw5ihUrhru7O+7u7rRo0YJ9+/ZJMs5Ecs74ERFjjWFm4ExqFalFmwptzA5H5DBDhw5l8uTJeHt73/c2qlatSv369XnnnXdsrbvQ0FDbucwE4eHhlChRAldXVzZt2sTJkycBOHfuHG5ubgwYMIBXX32VPXv2ZFiOMT0dOnRgwYIFREZGAnD27FkuXbqU6rIDBw5kx44dSabdvHmTrVu3curUKVtZwhkzZthqDrdq1Yrvv/8egLi4OBYtWmQ7bz1hwgQ++OADW7KzWq189tlngNEyTq0k4bJly2xxr1u3juvXr3P9+nXWrVuXoqY0wJUrV2wXn3344YcMHTrUFkvCD6qgoCCCgoJo3749gO34o6Oj+fjjjxk5ciQA3bt356+//iI2NpaoqCi2b99OrVq17H6vRcYcrGUs0vLz0Z85E3mGGW1m4KTkN5i4N+XKleOFF15Idd7ChQtZuXKl7XV6FzHNmzePV155BQ8PD9zc3ChatGiKVuAzzzxD165dqV+/PhaLxXaR0/79+5kwYQJOTk64uroya9Ysu8oxpqV9+/YcPHjQdo43X758LFq0KNVz2kFBQZQuXTrJtJ9++onHH3+c3Llz26Z1796d1157jejoaN5++21GjRqFr68vWms6duzIgAEDAKPr/4svvqB///5ERUWhlEpxIVtaihQpwttvv02DBg0AmDx5MkWKFLE9r1+/Pt26dWPz5s1MmjQJpRQtWrRgxowZgFGzuXnz5gAUKFCARYsW2bqjP/30U3799VesViujRo3i8ccfB6BWrVp07NgRHx8fnJycGDZsGF5eXva90cIuGZZQzCqZWUKx+4IF/HzqJbaVeJYmo1MW1X7URcdF0/mnzpR0L8miJxbZdd5KCGG4efMmzz33nNzOIzJFWiUUHayJJEkmNcuOLONi1EXG1RkniViIe1SgQAFJxCLLOVgyFsndjr3N3KC5NCjVgEalGpkdjhBCiFRIMnZwAYcCuHrnKmMtY6VVLIQQ2ZRDJON81igAnHRsBks+WiLvRrIgeAGPlX2MuiXrmh2OEEKINDhEMib+IrQ4JSNKJbbo4CJuRN9gnGWc2aEIIYRIh2Mk43gxTlIKMEF4dDjfHviWx8s/jmcxT7PDEUIIkQ6HSsbiP98e+JZbMbcYU2eM2aGIHOzixYs8/fTTVKlShXr16tGkSRNWrFiR5fvdtWsX48ePv+/1K1WqxFNP/VcedNmyZQwePBgw7osuXry4rRxgr169iIqKsmu7L7zwAmXLlk1SySmhKEXy/SdUo7pw4QL9+vWjatWq1K5dm06dOtkG+8jIiRMnaNSoEdWqVaNv377cvXs3xTKbNm3CYrHY/uXJkyfJfd8A48aNS1IgY/PmzRQsWNC2zpQpU2zzPv/8czw9PfHy8qJ///7cuXPHrljFg5Fk7ICu3r7KooOL6FipI9ULy3B14v5orXnyySdp0aIFx48fZ/fu3QQEBCSpi5tV6tevz1dfffVA29i1axcHDhxIdV7fvn0JDAzkwIE
D5MqVK0lN37RYrVZWrFhB+fLl72mc7R49etCqVSuOHTtGSEgIH3zwARcvXrRr/YkTJ/LSSy8RGhpK4cKFmT9/foplWrdubRula+PGjbi5udlG1ALjfUheQAOgefPmtvUmT54MGKOQffXVV+zatYvg4GDi4uIICAiwK1bxYCQZO6AFwQuIjotmlGWU2aGIHGzjxo3kypXLNiQiQMWKFRk3zrgGISwsjObNm1O3bl3q1q1rqxO8efPmJAUbEooVQOrlDpcuXYqXlxe+vr60aNEixTZ27NhB06ZNqVOnDk2bNuXw4cOA0cLt2bMnHTt2pFq1arz22mtJ4n/11VdtdZTTEhsby61btyhcuHCG78emTZvw8vJi1KhRtiEv7VnH1dU1yXtosVhsI2ClR2vNxo0b6dWrFwCDBg1K0eJNbtmyZTzxxBO4ubkBxtCXEyZM4JNPPrErXjDek9u3b9uGvixTpozd64r7J8NhOpiLty6y5PASulbpSuWClc0OR2SSj3d8zKFrhzJ1mzWL1GRiw4lpzj9w4AB166Z9FX6JEiX4448/yJMnD6GhofTv35/0RtVLq9zhlClT+P333ylbtmyqLbiaNWuyZcsWXFxcWL9+PW+88Yatzm5gYCB79+4ld+7c1KhRg3HjxtnKC/bp04eZM2dy9OjRFNtcsmQJW7du5fz581SvXp2uXTOu7Z1Qy7l79+688cYbxMTE4Oqa/kWjwcHB1KtXL9V5ERERaSblH374gRIlSlCoUCHbUJVplUpMLCAggJdfftn22s/Pj27duqUYyhNg27Zt+Pr6UqZMGaZNm4anpydly5bl1VdfpUKFCuTNm5f27dsnaWWLrCMtYwczd/9c4qxxjPQdmfHCQtyDMWPG4OvraxsTOSYmhuHDh+Pt7U3v3r0JCQlJd/0CBQrYyh3+9NNPttbbY489xuDBg5k7dy5xcXEp1gsPD6d37954eXnx0ksvJel6btOmDQULFiRPnjzUrl3bVlQCwNnZmQkTJvDhhx+m2GZCN/WFCxfw9vZOtUpSYnfv3mXNmjU8+eSTFChQgEaNGrFu3Tog7bKIGd3Xnz9//lQLQgQGBlK7dm27SyUmOH/+PPv377cVjTh37hxLly619WQkVrduXU6ePMm+ffsYN24cTz75JADXr1/n559/5sSJE5w7d45bt26xaNGidI9DZA5pGTuQs5FnWR66nJ7VelIufzmzwxGZKL0WbFbx9PS0tUABZsyYwZUrV6hf3xhW9/PPP6dkyZK2soh58hh3M7i4uCS5wCnhAiAXF5dUyx3Onj2b7du3s3r1aiwWS4rKS2+//TatW7dmxYoVhIWFJSkXmLhIg7OzM7GxSccaePbZZ/nwww/x9Ez9jgKlFF27dmX69Om8/vrrab4Xa9euJTw83Fa1KioqCjc3Nzp37kzRokU5f/58kuUjIiIoVKgQnp6etmpLyWXUMq5VqxY3btwgNjYWFxcXzpw5k26X8Y8//kiPHj1srfW9e/dy9OhRPDw8bDF7eHhw9OhRChQoYFuvU6dOjB49mitXrrBp0yYqV65M8eLFAejZsyf//POPrcCFyDoO0TKWcaUMX+/7GiecGO4z3OxQhAN4/PHHuXPnDrNmzbJNS3zVcXh4OKVLl8bJyYnvv//e1qqtWLEiISEhREdHEx4ezoYNGwDSLHd47NgxGjVqxJQpUyhWrBinT59OEkd4eDhly5YFsJ17tperqysvvfQSX3zxRZrLbN26lapVqwKwYsUKJk2alGIZf39/5s2bZyuVeOLECdatW0dUVBQtWrRg1apVREREAEY1J19fX5ydnXn88ceJjo5m7ty5tm3t3LmTP//8M8OWsVKK1q1b25L5t99+S/fu3dM8joRu9ASdO3fmwoULtpjd3NxsXfYXLlywtbx37NiB1WqlaNGiVKhQgX///ZeoqCi01mzYsEFKJT4kDpGMBYSFh7Hq2Cr61OhDKfdSZocjHIBSipUrV/Lnn39SuXJlGjZ
syKBBg/j4448BGD16NN9++y2NGzfmyJEjuLu7A1C+fHn69OmDj48PzzzzDHXq1AGMlmCXLl3w8fGhZcuWtnKHEyZMwNvbGy8vL1q0aIGvr2+SOF577TUmTZrEY489lmo3dkaee+65FC3mJUuWYLFY8PHxYe/evbz99tuA8cMgcasRjB8gv//+e5ISh+7u7jRr1oxffvkFHx8fxo4dS7NmzbBYLMyePZt58+bZ3sMVK1bwxx9/ULVqVTw9PXn33Xftvijq448/5rPPPsPDw4OrV6/y3HPPAcYV0sOGDbMtFxYWxunTp2nZsqVd2122bJntornx48cTEBCAUopGjRrRq1cv6tati7e3N1arleeff96ubYoH4xAlFAfMnc6is2/xZ6kRtBxp/1WDjmTilolsOr2JNT3XUCxvMbPDESJHGjBgAJ9//rmtm1aIzJZWCUU5Z+wAjl4/ym8nfmOo11BJxEI8ALlYSZhFuqkdwMx9M3F3dWeI1xCzQxFCCHEfJBnncCFXQ/jj5B8MrD2QgrkLmh2OEEKI+yDJOIebETiDgrkLMqC23HoghBA5lSTjHCzwUiBbzmxhiOcQ8ufKb3Y4Qggh7pMk4xzML9CPInmK0L9m/4wXFkIIkW05RDJWmHN7lpl2nN/B9vPbGe49HDdXN7PDEQ4qK0ooJi45OHnyZNavX39f2wkMDGTNmjW21w9SGvF+9rdq1So++uij+95eTEwMr7/+OtWqVcPLy4uGDRvy22+/AUlLMD6oxHFevnyZRo0aUadOHf766y86deqU6njgD8LX1zfJ4CMArVq1SjJueVhYGF5eXrbXO3bsoEWLFtSoUYOaNWsybNgwu/92a9eupUaNGnh4eKT597h+/To9evTAx8eHhg0bEhwcbJv35Zdf4uXlhaenZ5LBYfbt20eTJk3w9vama9eu3Lx50zYvKCiIJk2a4Onpibe3d6aUmXSIZPyfR2MsLq01foF+lHArQe8avc0ORzioeymhmHxQDXtNmTKFtm3b3te6yZMj3F9pxPvdX7du3dIdQjMjb7/9NufPnyc4OJjg4GB++eUX2yhemSlxnBs2bKBmzZrs3buX5s2bs2bNGgoVKmT3tjIadOXgwYNYrVa2bNnCrVu37NrmxYsX6d27Nx9//DGHDx/m4MGDdOzY0a73Ii4ujjFjxvDbb78REhKCv79/qmOkf/DBB1gsFoKCgvjuu+944YUXAKOQx9y5c9mxYwf79u3j119/JTQ0FIBhw4bx0UcfsX//fnr06GEbvzw2NpYBAwYwe/ZsDhw4wObNmzMsGGIPB0vGj4a/z/3N3kt7GeEzgtzOuTNeQYj7kFEJxYULF9K7d2+6du1K+/btiYyMpE2bNrbRm37++Wfbeu+//z41atSgbdu2thKIAIMHD7YN97h7925atmxJvXr16NChg22851atWjFx4kQaNmxI9erV+euvv7h79y6TJ0+2jaSVPOkmL4148uRJ2rRpg4+PD23atOHUqVPpTk9e1jG1/S1cuJCxY8fajmP8+PE0bdqUKlWq2I7JarUyevRoPD096dKlC506dWLZsmVERUUxd+5cpk+fbhtfu2TJkvTp0yfF3+HJJ5+kXr16eHp6MmfOHMBIQoMHD8bLywtvb2/baGZfffWVrURlv379bH+nsWPHEhgYyGuvvcaaNWuwWCzcvn07SQt80aJFNGzYEIvFwogRI2yJN1++fEyePJlGjRqxbdu2dD8zP/zwA88++yzt27dn1apV6S6bYMaMGQwaNIgmTZoAxqhlvXr1omTJkhmuu2PHDjw8PKhSpQq5cuWiX79+ST53CUJCQmjTpg1gVAELCwvj4sWLHDx4kMaNG+Pm5oaLiwstW7a09fwcPnzYVtKzXbt2tnHa161bh4+Pj22kuKJFi+Ls7GzXsaZHBv3IYbTWTN87nbL5ytLDo4fZ4YiH5MIHHxB9MHNLKOauVZNSb7yR5vyMSiiCUYYvKCiIIkWKEBsby4oVKyh
QoABXrlyhcePGdOvWjT179hAQEMDevXuJjY2lbt26KcoKxsTEMG7cOH7++WeKFy/OkiVLePPNN1mwYAFgJNcdO3awZs0a3nvvPdavX8+UKVPYtWsXfn5+gJF00iqNOHbsWAYOHMigQYNYsGAB48ePZ+XKlWlOT17WMVeuXKnuL7Hz58+zdetWDh06RLdu3ejVqxc//fQTYWFh7N+/n0uXLlGrVi2GDh3K0aNHqVChQoqhN1OzYMECihQpwu3bt2nQoAFPPfUUYWFhnD171tbdmtDV/NFHH3HixAly586dovvZYrGkOIYEBw8eZMmSJfz999+4uroyevRoFi9ezMCBA7l16xZeXl5MmTIlw1iXLFnCH3/8weHDh/Hz80vRXZ2a4OBgBg0alOq8TZs28dJLL6WY7ubmxj///MPZs2dtJTPBKDO5ffv2FMv7+vry008/0axZM3bs2MHJkyc5c+YMXl5evPnmm1y9epW8efOyZs0aWyEULy8vVq1aRffu3Vm6dKltzPQjR46glKJDhw5cvnyZfv36pailfT+kZZzDbDy9kZCrIYz0HYmr84N3jQhhr+QlFMFoMRQpUgQwfii+8cYb+Pj40LZtW86ePcvFixf566+/6NGjB25ubhQoUIBu3bql2Pbhw4cJDg6mXbt2WCwWpk6dmqQ7vGfPngDUq1ePsLCwNGNMqzTitm3bePrppwGjktPWrVvTnZ5RWcfUPPnkkzg5OVG7dm0uXrwIGEUoevfujZOTE6VKlaJ169Z2bSuxr776Cl9fXxo3bszp06cJDQ2lSpUqHD9+nHHjxrF27VpbUk8YD3zRokW2Osj22LBhA7t376ZBgwZYLBY2bNjA8ePHAaMa1lNPPZXhNnbu3Enx4sWpWLEibdq0Yc+ePVy/fh1IvfRjRiUmAVq3bp1qIY1//vkHwO4yk6+//jrXr1/HYrEwffp06tSpg4uLC7Vq1WLixIm0a9eOjh074uvra3vfFixYwIwZM6hXrx4RERHkypULMH4Ybt26lcWLF7N161ZWrFhhK4byIKRlnINYtRW/vX5UKlCJLlW6mB2OeIjSa8FmlYxKKAK24hAAixcv5vLly+zevRtXV1cqVapku7Aloy9erTWenp5pdoMmdOWmViYxNRmVRsyoBnFGZR3TixH+SxJpjf3v4eHBqVOniIiIIH/+tG9L3Lx5M+vXr2fbtm24ubnRqlUr7ty5Q+HChdm3bx+///47M2bM4Mcff2TBggWsXr2aLVu2sGrVKv73v/8lqf2cHq01gwYNSrX2c548eezqhvX39+fQoUNUqlQJgJs3b7J8+XKGDRtG0aJFbYkZ4Nq1axQrZgzd6+npye7du1OtSJVRy7hcuXJJqnylVWayQIECfPPNN7ZjrVy5MpUrVwaMQiIJBTjeeOMNypUzys/WrFnTVrP6yJEjrF69GjBa3y1btrTF36lTJ/bs2WPrBr9f0jLOQX4P+52jN44y2jIaFyf5HSWyVkYlFJMLDw+nRIkSuLq6smnTJk6ePAlAixYtWLFiBbdv3yYiIoJffvklxbo1atTg8uXLtmQcExOTYSLJnz9/uhf5JC6N2LRpUwICAgDjR0OzZs3SnZ5aWceM9peaZs2asXz5cqxWKxcvXmTz5s2AkUyee+45xo8fz927dwGjmzv52Njh4eEULlwYNzc3Dh06xL///gvAlStXsFqtPPXUU/zvf/9jz549WK1WTp8+TevWrfnkk0+4ceMGkZGRdsXZpk0bli1bxqVLlwAjWSb8/ZKbNGlSiivqrVYrS5cuJSgoyFay8eeff8bf3x8wzvsvWrTI9uPk22+/tfUSjB07lm+//TZJ9/KiRYu4cOFChi3jBg0aEBoayokTJ7h79y4BAQGp9rzcuHHD9j7PmzePFi1a2HoTEo751KlT/PTTT7au9YTpVquVqVOn2q6d6NChA0FBQURFRREbG8uff/5J7dq17Xqf0yPf6DlErDWWmYEz8SjkQYdKHcwORzw
CEkoovvTSS3zyyScUL14cd3d3WwnF5J555hm6du1K/fr1sVgs1KxZE4C6devSt29fLBYLFStWpHnz5inWzZUrF8uWLWP8+PGEh4cTGxvLiy++iKenZ5rxtW7dmo8++giLxWKrQZxwzthqtVKuXDnbed2vvvqKoUOH8umnn1K8eHFbKymt6RMmTCA0NBStNW3atMHX15cKFSqk2F9GnnrqKTZs2ICXlxfVq1enUaNGFCxoDFs7depU3nrrLWrXrk2ePHlwd3dPcV62Y8eOzJ49Gx8fH2rUqEHjxo0BOHv2LEOGDMFqtQLw4YcfEhcXx4ABAwgPD0drzUsvvWT3ldK1a9dm6tSptG/fHqvViqurKzNmzKBixYoplt2/f3+KhLdlyxbKli1rqzsNxo+wkJAQzp8/z/PPP8+hQ4fw9fVFKUX9+vVtrfCSJUsSEBDAq6++yqVLl3BycqJFixa2UxPpcXFxwc/Pjw4dOhAXF8fQoUNtn5nZs2cDMHLkSA4ePMjAgQNxdnamdu3azJ8/37aNp556iqtXr9qOOeGiP39/f2bMmAEYp0mGDDHG/i9cuDAvv/wyDRo0QClFp06dkpTXvF8OUULx2blf8f3Zt/mz1Ehajkz9iyKnW3l0JW///TZftP6CNhUerDtECPHwREZGki9fPq5evUrDhg35+++/KVUq59Yc79ChA7///rvZYeRYUkIxB4uJi2H2vtnULlqbx8s/bnY4Qoh70KVLF1s36dtvv52jEzEgiTiLOEQydvShPlYcXcHZyLO81fgtu65AFEJkHwnniYVIj1zAlc3dib3D1/u+pk6JOjxW5jGzwxFCCJEFJBlnc0uPLOXS7UuMqzNOWsVCCOGgJBlnY1ExUczbP49GpRvRoFSDjFcQQgiRI0kyzsZ+OPQD1+5cY6xlrNmhCCGEyEKSjLOpiLsRfBP8DS3KtcBSwmJ2OOIRtmLFCpRSHDqU9tjYyUvkpbVMjRo1sFgs1KpVy1b0ILMsXLiQc+fO2V5LiUIpUZijaK1N+VevXj2dWZ6d86XW7xTQm2e9lmnbNNuMvTO010IvfeDKAbNDEY+43r1762bNmul33nknzWVatmypd+7cme52Ei9z9epVXahQIR0dHZ1pcSaPYeLEiXrgwIH6zp07WmutL1y4oJcsWaK11rpixYr68uXLmbbvBP7+/nrgwIH3vX5sbGy680NCQrSXl5cuU6aMjoyMtE1PfuwnTpzQnp6eWmvjuCtUqKD/+ecfrbXWVqtVL126VF+4cMGueKpUqaKPHTumo6OjtY+Pjz5wIOV30quvvqrfffddrbXWBw8e1I8//rjWWuv9+/drT09PfevWLR0TE6PbtGmjjxw5orXWun79+nrz5s1aa63nz5+v33rrLa211jExMdrb21sHBgZqrbW+cuVKhu9LTgLs0qnkRGkZZ0M37tzgu5DvaFexHbWLPvgwa0Lcr8jISP7++2/mz59vGzYS4Pbt2/Tr1w8fHx/69u3L7du3bfNGjRpF/fr18fT05J133klzu+7u7rYxj/39/fH29sbLy4uJEyfalkttemrlA5ctW8auXbt45plnsFgs3Lp1S0oUxpMShTmDQ9xn7Gi+OfANUTFRjPYdbXYoIpv468cjXDlt3zjD9ipWPh/N+1RPd5mVK1fSsWNHqlevTpEiRdizZw9169Zl1qxZuLm5ERQURFBQUJJSi++//z5FihQhLi6ONm3aEBQUhI+PD2AMmZk7d25CQ0P54osvcHZ25ty5c0ycOJHdu3dTuHBh2rdvz8qVK2nYsGGq08uXL5+ifGChQoXw8/Nj2rRp1K9fn6CgIClRGE9KFOYMDtEyVmjbs5zuyu0r/HDwBzpV6YRHYQ+zwxGPOH9/f1sLsF+/fraB/7ds2cKAAQMAo2xfQrIF+PHHH6lbty516tThwIEDhISE2OYtXryYoKAgTp06xbRp0zh58iQ7d+6kVatWFC9eHBcXF5555hm2bNmS5vS0ygfeLyl
R+GiUKMzuHKplrB3gPtz5++cTY41hlO8os0MR2UhGLdiscPXqVTZu3EhwcDBKKeLi4lBK8cknnwCpfymfOHGCadOmsXPnTgoXLszgwYNTvfimePHi1K1bl+3bt9u+hJNLLREAaZYPTExKFEqJwpzGIVrGjuLCrQssObyE7h7dqVggZbUUIR6mZcuWMXDgQE6ePElYWBinT5+mcuXKbN26lRYtWrB48WLA6AYNCgoCjATh7u5OwYIFuXjxou3q5eSioqLYu3cvVatWpVGjRvz5559cuXKFuLg4/P39admyZZrTUysfCElLKkqJQilRmNM4VMs4p5sTNAeNZoTPCLNDEQJ/f39ef/31JNOeeuopfvjhBz777DOGDBmCj48PFouFhg0bAsa5wzp16uDp6UmVKlV47LGkQ7g+88wz5M2bl+joaAYPHky9evUAowRg69at0VrTqVMnWysuten79u1LUT4QYPDgwYwcOZK8efOybds2KVEoJQpzFIcooTho7pd8e3Yym0uPptWIlN1AOcHpiNN0W9GNXtV78WbjN80ORwhhEilR6NjSKqEo3dTZxOx9s3F2cma4z3CzQxFCmEgS8aNJknE2cDz8OL8e/5V+NfpRwq2E2eEIIYR4yCQZZwOzA2eT2zk3Q72Hmh2KEEIIE0gyNtnha4f5Lew3BtQaQJE8RcwORwghhAkcIhn/N+hHzjMzcCb5XfMzyDP1EXKEEEI4PodIxjnVgSsH2Hh6I4M8B1Ewd0GzwxFCCGESScYmmh44nUK5CzGg9gCzQxEiVc7OzlgsFry8vOjatWumlQNMKK6QGSpVqoS3tzcWiwWLxWIbsCKzBQYGsmbNmiTTfvvtN+rXr0+tWrWoWbMmr776KgDvvvsu06ZNy7R9N23a1PZ8woQJeHp6MmHCBGbPns13332XafsB+Pzzz8mTJw/h4eG2aan9vRKXbYyMjGTEiBFUrVoVT09PWrRokeoY1qlZunQpnp6eODk5pVuG055SjjmZDPphkj0X9/D32b95pd4ruLu6mx2OEKnKmzcvgYGBAAwaNIgZM2bw5pvZ7z74TZs22YZPtFdsbOw9jS8dGBjIrl276NSpE2CMPDZ27FhWr15NzZo1iY2NzfQazQkS/8D4+uuvuXz5sq0a1b2w55j9/f1p0KABK1asYPDgwXZtd9iwYVSuXJnQ0FCcnJw4fvw4Bw8etGtdLy8vfvrpJ0aMSHuwo7i4OMaMGcMff/xBuXLlaNCgAd26dXOokbmkZWwCrTXT906nWN5i9K3Z1+xwhLBLkyZNOHv2LGCU1mvatCl16tShadOmHD58GDBaUD179qRjx45Uq1YtSbWdb775hurVq9OyZUv+/vtv2/STJ0/Spk0bfHx8aNOmDadOnQKMEbVGjRpF69atqVKlCn/++SdDhw6lVq1aGSaJ9Lb58ssv07p1ayZOnMixY8fo2LEj9erVo3nz5hw6dAgwWmteXl74+vrSokUL7t69y+TJk1myZAkWi4UlS5bwySef8Oabb1KzZk3AGK1q9OiUldbmzp1LgwYN8PX15amnniIqKirVfQAcOHDAVmbRx8eH0NBQwCizCNCtWzdu3bpFo0aNWLJkSZIWeFrHkvyY03Ps2DEiIyOZOnWqbajNjBw7dozt27czdepUnJyMlFKlShW7R82qVasWNWrUSHcZe0s55mTSMjbB9gvb2XVxF5MaTiKvS16zwxE5wKaFc7h08nimbrNExSq0Hvy8XcvGxcWxYcMG24D/NWvWZMuWLbi4uLB+/XreeOMNWz3awMBA9u7dS+7cualRowbjxo3DxcWFd955h927d1OwYEFat25NnTp1AGPs5IEDBzJo0CAWLFjA+PHjWblyJQDXr19n48aNrFq1iq5du/L3338zb948GjRoQGBgIBaLBTCqDzk7O5M7d262b9+e7jaPHDnC+vXrcXZ2pk2bNsyePZtq1aqxfft2Ro8ezcaNG5kyZQq///47ZcuW5caNG+T
KlStFecSPP/6YV155JcP3rmfPngwfbgzm89ZbbzF//nzGjRuXYh9gDDH5wgsv8Mwzz3D37l1bzeMEq1atIl++fLbeinfffdc27/nnn0/1WJIfc3r8/f3p378/zZs35/Dhw1y6dIkSJdIf++DAgQNYLJY0t928eXPbmOGJTZs2jbZt26a77QT2lnLMySQZP2QJreJS7qXoVb2X2eEIka7bt29jsVgICwujXr16tGvXDjAKLAwaNIjQ0FCUUsTExNjWadOmDQULGhck1q5dm5MnT3LlyhVbOUSAvn37cuTIEQC2bdvGTz/9BMCzzz6bpDXdtWtXlFJ4e3tTsmRJvL29AaMSUVhYmC0ZJ++mTm+bvXv3xtnZmcjISP755x969+5tmxcdHQ3AY489xuDBg+nTp49dYzinJzg4mLfeestWWKJDhw5p7qNJkya8//77nDlzhp49e1KtWjW79pHesSQ+5owEBASwYsUKnJyc6NmzJ0uXLmXMmDFplmO0p0zjX3/9ZccRpM/eUo45mSTjh+yvs38RdDmId5q8Qy7n1EvHCZGcvS3YzJZwzjg8PJwuXbowY8YMxo8fz9tvv03r1q1ZsWIFYWFhtGrVyrZO4nOZzs7OxMbGAvZ/eSZeLmFbTk5OSbbr5ORk2+69btPd3bhGw2q1UqhQIVsrM7HZs2ezfft2Vq9ejcViSXWZhNKEvr6+6e578ODBrFy5El9fXxYuXMjmzZvT3MfTTz9No0aNWL16NR06dGDevHk8/vjjGR5feseS+JjTExQURGhoqO0H1927d6lSpQpjxoxJUaIR/ivTWKhQIfbt24fVarV1UyeWGS1je0s55mR2nTNWSnVUSh1WSh1VSr2eyvxnlFJB8f/+UUql/+l8RFm1Fb+9fpTLV47uHilriwqRXRUsWJCvvvqKadOmERMTQ3h4uK2C0MKFCzNcv1GjRmzevJmrV68SExPD0qVLbfOaNm1KQEAAAIsXL6ZZs2YPHK892yxQoACVK1e2xaK1Zt++fYBxHrRRo0ZMmTKFYsWKcfr06SQlGsG4qvmDDz6wtfCtViufffZZiv1ERERQunRpYmJibGUn09rH8ePHqVKlCuPHj6dbt2620pQZSe9YktuxYwcDBw5MMd3f3593333XVqLx3LlznD17lpMnT9KgQQP+/vtvLly4AMCuXbuIjo6mfPnyVK1alfr16/POO+/YWrChoaG2c7p//fVXqmUa7U3EYH8px5wsw2SslHIGZgBPALWB/kqp5JewnQBaaq19gP8BWXNJYQ634dQGDl47yGjLaFydXM0OR4h7UqdOHXx9fQkICOC1115j0qRJPPbYYynOa6amdOnSvPvuuzRp0oS2bdtSt25d27yvvvqKb775Bh8fH77//nu+/PLLB47V3m0uXryY+fPn4+vri6enpy2BTJgwAW9vb7y8vGjRogW+vr60bt2akJAQ2wVcPj4+fPHFF/Tv359atWrh5eXF+fPnU+zjf//7H40aNaJdu3a2i73S2seSJUvw8vLCYrFw6NChVJNmWtI6luROnTpF3rwpr1UJCAigR48eSab16NGDgIAASpYsyZdffkmnTp2wWCy8+OKL+Pv721rC8+bN48KFC3h4eODt7c3w4cPtbrmuWLGCcuXKsW3bNjp37mzrxj937pztyvXEpRxr1apFnz59bKUcHUWGJRSVUk2Ad7XWHeJfTwLQWqdaq1ApVRgI1lqXTW1+gswsoTh47hcsPPsOm0uPodWIDzJlm5ktzhrHU6uewoqVFd1W4OyU8fkbIYTIbBMmTODZZ5/Fx8fH7FAeSWmVULTnnHFZ4HSi12eARuks/xzw272F5/h+C/uNY+HHmNZymiRiIYRpPv30U7NDEKmwJxmndtVFqs1ppVRrjGSc6kkfpdTzwPMAFSpUsDPEnC/GGsOswFnUKFyDdhXbmR2OEEKIbMaeC7jOAOUTvS4HnEu+kFLKB5gHdNdaX01tQ1rrOVrr+lrr+gm3ODw
Kfjn2C6ciTjG2zliclIyzIoQQIil7MsNOoJpSqrJSKhfQD1iVeAGlVAXgJ+BZrfWRzA8z57obd5fZ+2bjXcybluVamh2OEEKIbCjDbmqtdaxSaizwO+AMLNBaH1BKjYyfPxuYDBQFZsbfzxeb2gnqR9Hy0OWcv3Wed5u+63A3qQshhMgcdg36obVeA6xJNm12oufDgGGZG9p9yGa57nbsbeYEzaFeyXo0Kd3E7HCEEEJkU3ICMwv9ePhHrty+wljLWGkVixwpoUDBg9i1axfjx49Pc35YWBg//PCD3cvDf2UTfXx8aNmyJSdPnnzgODNLVpQ1TLBixQqUUrYiEACbN2+mS5cuSZYbPHgwy5YtAyAmJobXX3+datWq4eXlRcOGDfntN/tueImOjqZv3754eHjQqFEjwsLCUl0u4b5rT0/PJEOPJi7Y0apVK86cOWObd+rUKdq3b0+tWrWoXbu2bdvNmze3lcMsU6YMTz75pF2x5nQOkYxVBvdKm+FWzC3m759P0zJNqV9KeuzFo6t+/fp89dVXac5PnowzWj7Bpk2bCAoKolWrVkydOvWB49RaY7VaH3g7I0eOvKfBOu6Fv78/zZo1s40uZo+3336b8+fPExwcTHBwML/88kuqw1OmZv78+RQuXJijR4/y0ksvpVr16erVq0yYMIENGzZw4MABLl68yIYNGwB49dVXGThwIEFBQUyePJlJkybZ1hs4cCATJkzg4MGD7Nixw1aQIvGIXU2aNHngscFzCodIxgmyU0pefHAx16OvM9aSOQXUhcguAgMDady4MT4+PvTo0cM2ZvHOnTvx8fGhSZMmTJgwAS8vLyBpy+3PP/+0tXrq1KlDREQEr7/+On/99RcWi4XPP/88yfKRkZEMGTLE1gpOqAyVWOLSjpcvX+app56iQYMGtiEcE6a3a9eOunXrMmLECCpWrMiVK1cICwujVq1ajB49mrp163L69Gk+/fRTGjRogI+PD++88w4At27donPnzvj6+uLl5cWSJUsAeP3116lduzY+Pj68+uqrAEnKGqb1XrVq1YqJEyfSsGFDqlevblcxhcjISP7++2/mz59vdzKOiopi7ty5TJ8+3Ta2d8mSJenTp49d6//8888MGjQIgF69erFhw4YURRuOHz9O9erVbUVA2rZta/s7hYSE0KZNG8CorJUwKlhISAixsbG2cbDz5cuHm5tbku1GRESwcePGR6ZlLIUiskB4dDgLgxfSqnwrvIt7mx2OcAA3fjnG3XO3MnWbucq4U6hr1Xteb+DAgUyfPp2WLVsyefJk3nvvPb744guGDBnCnDlzaNq0Ka+/nmIIe8AoDjBjxgwee+wxIiMjyZMnDx999BHTpk3j119/BbAVUgBjKMmCBQuyf/9+gBTFCgDWrl1r+8J+4YUXeOmll2jWrBmnTp2iQ4cOHDx4kPfee4/HH3+cSZMmsXbtWubM+W/E3sOHD/PNN98wc+ZM1q1bR2hoKDt27EBrTbdu3diyZQuXL1+mTJkyrF69GjCqVl27do0VK1Zw6NAhlFK2Moj2vFcAsbGx7NixgzVr1vDee++xfv36dN/3lStX0rFjR6pXr06RIkXYs2dPkmFFU3P06FEqVKhAgQIFUp3ft29fWy3qxF5++WUGDhyYpHShi4sLBQsW5OrVq0kqZHl4eHDo0CHCwsIoV64cK1eu5O7duwD4+vqyfPlyXnjhBVasWEFERARXr17lyJEjFCpUiJ49e3LixAnatm3LRx99lKSy1IoVK2jTpk2asTsaScZZ4LuQ74iIiZBWsXA44eHh3Lhxg5Ytjdv0Bg0aRO/evblx4wYRERE0bdoUgKefftqWXBN77LHHePnll3nmmWfo2bMn5cqVS3d/69evT9IKLFy4sO1569atuXjxIiVKlLB1U69fv56QkBDbMjdv3iQiIoKtW7eyYsUKADp27JhkOxUrVqRx48YArFu3jnXr1tlqLUdGRhIaGkrz5s159dVXmThxIl2
6dKF58+bExsaSJ08ehg0bRufOnVOct03rvUqQ0P1ar169NM/FJubv78+LL74IQL9+/fD396du3boPVN4woYWfFntKFxYuXJhZs2bRt29fnJycaNq0KcePG7W3p02bxtixY1m4cCEtWrSgbNmyuLi4EBsby19//cXevXupUKECffv2ZeHChbZ62QnHO2yY+dcFPyySjDPZtTvXWBSyiA6VOlCjSA2zwxEO4n5asA9TRmPcJ3j99dfp3Lkza9asoXHjxhm2BrXWaSaVTZs24e7uzuDBg5k8eTKfffYZVquVbdu2pSiEkF58icsLaq2ZNGkSI0aMSLHc7t27WbNmDZMmTaJ9+/ZMnjyZHTt2sGHDBgICAvDz82Pjxo3pHk9iCd3GictMpuXq1ats3LiR4OBglFLExcWhlOKTTz5Jt7yhh4cHp06dIiIigvz586fYbkYt44TSheXKlSM2Npbw8HCKFCmSYvmuXbvStWtXAObMmWNr4ZYpU8ZWVzoyMpLly5dTsGBBypUrR506dahSpQoATz75JP/++68tGV+9epUdO3bYfkA9ChzqnHF28E3wN9yJu8No39FmhyJEpitYsCCFCxe2neP8/vvvadmyJYULFyZ//vz8+++/AGme0zx27Bje3t5MnDiR+vXrc+jQoRSlCRNr3749fn5+ttfJk07evHn54osv+O6777h27VqK5RPq+zZr1owff/wRMFq/qXV3A3To0IEFCxYQGRkJwNmzZ7l06RLnzp3Dzc2NAQMG8Oqrr7Jnzx4iIyMJDw+nU6dOfPHFFylqCaf1XqXn7NmztnOsiS1btoyBAwdy8uRJwsLCOH36NJUrV2br1q1Uq1aNc+fOcfDgQcC4gnnfvn1YLBbc3Nx47rnnGD9+vK3r+Pz58yxatAgwWsaplTdMuACtW7dufPvtt7YYHn/88VR/HF26dAkw/j4zZ860tWivXLliuyjuww8/ZOjQoYBREvH69etcvnwZgI0bN1K79n/FAJcuXUqXLl3IkydPuu+XI5GWcSa6HHUZ/0P+dKnShSqFqpgdjhAPLCoqKklX8ssvv8y3337LyJEjiYqKokqVKnzzzTeAceXt8OHDcXd3p1WrVhQsWDDF9r744gs2bdqEs7MztWvX5oknnsDJyQkXFxd8fX0ZPHiwrYsY4K233mLMmDF4eXnh7OzMO++8k+Lq2tKlS9O/f39mzJjBV199xZgxY/Dx8SE2NpYWLVowe/Zs3nnnHfr378+SJUto2bIlpUuXJn/+/Lakm6B9+/YcPHiQJk2McQHy5cvHokWLOHr0KBMmTMDJyQlXV1dmzZpFREQE3bt3586dO2it+fzzz1Mcb1rvVVrOnz+Pi0vKr2V/f/8U5+GfeuopfvjhB5o3b86iRYsYMmQId+7cwdXVlXnz5tne/6lTp/LWW29Ru3Zt8uTJg7u7O1OmTEk3jgTPPfcczz77LB4eHhQpUiTJjyyLxWL7AfLCCy/Y6idPnjyZ6tWrA8b5/0mTJqGUokWLFsyYMQMwegOmTZtGmzZt0FpTr149hg8fbtt2QEBAmtcdOKoMSyhmlcwsoThkzud8c+5dNpUZQ+vnzSuh+MH2D1h6eCmrnlxF+QLlM15BCAcSGRlpuy/5o48+4vz585lSmzgzREdH4+zsjIuLC9u2bWPUqFEpWrLZgZ+fHxUqVKBbt25mhyKyyIOUUBR2OBd5jqVHlvJktSclEYtH0urVq/nwww+JjY2lYsWKLFy40OyQbE6dOkWfPn2wWq3kypWLuXPnmh1SqsaOlYs+H1UOkYxVNrjDeE7QHBSKET4pL/wQ4lHQt29f+vbta3YYqapWrRp79+41Owwh0uRgF3CZM+TkqZunWHl0JX1q9KGUeylTYhBCCJFzOVgyNsesfbNwdXJlmPejc0+cEEKIzCPJ+AEdu3GM1cdX079Wf4rlLZbxCkIIIUQykowf0MzAmbi5ujHUc6jZoQghhMihJBk/gEPXDrHu5Dqerf0shfI
UMjscITLd+++/j6enJz4+PlgsFrZv3867776bpPoOGINr1KpVCzBucRoxYgRVq1bF09OTFi1asH379gz3dfnyZVxdXfn666+TTE9exnHhwoVJrjr+7rvv8PLywtPTk9q1a9uKNNjjww8/xMPDgxo1avD777+nusy+ffto0qQJ3t7edO3alZs3b9rmBQUF0aRJEzw9PfH29ubOnTuAMeSmr68vnp6ejBw5kri4OAA+++wzW2GJNm3aZKvSj8JckowfwIy9MyiQqwDP1n7W7FCEyHTbtm3j119/Zc+ePQQFBbF+/XrKly9vGzwjsYCAAJ5++mkAhg0bRpEiRQgNDeXAgQMsXLiQK1euZLi/pUuX0rhxY/z9/e2O8bfffuOLL75g3bp1HDhwgD179qQ62EhqQkJCCAgI4MCBA6xdu5bRo0fbkmZiw4YN46OPPmL//v306NGDTz/9FDAKPQwYMIDZs2dz4MABNm/ejKurKwA//vgj+/btIzg4mMuXL7N06VIA6tSpw65duwgKCqJXr15Jav+KR5sk4/sUdDmIzWc2M8RrCAVyPRpVRcSj5fz58xQrVsw2hnKxYsUoU6YMNWrUoFChQklauz/++CP9+vXj2LFjbN++nalTp+LkZHy9VKlShc6dO2e4P39/f/7v//6PM2fO2EoiZuTDDz9k2rRplClTBoA8efIkGckpPT///DP9+vUjd+7cVK5cGQ8PD3bs2JFiucOHD9OiRQsA2rVrZysPuG7dOnx8fPD19QWgaNGitjGZEyoNxcbGcvfuXdsQkq1bt7aVCmzcuDFnzpyxK1bh+BziPmMz+O31o0ieIjxd82mzQxGPgN9++40LFy5k6jZLlSrFE088keb89u3bM2XKFKpXr07btm3p27evbWzl/v37ExAQQKNGjfj3338pWrQo1apVY9WqVVgsliSl8Oxx+vRpLly4QMOGDenTpw9Llizh5ZdfznC94OBg6tWrl+q8Tz/9lMWLF6eY3qJFC7766ivOnj1rq9YEUK5cuVR/BHh5ebFq1Sq6d+/O0qVLOX36NABHjhxBKUWHDh24fPky/fr1S9LS7dChAzt27OCJJ56gV69eKbY7f/78dN9/8WhxiJbxw767eOeFnWw7v43nvJ7DzdUt4xWEyIHy5cvH7t27mTNnDsWLF7eVuQOjhN+yZcuwWq0EBATQv3//B9pXQECAreB9QnnA9NhTHnDChAmpFkH46quvAPvKAwIsWLCAGTNmUK9ePSIiIsiVKxdgtHq3bt3K4sWLbSUaN2zYYFvv999/5/z580RHR6eo5rRo0SJ27drFhAkTMjwO8WhwsJZx1qdlrTV+e/0okbcEfWr0yfL9CQGY1oJydnamVatWtGrVCm9vb7799lsGDx5M+fLlqVSpEn/++SfLly9n27ZtAHh6erJv3z6sVqutm9oe/v7+XLx40daSPXfuHKGhoVSrVo28efNy9+5dWxJMKA+YsL/du3fz+OOPp9hmRi3jhPKACc6cOWPr7k6sZs2arFu3DjBaw6tXrwaMlnTLli1tsXTq1Ik9e/YkqbqUJ08eunXrxs8//0y7du0Ao+by+++/z59//mk7BSCEQ7SMH6Zt57ax59IehvsMJ4/Lo1PeSzx6Dh8+TGhoqO11YGAgFStWtL3u378/L730ElWrVrVVdqpatSr169fnnXfesbU8Q0ND+fnnnwFo06ZNiq7gw4cPc+vWLc6ePUtYWBhhYWFMmjTJViGoZcuWtpJ/t2/f5scff6R169YATJo0iddee83WhR8dHW1r+WbUMu7WrRsBAQFER0dz4sQJQkNDadiwYYr3IaE8oNVqZerUqYwcORIwuqGDgoKIiooiNjaWP//8k9q1axMZGcn58+cBo/W8Zs0aatasCcDevXsZMWIEq1atokSJEvfxVxGOSpLxPdBaM33vdMq4l6FntZ4ZryBEDhYZGcmgQYNst+KEhITw7rvv2ub37t2bAwcO0K9fvyTrzZs3jwsXLuD
h4YG3tzfDhw+nTJkyWK1Wjh49mqI4vb+/Pz169Egy7amnnrJ1VX/55Zf89NNPWCwWGjduTO/evW0XVHXq1IkxY8bQtm1bPD09qVevHrGxsXYdn6enJ3369KF27dp07NiRGTNm2M51Dxs2jISqcv7+/lSvXp2aNWtSpkwZhgwZAkDhwoV5+eWXadCgARaLhbp169K5c2du3bpFt27dbBd3lShRwpbAJ0yYQGRkJL1798ZisUh1JmHjECUUn5vzGfPPvcemMuNo/fzUTNlmajad2sT4TeOZ0nQKPar1yHgFIYRNcHAwCxYs4LPPPjM7FCFMk1YJRWkZ28mqrfgF+lEhfwW6Vu1qdjhC5DheXl6SiIVIg2Ml4yy8fmvdyXUcuX6E0ZbRuDg52HVvQgghTOVYyTiLxFpjmbF3Bh6FPOhYqaPZ4QghhHAwkoztsObEGsJuhjHGMgZnp3sbzEAIIYTIiCTjDMRYY5gZOJNaRWrRpkKbjFcQQggh7pFDJGNF1l0RvvLoSs5GnmVsnbF2jfojhBBC3CuHSMb/ydxkGR0Xzdf7vsa3uC/NyzbP1G0LkRMkLl+4Zs0aqlWrxqlTp3j33Xdxc3OzDYiRfNm0dOrUiRs3bqS7TKtWrUjttsfkpRMz07Rp06hZsyZeXl74+vry3XffpRvL/di1axfjx48HjMFJ2rZti8ViYcmSJQwbNoyQkJBM2U+C7t2706RJkyTTBg8ezLJly5JMS/x3O3LkCJ06dcLDw4NatWrRp08fLl68aNf+du/ejbe3Nx4eHowfPz7V4Ubv3r3LkCFD8Pb2xtfXl82bN9vmLVmyBB8fHzw9PZOM8X3q1Clat25NnTp18PHxYc2aNbZ5EydOxMvLCy8vrxSVxHIaB0vGmWvZkWVcjLrIuDrjpFUsHmkbNmxg3LhxrF27lgoVKgBGFaf/+7//u6ftrFmzhkKFCmVBhOnTWmO1WlOdN3v2bP744w927NhBcHAwW7ZsSTWRPKj69evbRv/au3cvMTExBAYG0rdvX+bNm0ft2rXt3lZqpR4Tu3HjBnv27OHGjRucOHHCrm3euXOHzp07M2rUKI4ePcrBgwcZNWoUly9ftmv9UaNGMWfOHEJDQwkNDWXt2rUplpk7dy4A+/fv548//uCVV17BarVy9epVJkyYwIYNGzhw4AAXL160jfM9depU+vTpw969ewkICGD06NEArF69mj179hAYGMj27dv59NNPk9SazmkkGachKiaKuUFzaViqIY1KNzI7HCFM89dffzF8+HBWr15N1apVbdOHDh3KkiVLuHbtWop1Fi1aRMOGDbFYLIwYMcKWPCpVqmSrbfy///2PmjVr0q5dO/r378+0adNs6y9dupSGDRtSvXp1/vrrL9v006dP07FjR2rUqMF7771nm/7ZZ5/ZWkhffPEFAGFhYdSqVYvRo0dTt25dTp8+zeDBg/Hy8sLb25vPP/8cgA8++ICZM2fayh4WLFiQQYMGpTimUaNGUb9+fTw9PXnnnXds019//XXbKGWvvvqqLf6EVnbCaGGbN2+mS5cuXLp0iQEDBhAYGIjFYuHYsWNJWuDr1q2jSZMm1K1bl969exMZGWl776ZMmUKzZs1s9ZHTsnz5crp27Uq/fv1sw4pm5IcffqBJkyZ07frfOAqtW7fGy8srw3XPnz/PzZs3adKkCUopBg4cyMqVK1MsFxISYhu7u0SJEhQqVIhdu3Zx/PhxqlevTvHixQFo27atrVSlUsqWZMPDw23jh4eEhNCyZUtcXFxwd3fH19c31R8AOYXcMJuGgMMBXL1zlc/rfG52KEJw5Mj/iIg8mKnbzJ+vFtWrv53uMtHR0XTv3p3NmzfbxldOkC9fPoYOHcqXX36ZJDEePHiQJUuW8Pfff+Pq6sro0aNZvHgxAwcOtC2za9culi9fzt69e4mNjaVu3bpJSiHGxsayY8cO1qxZw3vvvcf69esBbK1XNzc3GjRoQOfOnVFK8c0337B9+3a
01jRq1IiWLVtSuHBhDh8+zDfffMPMmTPZvXs3Z8+eJTg4GDBajxEREURERCT5kZGW999/nyJFihAXF0ebNm0ICgqiXLlyrFixgkOHDqGUsnXBT5kyhd9//52yZcum6JYvUaIE8+bNY9q0afz6669J5l25coWpU6eyfv163N3d+fjjj/nss8+YPHkyYBSe2Lp1a4ax+vv7884771CyZEl69erFpEmTMlwnvXKUhw8fpm/fvqnO27x5M2fPnrWNTw5pl6P09fW11ZE+ffo0u3fv5vTp0zz++OMcOnSIsLAwypUrx8qVK7l79y4A7777Lu3bt2f69OncunXL9lnw9fXlvffe4+WXXyYqKopNmzbdU+9CdiPJOBWRdyNZELyAZmWbUadEHbPDEcI0rq6uNG3alPnz5/Pll1+mmD9+/HgsFguvvPKKbdqGDRvYvXs3DRo0AIziDsmLImzdupXu3buTN29egCStMYCePY2x3+vVq0dYWJhtert27ShatKhtma1bt6KUokePHri7u9um//XXX3Tr1o2KFSvaahZXqVKF48ePM27cODp37kz79u2JjIy0+xTUjz/+yJw5c4iNjeX8+fOEhIRQu3Zt8uTJw7Bhw+jcuTNdunQB4LHHHmPw4MH06dPHdiz2+PfffwkJCeGxxx4DjHOsic/7ppUQE7t48SJHjx6lWbNmKKVwcXEhODgYLy+vVI/VnuOvUaMGgYGBac63txzl0KFDOXjwIPXr16dixYo0bdoUFxcXChcuzKxZs+jbty9OTk40bdqU48ePA8YPi8GDB/PKK6+wbds2nn32WYKDg2nfvj07d+6kadOmFC9enCZNmuDiknNTWs6NPAt9f/B7wqPDGVsnay4WEeJeZdSCzSpOTk78+OOPtG3blg8++IA33njj/9u787iq6vyP46+vgKYiuDclISCuGFwRcidzwR2nqVwTNZc0nXSmKM1fZuSopVPOiIooDuWCmhpupTRmuZRYmCgqChqay7jLIoIC5/fHlRPbhYsiV/DzfDx4POTec8/93C/g537POff7znN/zZo1GTJkCIsWLdJv0zSN4cOHM3v2bJP7Le6cbE60oJWVVZ7gh/z/wSulitxXToMGY7BDTEwMO3bsYOHChaxbt47ly5dTvXp1Tp8+jYuLi8n9/Pbbb8ybN4+ff/6ZWrVqMWLECNLT07G2tubAgQPs3LmTNWvWEBQUxHfffUdwcDBRUVFs27YNg8FQZCPLTdM0unfvbjLPOffrMWXt2rXcuHEDZ2dnAJKTk1mzZg0zZ86kTp063LhxQ982fxzlDz/8UOg+i5sZOzg4cO7cOf02U3GU1tbW+ukBgPbt29O4cWPA+IYs501ZSEiIHtoRGhqqH35u164d6enpXL16lfr16zNt2jSmTZsGwJAhQ/R9lUdyzjifpIwkvjj6BV0du+JWx83S5QhhcdWqVWPr1q2sWrWK0NDQAvf//e9/Z8mSJXrT7Nq1K+vXr9evtL5+/TpnzpzJ85iOHTuyZcsW0tPTSU1N1TOCi/Ptt99y/fp1bt++TUREBB06dMDHx4eIiAjS0tK4desWX331FZ06Ffz0w9WrV8nOzuall17io48+4uDBg4AxhnHChAn6ecnk5GRCQkLyPDY5OZnq1atjb2/PpUuX+OabbwBjslVSUhK9e/dm/vz5etM9deoUbdq0ITAwkLp16+bJTS5K27Zt2bdvHwkJCQCkpaVx8uTJQrcNCgoiKCiowO3h4eFs375dj6OMjo7Wzxt37tyZtWvX6oeAw8LC9DjKIUOG8OOPP+b5WWzfvp0jR47oM+PCvmrWrMlTTz1FjRo12L9/P5qm8cUXX9C/f/8CteX8jMD4s7S2ttYPLef8vty4cYNFixYxevRoABwdHfWLuY4fP056ejr16tUjKyuLa9euAXD48GEOHz6Mr6+vWeP8KJKZcT5hR8O4dfcWbxjesHQpQjwyateuzfbt2/Hx8dFnUjnq1q3Liy++qM94WrR
owcyZM/H19SU7OxsbGxsWLlyYJwvZ29sbPz8/PDw8aNiwIV5eXtjb2xdbR8eOHRk2bBgJCQkMGTIELy9j+M2IESP0LOLRo0fTqlWrPIe3Ac6fP8/IkSP1q6pzZu7jx48nNTUVb29vbGxssLGxyXPYHYznJ1u1aoWbmxsuLi76YeSUlBT69+9Peno6mqbpYxAQEEB8fDyaptG1a1c8PDxMzjpzq1evHmFhYQwePJiMjAzAeDVxkyZNCmwbFxen15EjMTGRs2fP6ofmAZydnbGzsyMqKoq+ffsSHR1N69atsbKyolGjRgQHBwNQtWpVtm7dyuTJk5k8eTI2Nja4u7sXenqiMIsXL2bEiBHcvn2bXr160atXLwA2b97ML7/8QmBgIJcvX6ZHjx5UqlSJBg0asGLFCv3xkyZNIiYmBoDp06frr/mf//wnY8aM4bPPPkMpRVhYGEop7t69q7/psrOzY+XKleX6MHWFiFAcHTKPZRc+YleDN3lhzEf3vZ9rt6/Ra2MvOj/TmU98PimV2oQQhUtNTcXW1pa0tDR8fHwICQnB09PT0mWVG3379mXjxo1UrlzZ0qWIEjAVoVh+30YU6sE+CxwaG0pGVgZveMisWIiHbezYsRw7doz09HSGDx8ujbiE8l+JLcq3CtaM79+lW5dYG7cWv0Z+ONk7WbocISq81atXW7oEIR4ZcgHXPUuPLCWbbMZ5jLN0KUIIIR4z0oyBcynn2BC/gZcav0QD2waWLkcIIcRjRpoxsOTwEipRiTHPjrF0KUIIIR5Dj30zTkxKZPOpzQxsNpAnqz9p6XKEEEI8hh77Zrw4ZjFVrKowquUoS5cixCPHysoKg8Ggf82ZM6fI7WfNmlXi53jxxRcxGAy4urpib2+vP9ePP/54v2WblJqayuuvv06jRo1wc3PDx8eHqKgowLwISHMFBwfrMYxxcXEYDAZatWrFqVOnaN++fak9DxjX8a5bt26B9adzh3LAH0EVOb755hu8vLxo3rw5zZo100MuzNGzZ09q1qyZZ3/5ZWRkMHDgQFxdXWnTpk2Bz32LvB7rq6njb8TzzW/fMOrZUdSpWsfS5QjxyKlatarZSzkChS6ZCcZlHjVNo1Klgu//v/rqK8DYLAoLT8jMzCy1xRxGjx6Ns7Mz8fHxVKpUidOnT3P8eOkGcACMG/fHhaARERH0799fD9MoyZuMosYtR2RkJE2bNmXdunXMmjXLrLWmY2NjmThxItu2baNZs2ZkZmYWWHWsKAEBAaSlpbFkyRKT24SGhlKrVi0SEhJYs2YN7777brnPHH6YKsTM+H4/Xbzo0CKq21RnhNuI0ixHiAotKSmJpk2bcuLECQAGDx7M0qVLmTJlCrdv38ZgMDB06NBCIwxNxRDmFxYWxiuvvEK/fv3w9fXl1q1bvPbaa3h7e9OqVSs2bdoEGHN9AwIC8Pb2xt3dXW8OFy9exMfHB4PBQMuWLdmzZw+nTp0iKiqKmTNn6s3NxcWFPn365Hnu1NRUunbtiqenJ88++6z+XLdu3aJPnz54eHjkCbMvLEJxxowZzJs3j6+//pr58+ezbNkyfdnJ3DPwuXPn6rXnjEdh41aU8PBwJk2ahKOjI/v37zfjJwiffPIJ06ZN05O4rK2t9Zxgc3Tt2pUaNWoUuc2mTZv0KMqXX36ZnTt3PpSc6IqiQs2MS/JjPnrtKP89+1/eMLyBfZXil+ETwpLejz9HbOrtUt1nS9uqfNTYochtcpprjqlTpzJw4ECCgoIYMWIEkyZN4saNG4wZY7z4MSgoSJ9JJyYm5okwhMJjCN3d3Qt97p9++onDhw9Tu3Zt3nvvPbp06cLy5cu5efMmzz33HN26dWPVqlXY29vz888/k5GRQYcOHfD19WXjxo306NGDadOmkZWVpUfsGQwGPYDAlCeeeIKvvvoKOzs7rl69Stu2bfHz82P79u08/fTT+trNSUlJXL9+vdAIxRy9e/dm3Lhx2NraFjgMHBkZSXx8PAc
OHEDTNPz8/Ni9ezeOjo4Fxq2on8/OnTtZsmQJN2/eJDw8PE/KkymxsbEFlvzMsWrVKubOnVvgdldXV9avX1/svnOcP3+eZ555BjA2e3t7e65du1ZgOVVhVKGacUks/HUh9lXsGdZ8mKVLEeKRZeowdffu3fnyyy+ZMGGCvp5wYXJHGELhMYSmmnH37t2pXbs2YGxcmzdvZt68eQCkp6dz9uxZIiMjOXz4sN4kkpKSiI+Px9vbm9dee427d+/y5z//Oc8biuJomsZ7773H7t27qVSpEufPn+fSpUs8++yzvP3227z77rv07duXTp06kZmZWWiEojkiIyOJjIykVStjTGtqairx8fE4OjoWGDdTtm7dygsvvEC1atX0AIzPPvsMKyur+45LHDp0KEOHDjX7dZhibqyiMHosm/Ghy4fYc34Pf2v9N2wrl95FG0I8LMXNYMtadnY2x48fp2rVqly/fj1PsHxuuSP/TMUQmpL7sZqmsWHDBpo2bZpnG03TWLBgAT169Cjw+N27d7Nt2zaGDRtGQEAAHTp0ICYmhuzs7CLPwa5atYorV64QHR2NjY0NTk5OpKen06RJE6Kjo/n666+ZOnUqvr6+TJ8+vdAIRXNomsbUqVN5/fXX89yemJhoVlQiGA9R79u3DycnJwCuXbvGrl276Natmx6XmDMTzR+XGB0djYeHR6GvvzRmxg4ODvz+++84ODiQmZlJUlKS/uZKFFQhzhmXVNCvQdR5og6Dmg6ydClClEufffYZzZs3Jzw8XJ+BAtjY2Oj/zs9UDKE5evTowYIFC/TZ1q+//qrfvnjxYv05T548ya1btzhz5gz169dnzJgxjBo1ioMHD9KoUSO8vLz44IMP9P3Ex8fr54RzJCUlUb9+fWxsbNi1a5ce/3jhwgWqVavGq6++yttvv83BgwdNRiia+5qWL19OamoqYDysmxMjmJ+/vz8HDhzIc1tycjJ79+7l7NmzelziwoUL9Szkzp0766lIWVlZrFy5Uj9vHRAQwKxZs/R4xuzsbD799FPAODMuLCqxJI0YwM/Pj88//xyA9evX06VLF5kZF+GxmxlHXYwi6n9RTHluCtVsqlm6HCEeafnPGffs2ZPXXnuNZcuWceDAAWrUqIGPjw8zZ87kww8/ZOzYsbi7u+Pp6ck//vGPPPsyFUNojvfff5/Jkyfj7u6Opmk4OTmxdetWRo8eTWJiIp6enmiaRr169YiIiOD7779n7ty52NjYYGtrq3/MaNmyZbz11lu4urpSrVo16tSpU2AWOHToUPr164eXlxcGg0G/yOnIkSMEBARQqVIlbGxsWLx4sckIRXP4+vpy/Phx/Ryvra0tK1euLPSc9uHDh3nqqafy3LZx40a6dOlClSpV9Nv69+/PO++8Q0ZGBu+//z7jx4/Hw8MDTdPo2bMnr776KgDu7u7Mnz+fwYMHk5aWhlKqwIVsRenUqRNxcXGkpqbi4OBAaGgoPXr0YPr06Xh5eeHn58eoUaMYNmwYrq6u1K5dW89UFoWrEBGKY0LmsfTCR3zXYBJdxgSa3E7TNPy/8efirYts+8s2qlhVMbmtEEI8CpKTkxk1ahRffvmlpUsRpcBUhOJjdZh67/m9HLpyiLHuY6URCyHKBTs7O2nEj4HHphlrmsaCXxfQwLYBL7q+aOlyhBBCCF2FaMbKjE8Yf3f2O45fP854j/HYWNmUQVVCCCGEeSpEM/5D4VfqZWVnEXQoCCc7J/q4mH+RghBCCFEWKlgzLtyOxB0k3ExggmEC1pUeuwvIhRBCPOIqfDPOzM5kUcwiGtdqjK+Tr6XLEUIIIQqo8M14y6ktnEk+w0TDRCqpCv9yhShVORGKHh4eeHp6PpRYw19++YU333zzgfYxb948mjVrRsuWLfHw8NA/V9y5c2dK6yOUuevMyMigW7duGAwG1q5dy+jRozl27FipPE+O/v37F1hnesSIEQUW38gdPHHy5El69+6Nq6srzZs3Z8CAAVy6dMm
s55s2bRrPPPNMsVGSs2fPxtXVlaZNm7Jjxw4zX40oToU+Zns36y7BMcG41XHjhWdesHQ5QpQ7udem3rFjB1OnTuWHH34o1efw8vLCy6vAxy7NFhwczLfffsuBAwews7MjKSmJiIiI0ivwntx1/vrrr9y9e1cfm4EDB5ZoX1lZWUUGVty8eZODBw9ia2vLb7/9hrOzc7H7TE9Pp0+fPnz66af069cPgF27dnHlyhWefPLJYh/fr18/Jk6cSOPGjU1uc+zYMdasWcPRo0e5cOEC3bp14+TJk8WGb4jiVeip4sb4jVy4dYG/tvqrLMMmxANKTk6mVq1agOmYQYCPPvqIZs2a0b17dwYPHqyHO/z888+4u7vTrl07AgICaNmyJZA39H7GjBm89tprdO7cGRcXF/79738Xu99Zs2axaNEi7OzsALC3t9ej+3IzFd9YWATil19+qc+yfXx88tR5+fJlXn31VQ4dOoTBYODUqVN5ZuCRkZG0a9cOT09PXnnlFX25SycnJwIDA+nYsWOxnxvesGED/fr1Y9CgQWavXLV69WratWunN2KAF154QR/n4rRt27bAKl/5bdq0iUGDBlGlShWcnZ1xdXUtsEynuD8VdmacnplOyOEQPOt70v7p9pYuR4gH8uGWoxy7kFyq+2zxtB0f9HMrcpuc5TDT09O5ePGiHoJgKmYwOjqaDRs28Ouvv5KZmYmnpyetW7cGYOTIkYSEhNC+fXumTJli8jnj4uLYtWsXKSkpNG3alPHjxxMTE1PoflNSUkhJSaFRo0bFvt7C4hsdHBwKjUAMDAxkx44dNGjQoEAsYv369Vm2bBnz5s1j69atee67evUqM2fO5L///S/Vq1fn448/5tNPP2X69On6uO3du7fYWsPDw/nggw948sknefnll5k6dWqxj4mNjdXHOr8TJ06YnL1///331KxZs9j9g3H97NxpUg4ODpw/f96sx4qiVdhmvO7EOi7fvswcnzkyKxbiPuU+TP3TTz/h7+9PbGysyZjBvXv30r9/f6pWrQqgz9Ju3rxJSkoK7dsb3xgPGTKkQCPL0adPH6pUqUKVKlWoX79+kfvVNM3sv+/C4htbtGhRaARihw4dGDFiBAMGDOAvf/mL2eO1f/9+jh07pq+7fefOnTznfc05nH3p0iUSEhLo2LEjSimsra2JjY2lZcuW9x2L2LRp0xKFWJgisYgPT4VoxirfL0ja3TRCY0Np+1RbvP/kbaGqhCg9xc1gy0K7du24evUqV65c4euvvy40ZtDUWvclWQM/d/CBlZUVmZmZJh9vZ2dH9erVOX36NC4uLib3aSq+0drautAIxODgYKKioti2bRsGg8HsRqZpGt27d9eTk/IzJxpx7dq13LhxQz9PnJyczJo1a5g5c6Yei5gjfyyiqfP5pTUzzolFzHHu3Dmefvppsx4rilaxzhnfe4O2Om4119OvM7HVRMvWI0QFEhcXR1ZWFnXq1DEZM9ixY0e2bNlCeno6qampbNu2DYBatWpRo0YN9u/fD1DiBB9T+wWYOnUqEyZMIDnZeBg/OTmZkJCQPI83Fd9oKgLx1KlTtGnThsDAQOrWrZunARWlbdu27Nu3j4SEBADS0tL0mML8goKCCAoKKnB7eHg427dv12MRo6Oj9fHq3Lkza9eu5c6dOwCEhYXpsYhDhgzhxx9/zDM227dv58iRI/rMuLAvcxsxGGMR16xZQ0ZGBr/99hvx8fE899xzZj9emFYhZsa5Jd9JZnnscp53eB6PegWDs4UQ5ssdoahpGp9//jlWVlYmYwa9vb3x8/PDw8ODhg0b4uXlhb29PQChoaGMGTOG6tWr07lzZ/12cxS13/Hjx5Oamoq3tzc2NjbY2Njw1ltv5Xm8qfhGUxGIAQEBxMfHo2kaXbt2xcPDw6yryOvVq0dYWBiDBw8mIyMDgJkzZ9KkSZMC28bFxRWIkUxMTOTs2bN5zss6OztjZ2dHVFQUffv2JTo6mtatW2NlZUW
jRo0IDg4GjKcUtm7dyuTJk5k8eTI2Nja4u7vzr3/9y6wxfuedd1i9ejVpaWk4ODgwevRoZsyYwebNm/nll18IDAzEzc2NAQMG0KJFC6ytrVm4cKFcSV1KKkSE4utLPmHJxX/wncNkjnvVJzgmmHV919G8TvNS2b8QwnypqanY2tqSlpaGj48PISEheHp66rcDzJkzh4sXL5rdKIrab3nVt29fNm7cSOXKlS1diihDpiIUK9TM+G52FiuOraB7w+7SiIWwkLFjx3Ls2DHS09MZPny43jC3bdvG7NmzyczMpGHDhoSFhZXKfssrUxewiceTWc1YKdUT+BdgBSzTNG1OvvvVvft7A2nACE3TDpZyrcX6362LpFVJY4JhQlk/tRDintWrVxd6+8CBA0u8OIY5+xWiIij2Ai6llBWwEOgFtAAGK6Va5NusF9D43tdYYHEp12mWy2mX6ePSh0Y1i//MoRBCCPGoMOdq6ueABE3TTmuadgdYA/TPt01/4AvNaD9QUylV9FIuD0E2GuM9xpf10wohhBAPxJxm3ADIfV3/uXu3lXSbh67eE3VwtHMs66cVQgghHog5zbiw5VXyX4JtzjYopcYqpX5RSv1y5coVc+ozS5UsjRjlgmN9uWhLCCFE+WNOMz4HPJPrewfgwn1sg6ZpIZqmeWma5lWvXr2S1mrSv994F/fpB+k2zPR6t0KIkiuLCEVz5Q6UCAsLY+JE46I+wcHBemTiiBEjaNCggf4Z36tXr+Lk5AQYP8NbtWpV/fW0b9+eEydOmPXcX331FUop4uLiCq0nR+6Iw7t37zJlyhQaN25My5Ytee655/TFRoqTkZHBwIEDcXV1pU2bNiQmJha63dq1a3F3d8fNzY133nknz33r1q2jRYsWuLm5MWTIkDz3JScn06BBA30Mc2p3dnbGYDCUaNUxUTrMacY/A42VUs5KqcrAIGBzvm02A/7KqC2QpGnaxVKutUiyPqoQpS9nbeqYmBhmz55tVmBBDk3TyM7OfojVGY0bNw5/f3/9eysrK5YvX17oto0aNdJfz/Dhw5k1a5ZZzxEeHk7Hjh1LtHLY+++/z8WLF4mNjSU2NpYtW7aQkpJi1mNDQ0OpVasWCQkJ/O1vf+Pdd98tsM21a9cICAhg586dHD16lEuXLrFz504A4uPjmT17Nvv27ePo0aPMnz+/QG3PP/98gX3OnTtXX5krZ7EXUTaKbcaapmUCE4EdwHFgnaZpR5VS45RS4+5t9jVwGkgAlgJvPKR6hRAWkjtCEYz/cXt7e+Pu7q5HEiYmJtK8eXPeeOMNPD092bNnD82bN2fMmDG4ubnh6+vL7du3ATh06BBt27bF3d2dF198UV9zOXccYe6ZrSkzZszQ4xQBJk+ezGeffUZmZmaJXo8pqamp7Nu3j9DQULObcVpaGkuXLmXBggX6WttPPvkkAwYMMOvxmzZt0mMgX375ZXbu3Flgfe7Tp0/TpEkTco4yduvWjQ0bNgCwdOlSJkyYoL+++vXr64+Ljo7m0qVL+Pr6mlWLKBtmfc5Y07SvMTbc3LcF5/q3BsiHe4V4WL6ZAv87Urr7/NOz0GtOkZuYilCMjIwkPj6eAwcOoGkafn5+7N69G0dHR06cOMF//vMfFi1aRGJiIvHx8YSHh7N06VIGDBjAhg0bePXVV/H392fBggU8//zzTJ8+nQ8//LDADO5+ODo60rFjR1asWJEn2xeMa04bDAZSUlJIS0sjKiqq2P1FRETQs2dPmjRpQu3atTl48GCxC44kJCTg6OioZyznN3DgwEIPkf/973/H39+f8+fP88wzxjN/1tbW2Nvbc+3aNT0UAsDV1ZW4uDgSExNxcHAgIiJCX7M6Zz3sDh06kJWVxYwZM+jZsyfZ2dm89dZbrFixQp9F5zZt2jQCAwPp2rUrc+bMyRPaIR6uCrUClxCidJmKUIyMjCQyMpJWrVoBxtljfHw8jo6ONGzYsMDayjmHPFu3bk1iYiJJSUncvHl
TP1Q6fPhwXnnllVKr+7333sPPz48+ffrkuT3nMDUYz7eOHTuW7du3F7mv8PBwJk+eDMCgQYMIDw/H09PT5Kkxc06ZrV27tsj7zYkqrFWrFosXL2bgwIFUqlSJ9u3bc/r0aQAyMzOJj4/n+++/59y5c3Tq1InY2FhWrlxJ79699Uaf2+zZs/nTn/7EnTt3GDt2LB9//LGewywePmnGQpQHxcxgy0LuCEVN05g6dSqvv/56nm0SExMLxATmj0TMOUxtirW1tX6uOT09/b5qdXV1xWAwsG7dOpPb+Pn5MXLkyCL3c+3aNb777jtiY2NRSpGVlYVSik8++aRAnCH8EWno6urK2bNnSUlJoUaNGgX2W9zMOCeq0MHBgczMTJKSkqhdu3aB7fv166fP/kNCQvTQBgcHB9q2bYuNjQ3Ozs40bdqU+Ph4fvrpJ/bs2cOiRYtITU3lzp072NraMmfOHJ56yrg0RJUqVRg5cmSeQ//i4atYEYpCiIcmd4Rijx49WL58OampqQCcP3+ey5cvm70ve3t7atWqxZ49ewBYsWKFPkt2cnIiOjoaQL8y+X5MmzatyIayd+9eGjUyrtZ3/vx5unbtWmCb9evX4+/vz5kzZ0hMTOT333/H2dmZvXv30rhxYy5cuMDx48cBOHPmDDExMRgMBqpVq8aoUaN488039UPHFy9eZOXKlYBxZlxYnGHOhWh+fn58/vnneg1dunQpdMadM+Y3btxg0aJFjB49GoA///nP7Nq1CzCedz958iQuLi6sWrWKs2fPkpiYyLx58/D392fOnDl6fWCclUdERNCyZcuSDLd4QDIzFkKYZCpC0dfXl+PHj9OuXTsAbG1tWblyZYni9D7//HPGjRtHWloaLi4u/Oc//wHg7bffZsCAAaxYsYIuXbrcd+1ubm54enpy8OAfy+TnnDPWNI3KlSuzbNkywNiIrK0L/ncYHh7OlCl5PzL50ksvsXr1ajp16sTKlSsZOXIk6enp2NjYsGzZMj3acebMmfzf//0fLVq04IknnqB69eoEBgaaVfuoUaMYNmwYrq6u1K5dO8+FY7k/djRp0iRiYmIAmD59uh7V2KNHDyIjI2nRogVWVlbMnTuXOnXqFPmcQ4cO1Y96GAwGPZpRlI0KEaEohBAPIigoCEdHR/z8/CxdiqjgHosIRSGEuB+5F78QwhLknLEQQghhYdKMhRBCCAuTZiyEEEJYmDRjIYQQwsKkGQshhBAWJs1YCCGEsDBpxkIIIYSFSTMWQgghLEyasRBCCGFhFlsOUyl1BThTirusC1wtxf09rmQcH5yM4YOTMXxwMoYP7mGMYUNN0+rlv9Fizbi0KaV+KWy9T1EyMo4PTsbwwckYPjgZwwdXlmMoh6mFEEIIC5NmLIQQQlhYRWrGIZYuoIKQcXxwMoYPTsbwwckYPrgyG8MKc85YCCGEKK8q0sxYCCGEKJfKXTNWSvVUSp1QSiUopaYUcr9SSv373v2HlVKelqjzUWbGGA69N3aHlVI/KqU8LFHno6y4Mcy1nbdSKksp9XJZ1ldemDOOSqnOSqlDSqmjSqkfyrrGR50Zf8/2SqktSqmYe2M40hJ1PqqUUsuVUpeVUrEm7i+bnqJpWrn5AqyAU4ALUBmIAVrk26Y38A2ggLZAlKXrfpS+zBzD9kCte//uJWNY8jHMtd13wNfAy5au+1H7MvN3sSZwDHC89319S9f9KH2ZOYbvAR/f+3c94DpQ2dK1PypfgA/gCcSauL9Mekp5mxk/ByRomnZa07Q7wBqgf75t+gNfaEb7gZpKqafKutBHWLFjqGnaj5qm3bj37X7AoYxrfNSZ83sI8FdgA3C5LIsrR8wZxyHARk3TzgJomiZjmZc5Y6gBNZRSCrDF2Iwzy7bMR5emabsxjokpZdJTylszbgD8nuv7c/duK+k2j7OSjs8ojO8KxR+KHUOlVAPgRSC4DOsqb8z5XWwC1FJKfa+UilZK+ZdZdeWDOWMYBDQHLgBHgEm
def cross_validate_calculate_scores(models, X, y, scoring, n_splits=5, random_state=42):
    """Cross-validate every model on every metric using one shared stratified split.

    Parameters
    ----------
    models : iterable of estimators
        Each estimator is passed as-is to ``cross_val_score``.
    X, y : training features and target, forwarded unchanged to ``cross_val_score``.
    scoring : dict
        Maps a scorer (the value handed to ``cross_val_score(scoring=...)``) to the
        label under which its results are stored. Note that the *key* is the scorer
        and the *value* is the display label.
    n_splits : int, default 5
        Number of stratified folds.
    random_state : int, default 42
        Seed for the shuffled ``StratifiedKFold`` so every model/metric pair is
        evaluated on the same folds.

    Returns
    -------
    dict
        ``{(model_label, metric_label): per-fold scores}``. ``model_label`` is the
        estimator's class name; if several estimators share a class, the second and
        later ones get a ``_2``, ``_3``, ... suffix instead of overwriting the first.
    """
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    results = {}
    class_counts = {}
    for model in models:
        class_name = model.__class__.__name__
        # Disambiguate repeated classes (e.g. two differently-tuned forests) so
        # later entries do not silently replace earlier ones in `results`.
        class_counts[class_name] = class_counts.get(class_name, 0) + 1
        occurrence = class_counts[class_name]
        model_label = class_name if occurrence == 1 else f"{class_name}_{occurrence}"

        for scorer, metric_label in scoring.items():
            results[(model_label, metric_label)] = cross_val_score(
                model, X, y, cv=skf, scoring=scorer
            )

    return results
def plot_cross_validate_scores(cross_val_scores, scoring):
    """Draw one box plot per metric comparing the models' cross-validation scores.

    Parameters
    ----------
    cross_val_scores : dict
        ``{(model_name, metric_name): per-fold scores}``, as produced by
        ``cross_validate_calculate_scores``.
    scoring : dict
        Kept for backward compatibility with existing callers. The panels are
        derived from the metrics actually present in ``cross_val_scores`` so the
        figure can never have fewer axes than metrics to draw.

    Returns
    -------
    dict
        ``{metric_name: {model_name: per-fold scores}}`` — the same scores
        regrouped by metric. Returned without plotting when there are no scores.
    """
    # Regroup the flat (model, metric) mapping into metric -> model -> scores.
    scores_by_metric = {}
    for (model_name, metric_name), fold_scores in cross_val_scores.items():
        scores_by_metric.setdefault(metric_name, {})[model_name] = fold_scores

    # Nothing to draw; plt.subplots cannot build a zero-row grid.
    if not scores_by_metric:
        return scores_by_metric

    sns.set_palette(sns.color_palette('hls', 8))
    num_metrics = len(scores_by_metric)
    # squeeze=False always yields a 2-D axes array, so a single metric works too
    # (with the default, a 1x1 grid returns a bare Axes that cannot be indexed).
    fig, axes = plt.subplots(
        num_metrics, 1,
        figsize=(10, 3 * num_metrics),
        sharex=True, sharey=False,
        squeeze=False,
    )

    for ax, (metric_name, model_scores) in zip(axes[:, 0], scores_by_metric.items()):
        lowest = min(min(values) for values in model_scores.values())
        highest = max(max(values) for values in model_scores.values())
        sns.boxplot(data=list(model_scores.values()), ax=ax)
        # Small margin so the extreme boxes/whiskers are not clipped by the frame.
        ax.set_ylim(lowest - 0.01, highest + 0.01)
        ax.set_xticklabels(list(model_scores.keys()), rotation=45, fontsize=10)
        ax.set_title(metric_name)
        ax.set_ylabel('Score')

    fig.suptitle('Cross-validation scores')
    plt.tight_layout()
    plt.show()
    return scores_by_metric
"text": [ + "Model Metric Fold 1 Fold 2 Fold 3 Fold 4 Fold 5 Mean\n", + "-------------------------- --------- -------- -------- -------- -------- -------- --------\n", + "DecisionTreeClassifier roc_auc 0.979873 0.981687 0.971879 0.982433 0.977671 0.978709\n", + "MLPClassifier roc_auc 0.998912 0.99887 0.998763 0.998587 0.997632 0.998553\n", + "GaussianNB roc_auc 0.752191 0.737028 0.743017 0.748565 0.743148 0.74479\n", + "GradientBoostingClassifier roc_auc 0.997837 0.99876 0.997455 0.996957 0.997898 0.997781\n", + "AdaBoostClassifier roc_auc 0.995736 0.998194 0.997287 0.997755 0.997959 0.997386\n", + "RandomForestClassifier roc_auc 0.998537 0.999064 0.998236 0.997711 0.996173 0.997944\n", + "LogisticRegression roc_auc 0.99704 0.996559 0.996226 0.995583 0.995554 0.996192\n", + "SVC roc_auc 0.969975 0.963131 0.959823 0.960533 0.958628 0.962418\n", + "KNeighborsClassifier roc_auc 0.982652 0.978629 0.97595 0.980412 0.982138 0.979957\n", + "ExtraTreesClassifier roc_auc 0.997683 0.998605 0.997287 0.995945 0.9968 0.997264\n", + "BaggingClassifier roc_auc 0.992643 0.996265 0.993824 0.992671 0.992852 0.993651\n", + "BernoulliNB roc_auc 0.964625 0.971294 0.959676 0.958243 0.9649 0.963748\n", + "DecisionTreeClassifier f1 0.962882 0.970936 0.965746 0.976898 0.970492 0.969391\n", + "MLPClassifier f1 0.967379 0.973568 0.963293 0.982301 0.969365 0.971181\n", + "GaussianNB f1 0.570521 0.560549 0.563046 0.566709 0.56391 0.564947\n", + "GradientBoostingClassifier f1 0.976948 0.980349 0.971047 0.982301 0.981257 0.97838\n", + "AdaBoostClassifier f1 0.972497 0.984615 0.976796 0.983389 0.983498 0.980159\n", + "RandomForestClassifier f1 0.976693 0.982379 0.965129 0.978818 0.972004 0.975005\n", + "LogisticRegression f1 0.963374 0.95747 0.963696 0.956522 0.960265 0.960265\n", + "SVC f1 0.877451 0.891041 0.878825 0.882641 0.866005 0.879193\n", + "KNeighborsClassifier f1 0.952273 0.937571 0.938073 0.946163 0.941176 0.943051\n", + "ExtraTreesClassifier f1 0.95838 0.969967 0.959459 0.96614 
0.972004 0.96519\n", + "BaggingClassifier f1 0.971491 0.979235 0.975501 0.976744 0.984513 0.977497\n", + "BernoulliNB f1 0.702962 0.707965 0.688958 0.693484 0.702746 0.699223\n", + "DecisionTreeClassifier accuracy 0.98011 0.982873 0.983425 0.986733 0.983969 0.983422\n", + "MLPClassifier accuracy 0.962431 0.988398 0.979558 0.99005 0.983416 0.980771\n", + "GaussianNB accuracy 0.626519 0.61105 0.61326 0.622996 0.615257 0.617816\n", + "GradientBoostingClassifier accuracy 0.988398 0.990608 0.985635 0.99005 0.990603 0.989059\n", + "AdaBoostClassifier accuracy 0.986188 0.992265 0.988398 0.991708 0.991708 0.990053\n", + "RandomForestClassifier accuracy 0.986188 0.991713 0.98232 0.987839 0.987839 0.98718\n", + "LogisticRegression accuracy 0.981768 0.978453 0.981768 0.978441 0.9801 0.980106\n", + "SVC accuracy 0.944751 0.950276 0.945304 0.946932 0.940299 0.945512\n", + "KNeighborsClassifier accuracy 0.976796 0.969613 0.970166 0.974019 0.971808 0.97248\n", + "ExtraTreesClassifier accuracy 0.981215 0.98674 0.98232 0.981758 0.98618 0.983643\n", + "BaggingClassifier accuracy 0.98453 0.990608 0.98011 0.988944 0.989497 0.986738\n", + "BernoulliNB accuracy 0.795028 0.799448 0.779006 0.789386 0.796573 0.791888\n", + "DecisionTreeClassifier precision 0.958874 0.951579 0.971366 0.971678 0.977974 0.966294\n", + "MLPClassifier precision 0.995413 0.94926 0.969631 0.993258 0.990991 0.979711\n", + "GaussianNB precision 0.40233 0.392483 0.394231 0.399284 0.395083 0.396682\n", + "GradientBoostingClassifier precision 0.98234 0.980349 0.990909 0.993289 0.988889 0.987155\n", + "AdaBoostClassifier precision 0.980044 0.99115 0.988814 0.995516 0.988938 0.988893\n", + "RandomForestClassifier precision 0.997727 0.993333 0.997685 0.997727 0.997706 0.996836\n", + "LogisticRegression precision 0.979684 0.956427 0.971175 0.975 0.96882 0.970221\n", + "SVC precision 1 1 1 1 1 1\n", + "KNeighborsClassifier precision 0.992891 0.976359 0.987923 0.992788 0.995122 0.989017\n", + "ExtraTreesClassifier precision 
# Tabulate the per-fold cross-validation scores and their mean for every
# (metric, model) pair collected in `scores_by_metric`.
table_data = [
    [classifier, metric, *scores, sum(scores) / len(scores)]
    for metric, classifiers in scores_by_metric.items()
    for classifier, scores in classifiers.items()
]

# Derive the fold columns from the rows (each row = model + metric + folds + mean)
# so the headers stay aligned if the number of CV folds is ever changed.
n_folds = max((len(row) - 3 for row in table_data), default=0)
headers = ["Model", "Metric", *[f"Fold {fold}" for fold in range(1, n_folds + 1)], "Mean"]

print(tabulate(table_data, headers=headers))
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}