1654 lines (1653 with data), 69.2 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading the data and required libraries"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import re\n",
"import datetime\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import string\n",
"import nltk\n",
"from nltk import word_tokenize\n",
"from nltk.stem.porter import PorterStemmer\n",
"from nltk.corpus import stopwords"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"DIR = \"E:/Coding/Summer 2023/data/\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"notes_df = pd.read_csv(DIR + \"NOTEEVENTS.csv\", low_memory=False, memory_map=True)\n",
"admissions_df = pd.read_csv(DIR + \"ADMISSIONS.csv\", low_memory=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Exploration\n",
"## Admissions"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ROW_ID</th>\n",
" <th>SUBJECT_ID</th>\n",
" <th>HADM_ID</th>\n",
" <th>ADMITTIME</th>\n",
" <th>DISCHTIME</th>\n",
" <th>DEATHTIME</th>\n",
" <th>ADMISSION_TYPE</th>\n",
" <th>ADMISSION_LOCATION</th>\n",
" <th>DISCHARGE_LOCATION</th>\n",
" <th>INSURANCE</th>\n",
" <th>LANGUAGE</th>\n",
" <th>RELIGION</th>\n",
" <th>MARITAL_STATUS</th>\n",
" <th>ETHNICITY</th>\n",
" <th>EDREGTIME</th>\n",
" <th>EDOUTTIME</th>\n",
" <th>DIAGNOSIS</th>\n",
" <th>HOSPITAL_EXPIRE_FLAG</th>\n",
" <th>HAS_CHARTEVENTS_DATA</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>21</td>\n",
" <td>22</td>\n",
" <td>165315</td>\n",
" <td>2196-04-09 12:26:00</td>\n",
" <td>2196-04-10 15:54:00</td>\n",
" <td>NaN</td>\n",
" <td>EMERGENCY</td>\n",
" <td>EMERGENCY ROOM ADMIT</td>\n",
" <td>DISC-TRAN CANCER/CHLDRN H</td>\n",
" <td>Private</td>\n",
" <td>NaN</td>\n",
" <td>UNOBTAINABLE</td>\n",
" <td>MARRIED</td>\n",
" <td>WHITE</td>\n",
" <td>2196-04-09 10:06:00</td>\n",
" <td>2196-04-09 13:24:00</td>\n",
" <td>BENZODIAZEPINE OVERDOSE</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>22</td>\n",
" <td>23</td>\n",
" <td>152223</td>\n",
" <td>2153-09-03 07:15:00</td>\n",
" <td>2153-09-08 19:10:00</td>\n",
" <td>NaN</td>\n",
" <td>ELECTIVE</td>\n",
" <td>PHYS REFERRAL/NORMAL DELI</td>\n",
" <td>HOME HEALTH CARE</td>\n",
" <td>Medicare</td>\n",
" <td>NaN</td>\n",
" <td>CATHOLIC</td>\n",
" <td>MARRIED</td>\n",
" <td>WHITE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>23</td>\n",
" <td>23</td>\n",
" <td>124321</td>\n",
" <td>2157-10-18 19:34:00</td>\n",
" <td>2157-10-25 14:00:00</td>\n",
" <td>NaN</td>\n",
" <td>EMERGENCY</td>\n",
" <td>TRANSFER FROM HOSP/EXTRAM</td>\n",
" <td>HOME HEALTH CARE</td>\n",
" <td>Medicare</td>\n",
" <td>ENGL</td>\n",
" <td>CATHOLIC</td>\n",
" <td>MARRIED</td>\n",
" <td>WHITE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>BRAIN MASS</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>24</td>\n",
" <td>24</td>\n",
" <td>161859</td>\n",
" <td>2139-06-06 16:14:00</td>\n",
" <td>2139-06-09 12:48:00</td>\n",
" <td>NaN</td>\n",
" <td>EMERGENCY</td>\n",
" <td>TRANSFER FROM HOSP/EXTRAM</td>\n",
" <td>HOME</td>\n",
" <td>Private</td>\n",
" <td>NaN</td>\n",
" <td>PROTESTANT QUAKER</td>\n",
" <td>SINGLE</td>\n",
" <td>WHITE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>INTERIOR MYOCARDIAL INFARCTION</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>129635</td>\n",
" <td>2160-11-02 02:06:00</td>\n",
" <td>2160-11-05 14:55:00</td>\n",
" <td>NaN</td>\n",
" <td>EMERGENCY</td>\n",
" <td>EMERGENCY ROOM ADMIT</td>\n",
" <td>HOME</td>\n",
" <td>Private</td>\n",
" <td>NaN</td>\n",
" <td>UNOBTAINABLE</td>\n",
" <td>MARRIED</td>\n",
" <td>WHITE</td>\n",
" <td>2160-11-02 01:01:00</td>\n",
" <td>2160-11-02 04:27:00</td>\n",
" <td>ACUTE CORONARY SYNDROME</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ROW_ID SUBJECT_ID HADM_ID ADMITTIME DISCHTIME \\\n",
"0 21 22 165315 2196-04-09 12:26:00 2196-04-10 15:54:00 \n",
"1 22 23 152223 2153-09-03 07:15:00 2153-09-08 19:10:00 \n",
"2 23 23 124321 2157-10-18 19:34:00 2157-10-25 14:00:00 \n",
"3 24 24 161859 2139-06-06 16:14:00 2139-06-09 12:48:00 \n",
"4 25 25 129635 2160-11-02 02:06:00 2160-11-05 14:55:00 \n",
"\n",
" DEATHTIME ADMISSION_TYPE ADMISSION_LOCATION \\\n",
"0 NaN EMERGENCY EMERGENCY ROOM ADMIT \n",
"1 NaN ELECTIVE PHYS REFERRAL/NORMAL DELI \n",
"2 NaN EMERGENCY TRANSFER FROM HOSP/EXTRAM \n",
"3 NaN EMERGENCY TRANSFER FROM HOSP/EXTRAM \n",
"4 NaN EMERGENCY EMERGENCY ROOM ADMIT \n",
"\n",
" DISCHARGE_LOCATION INSURANCE LANGUAGE RELIGION \\\n",
"0 DISC-TRAN CANCER/CHLDRN H Private NaN UNOBTAINABLE \n",
"1 HOME HEALTH CARE Medicare NaN CATHOLIC \n",
"2 HOME HEALTH CARE Medicare ENGL CATHOLIC \n",
"3 HOME Private NaN PROTESTANT QUAKER \n",
"4 HOME Private NaN UNOBTAINABLE \n",
"\n",
" MARITAL_STATUS ETHNICITY EDREGTIME EDOUTTIME \\\n",
"0 MARRIED WHITE 2196-04-09 10:06:00 2196-04-09 13:24:00 \n",
"1 MARRIED WHITE NaN NaN \n",
"2 MARRIED WHITE NaN NaN \n",
"3 SINGLE WHITE NaN NaN \n",
"4 MARRIED WHITE 2160-11-02 01:01:00 2160-11-02 04:27:00 \n",
"\n",
" DIAGNOSIS HOSPITAL_EXPIRE_FLAG \\\n",
"0 BENZODIAZEPINE OVERDOSE 0 \n",
"1 CORONARY ARTERY DISEASE\\CORONARY ARTERY BYPASS... 0 \n",
"2 BRAIN MASS 0 \n",
"3 INTERIOR MYOCARDIAL INFARCTION 0 \n",
"4 ACUTE CORONARY SYNDROME 0 \n",
"\n",
" HAS_CHARTEVENTS_DATA \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"admissions_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(58976, 19)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"admissions_df.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Types of admissions"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['EMERGENCY', 'ELECTIVE', 'NEWBORN', 'URGENT'], dtype=object)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"admissions_df['ADMISSION_TYPE'].unique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Check for missing values on the admission times"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(admissions_df['ADMITTIME'].isnull())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Conversion of times to datetime type"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"admissions_df['ADMITTIME'] = admissions_df['ADMITTIME'].astype('datetime64[ns]')\n",
"admissions_df['DISCHTIME'] = admissions_df['DISCHTIME'].astype('datetime64[ns]')\n",
"admissions_df['DEATHTIME'] = admissions_df['DEATHTIME'].astype('datetime64[ns]')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sort by subject and admission type and reset the data frame index."
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"admissions_df = admissions_df.sort_values(['SUBJECT_ID', 'ADMITTIME'])\n",
"admissions_df = admissions_df.reset_index(drop = True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"admissions_df['NEXT_ADMITTIME'] = admissions_df.groupby('SUBJECT_ID').ADMITTIME.shift(-1)\n",
"admissions_df['NEXT_ADMISSION_TYPE'] = admissions_df.groupby('SUBJECT_ID').ADMISSION_TYPE.shift(-1)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"admissions_df.loc[admissions_df['NEXT_ADMISSION_TYPE'] == 'ELECTIVE', 'NEXT_ADMITTIME'] = pd.NaT\n",
"admissions_df.loc[admissions_df['NEXT_ADMISSION_TYPE'] == 'ELECTIVE', 'NEXT_ADMISSION_TYPE'] = np.NaN"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fill NA's with the next valid value. Previously sorted."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"admissions_df[['NEXT_ADMITTIME','NEXT_ADMISSION_TYPE']] = admissions_df.groupby(['SUBJECT_ID'])[['NEXT_ADMITTIME','NEXT_ADMISSION_TYPE']].fillna(method = 'bfill')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Obtain days to readmission: from discharge to next readmission"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"admissions_df['DAYS_TO_READMISSION'] = (admissions_df['NEXT_ADMITTIME'] - admissions_df['DISCHTIME']).dt.days"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Number of readmissions"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11399"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(admissions_df['DAYS_TO_READMISSION'].notnull())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Distribution of days to readmission"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnYAAAFRCAYAAAD5FeDqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5XElEQVR4nO3de3zPdf/H8edm5znkuIlE9LWWaWNfbb9aSSxX4Wo5hRESRVQukUMiDZdjFyXkXMlpk9BVUl2RmE3I1SwppJgZ09AOZp/fH932vXxtfL+bw3c+Hvfbbbebvd/vz+vz/u5tefY5uhmGYQgAAAA3PHdXTwAAAABXB8EOAADAJAh2AAAAJkGwAwAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACT8HD1BG5EO3fulGEY8vT0dPVUAACAyZ07d05ubm4KCwtzOJZgVwqGYYgXdgAAgOuhJJmDYFcKhUfqQkJCXDwTAABgdnv27HF6LNfYAQAAmATBDgAAwCQIdgAAACZBsAMAADAJgh0AAIBJEOwAAABMgmAHAABgEgQ7AAAAkyDYAQAAmATBDgAAwCQIdmVYQYFr30fr6v0DAICScem7YhMTE9WjR49i+2rXrq0vvvhCv/32m8aNG6ekpCT5+fmpQ4cOGjhwoMqVK2cb+8EHH2jBggU6fvy4GjVqpFGjRik4ONjW70yNssjd3U3LP9+n45l/Xvd9V6/sp86tLNd9vwAAoPRcGuzCwsL0zTff2LXt2rVLAwcOVP/+/XXu3Dk9/fTTqlu3rpYtW6Zff/1VI0eOlLu7uwYNGiRJWr16tSZNmqRx48YpODhYc+fOVa9evfTvf/9bVapUcapGWXY8808dyTjr6mkAAIAbgEuDnZeXl6pXr277/s8//9SECRMUExOj9u3ba926dTpy5IhWrFihSpUqyWKx6MSJE5o0aZKeffZZeXl5afbs2YqNjVW7du0kSePHj1fLli21cuVK9evXT5999pnDGgAAAGZQpq6xmz17trKzszVs2DBJUnJysu6++25VqlTJNiYiIkJnzpzR3r17deLECR08eFCRkZG2fg8PD4WHhyspKcmpGgAAAGZRZoLdyZMntWjRIj377LO65ZZbJElpaWkKDAy0G1ejRg1J0tGjR5WWliZJqlmzZpExhX2OagAAAJhFmQl2S5cuVYUKFdS5c2dbW05OTpFTpd7e3pKk3NxcZWdnS1KxY3Jzc52qAQAAYBZlJth99NFHevzxx+Xj42Nr8/HxUV5ent24wjDm5+dnG1vcGF9fX6dqAAAAmEWZCHapqak6fPiw2rZta9ceGBio9PR0u7bC7wMCAmynYIsbExAQ4FQNAAAAsygTwS45OVlVq1ZVUFCQXbvValVKSorOnDlja9u2bZv8/f0VFBSkqlWrql69ekpMTLT15+fnKzk5WVar1akaAAAAZlEmgl1KSooaNmxYpL1ly5aqXr26XnzxRaWmpmrjxo2aNm2aevfubbturnfv3lq4cKFWr16t/fv3a8SIEcrJyVGHDh2crgEAAGAGLn2OXaHjx4/b7oS9kLe3t+bNm6exY8eqU6dOqlSpkrp27ar+/fvbxnTq1EmnT5/Wm2++qVOnTqlRo0ZauHChqlSp4nQNAAAAM3AzDIMXgpbQnj17JEkhISHXfF9vrdjlkjdP3FrNX893Cr3u+wUAAPZKkjvKxKlYAAAAXDmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJAh2AAAAJkGwAwAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJAh2AAAAJkGwAwAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJAh2AAAAJlEmgt1HH32kRx99VCEhIXrsscf073//29b322+/qV+/fmrSpInuv/9+vfnmmzp//rzd9h988IEefvhhNW
7cWF27dlVKSopdvzM1AAAAbnQuD3Zr1qzRyJEj1a1bN61fv15t2rTR4MGDtXPnTp07d05PP/20JGnZsmUaM2aMPvzwQ7399tu27VevXq1JkybphRdeUEJCgmrXrq1evXrp5MmTkuRUDQAAADPwcOXODcPQv/71L/Xo0UPdunWTJD333HNKTk7W9u3b9fvvv+vIkSNasWKFKlWqJIvFohMnTmjSpEl69tln5eXlpdmzZys2Nlbt2rWTJI0fP14tW7bUypUr1a9fP3322WcOawAAAJiBS4/YHThwQL///rvatm1r1z5//nz169dPycnJuvvuu1WpUiVbX0REhM6cOaO9e/fqxIkTOnjwoCIjI239Hh4eCg8PV1JSkiQ5rAEAAGAWLg92kvTnn3/q6aefVmRkpDp27Kgvv/xSkpSWlqbAwEC7bWrUqCFJOnr0qNLS0iRJNWvWLDKmsM9RDQAAALNwabA7c+aMJGnYsGFq06aNFixYoPvuu0/9+/fX1q1blZOTU+RUqbe3tyQpNzdX2dnZklTsmNzcXElyWAMAAMAsXHqNnaenpyTp6aefVkxMjCTprrvuUkpKihYuXCgfHx/l5eXZbVMYxvz8/OTj4yNJxY7x9fWVJIc1AAAAzMKlR+wCAgIkSRaLxa69QYMG+u233xQYGKj09HS7vsLvAwICbKdgixtTWNtRDQAAALNwabC7++675e/vr927d9u179u3T3Xq1JHValVKSortlK0kbdu2Tf7+/goKClLVqlVVr149JSYm2vrz8/OVnJwsq9UqSQ5rAAAAmIVLg52Pj4/69Omjt99+W+vWrdOvv/6qd955R1u2bFGvXr3UsmVLVa9eXS+++KJSU1O1ceNGTZs2Tb1797ZdN9e7d28tXLhQq1ev1v79+zVixAjl5OSoQ4cOkuRUDQAAADNw6TV2ktS/f3/5+vpq+vTpOnbsmOrXr6+ZM2fq3nvvlSTNmzdPY8eOVadOnVSpUiV17dpV/fv3t23fqVMnnT59Wm+++aZOnTqlRo0aaeHChapSpYqkv26UcFQDAADADNwMwzBcPYkbzZ49eyRJISEh13xfb63YpSMZZ6/5fi52azV/Pd8p9LrvFwAA2CtJ7nD5K8UAAABwdRDsAAAATIJgBwAAYBIEOwAAAJMg2AEAAJgEwQ4AAMAkCHYAAAAmQbADAAAwCYIdAACASRDsAAAATIJgBwAAYBIEOwAAAJMg2AEAAJgEwQ4AAMAkCHYAAAAmQbADAAAwCYIdAACASRDsAAAATIJgBwAAYBIEOwAAAJMg2AEAAJgEwQ4AAMAkCHYAAAAmQbADAAAwCZcHu2PHjqlhw4ZFvhISEiRJe/fuVWxsrEJDQ9WiRQstWbLEbvuCggLNmDFDUVFRCg0N1TPPPKPDhw/bjXFUAwAAwAw8XD2B1NRUeXt7a+PGjXJzc7O1V6hQQZmZmerVq5datGihsWPHateuXRo7dqz8/f3Vvn17SdKsWbO0dOlSTZw4UYGBgZo8ebL69OmjtWvXysvLy6kaAAAAZuDyYLdv3z7VrVtXNWrUKNK3ePFieXp66vXXX5eHh4fq16+vQ4cOae7cuWrfvr3y8vK0YMECDRkyRM2bN5ckTZ8+XVFRUdqwYYPatGmjFStWXLYGAACAWbj8VOyPP/6o+vXrF9uXnJysZs2aycPjf/kzIiJCBw8eVEZGhlJTU3X27FlFRkba+itWrKjg4GAlJSU5VQMAAMAsXB7s9u3bp5MnT6pbt276v//7P3Xp0kWbNm2SJKWlpSkwMNBufOGRvaNHjyotLU2SVLNmzSJjCvsc1QAAADALlwa7/Px8/fLLL/rjjz80cOBAzZ07V6Ghoerbt6+2bt2qnJwceXl52W3j7e0tScrNzVV2drYkFTsmNzdXkhzWAAAAMAuXXmPn4eGhxMRElStXTj4+PpKkRo0a6aefftL8+fPl4+OjvLw8u20Kw5ifn59tm7y8PNufC8f4+vpKksMaAAAAZu
HyU7H+/v52oUyS7rzzTh07dkyBgYFKT0+36yv8PiAgwHYKtrgxAQEBkuSwBgAAgFm4NNj99NNPatKkiRITE+3a//vf/6pBgwayWq3asWOHzp8/b+vbtm2b6tWrp6pVqyooKEjly5e32z4rK0spKSmyWq2S5LAGAACAWbg02NWvX1933HGHXn/9dSUnJ+vnn3/WhAkTtGvXLj333HNq3769zpw5o5EjR2r//v1KSEjQokWL1K9fP0l/XVsXGxurKVOm6IsvvlBqaqpeeuklBQYGKjo6WpIc1gAAADALl15j5+7urtmzZ2vq1Kl68cUXlZWVpeDgYC1cuFAWi0WSNG/ePMXFxSkmJkbVq1fX0KFDFRMTY6sxaNAg5efna9SoUcrJyZHVatX8+fPl6ekpSapatarDGgAAAGbgZhiG4epJ3Gj27NkjSQoJCbnm+3prxS4dyTh7zfdzsVur+ev5TqHXfb8AAMBeSXKHy2+eAAAAwNVBsAMAADAJgh0AAIBJEOwAAABMgmAHAABgEgQ7AAAAkyDYAQAAmATBDgAAwCQIdgAAACZBsAMAADAJgh0AAIBJEOwAAABMgmAHAABgEgQ7AAAAkyDYAQAAmATBDgAAwCQIdgAAACZxTYJdWlratSgLAACAyyhVsLvrrrv0/fffF9uXnJysv/3tb1c0KQAAAJSch7MDFyxYoD///FOSZBiGVq5cqU2bNhUZt3PnTnl5eV29GQIAAMApTge73NxcvfXWW5IkNzc3rVy5ssgYd3d3VahQQc8999zVmyEAAACc4nSwe+6552yBLSgoSCtWrFDjxo2v2cQAAABQMk4HuwulpqZe7XkAAADgCpUq2EnSli1b9NVXXyk7O1sFBQV2fW5ubho/fvwVTw4AAADOK9VdsQsWLNDTTz+tlStX6ttvv1ViYmKRr9I4cOCAwsLClJCQYGvbu3evYmNjFRoaqhYtWmjJkiV22xQUFGjGjBmKiopSaGionnnmGR0+fNhujKMaAAAAZlCqI3bvv/++2rZtq7i4uKt2B+y5c+c0ZMgQ2523kpSZmalevXqpRYsWGjt2rHbt2qWxY8fK399f7du3lyTNmjVLS5cu1cSJExUYGKjJkyerT58+Wrt2rby8vJyqAQAAYAalCnYZGRnq0KHDVX2sycyZM1W+fHm7thUrVsjT01Ovv/66PDw8VL9+fR06dEhz585V+/btlZeXpwULFmjIkCFq3ry5JGn69OmKiorShg0b1KZNG4c1AAAAzKJUp2KDg4P1008/XbVJJCUlafny5Zo4caJde3Jyspo1ayYPj//lz4iICB08eFAZGRlKTU3V2bNnFRkZaeuvWLGigoODlZSU5FQNAAAAsyjVEbsRI0boxRdflJ+fn+655x75+voWGXPrrbc6VSsrK0tDhw7VqFGjVLNmTbu+tLQ0WSwWu7YaNWpIko4ePWp7ddnF29WoUcPW56hGtWrVnJonAABAWVeqYNelSxcVFBRoxIgRcnNzK3bM3r17nao1ZswYhYWFqW3btkX6cnJyipzu9fb2lvTXA5Ozs7Mlqdgxf/zxh1M1AAAAzKJUwW7cuHGXDHQl8dFHHyk5OVlr164ttt/Hx0d5eXl2bYVhzM/PTz4+PpKkvLw8258LxxQeRXRUAwAAwCxKFeyeeOKJq7Lz+Ph4nThxwnbjQ6HXXntNn3zyiQIDA5Wenm7XV/h9QECA8vPzbW116tSxG9OwYUNJclgDAADALEoV7ApvTLgcq9XqcMyUKVOUk5Nj1xYdHa1BgwapXbt2WrNmjZYtW6bz58+rXLlykqRt27apXr16qlq1qipUqKDy5csrMTHRFuyysrKUkpKi2NhY2zwuVwMAAMAsShXsunfvLjc3NxmGYWu7+NSsM9fYXeqIWdWqVRUQEKD27dtr3rx5GjlypPr06aPvv/9eixYt0tixYyX9dW1dbGyspkyZoipVqqhWrVqaPHmyAgMDFR0dLUkOawAAAJhFqYJdcW9u+P
PPP5WcnKw1a9Zo5syZVzwx6a+AN2/ePMXFxSkmJkbVq1fX0KFDFRMTYxszaNAg5efna9SoUcrJyZHVatX8+fPl6enpdA0AAAAzcDMuPOx2FcyaNUu7d+/WnDlzrmbZMmXPnj2SpJCQkGu+r7dW7NKRjLPXfD8Xu7Wav57vFHrd9wsAAOyVJHeU6gHFlxMeHq7t27df7bIAAABw4KoHuy+//FL+/v5XuywAAAAcKNU1dj169CjSVlBQoLS0NP3+++965plnrnhiAAAAKJlSBbviLstzd3eXxWJRv3791L59+yueGAAAAEqmVMHuvffeu9rzAAAAwBUqVbArtGnTJm3fvl1ZWVmqUqWKmjZtqqioqKs1NwAAAJRAqYJdXl6e+vfvr2+++UblypVT5cqVlZmZqTlz5igiIkJz5syRl5fX1Z4rAAAALqNUd8XOnDlTO3bs0KRJk/T999/rm2++0e7duzVhwgTt2rVL77zzztWeJwAAABwoVbBbt26dnn/+ebVr1872/lUPDw89/vjjev7557V27dqrOkkAAAA4Vqpgd/LkSQUHBxfbFxwcrGPHjl3RpAAAAFBypQp2derU0Y4dO4rtS0pKUs2aNa9oUgAAACi5Ut088eSTT2rixIny8fHRY489pmrVqikjI0Pr1q3Tu+++q+eff/5qzxMAAAAOlCrYdenSRSkpKZoyZYqmTp1qazcMQzExMerbt+9VmyAAAACcU+rHncTFxal3797avn27/vjjD7m5ually5aqX7/+1Z4jAAAAnFCia+x+/PFHtW/fXgsXLpQk1a9fX126dFHXrl31r3/9S4MHD9aBAweuyUQBAABweU4Hu99++009evRQRkaG6tWrZ9fn6empoUOH6tSpU+ratSt3xQIAALiA08Fu7ty5uuWWW7R69Wq1bt3ars/X11c9e/bUqlWr5O3trTlz5lz1iQIAAODynA52W7duVZ8+fVSlSpVLjqlevbp69+6tLVu2XJXJAQAAwHlOB7v09HTVrVvX4TiLxaK0tLQrmRMAAABKwelgV6VKFaWnpzscl5mZqUqVKl3RpAAAAFByTgc7q9WqhIQEh+M++uijS75uDAAAANeO08Gue/fuSkxM1MSJE5Wbm1ukPy8vT5MmTdKmTZvUrVu3qzpJAAAAOOb0A4pDQkI0fPhwjR8/XmvWrFFkZKRq166t8+fP68iRI0pMTFRmZqZeeOEFRUVFXcs5AwAAoBglevNEt27dFBQUpPnz5+uLL76wHbnz9/fX/fffr969e+uee+65JhMFAADA5ZX4lWJNmzZV06ZNJUknT56Uh4eHKlasWOoJnDhxQhMnTtTmzZuVm5srq9WqYcOG2V5NtnfvXsXFxem///2vqlSpop49e6pHjx627QsKCvTWW29p5cqVOn36tKxWq0aPHq3bbrvNNsZRDQAAADMo0SvFLlalSpUrCnWSNGDAAB06dEhz587VqlWr5OPjo549eyo7O1uZmZnq1auX6tSpo/j4eA0YMEBTpkxRfHy8bftZs2Zp6dKlGjdunJYtW6aCggL16dNHeXl5kuRUDQAAADMo8RG7q+mPP/5QrVq11K9fP1ksFklS//799fe//10//fSTtm7dKk9PT73++uvy8PBQ/fr1bSGwffv2ysvL04IFCzRkyBA1b95ckjR9+nRFRUVpw4YNatOmjVasWHHZGgAAAGZxRUfsrlSlSpU0depUW6g7efKkFi1apMDAQDVo0EDJyclq1qyZPDz+lz8jIiJ08OBBZWRkKDU1VWfPnlVkZKStv2LFigoODlZSUpIkOawBAABgFi49YnehV199VStWrJCXl5feeecd+fn5KS0tzRb6CtWoUUOSdPToUdsbLmrWrFlkTGGfoxrVqlW7Jp8HAADgenPpEbsLPfXUU4qPj1ebNm00YMAA/fDDD8rJyZGXl5fdOG9vb0lSbm6usrOzJanYMYV37DqqAQAAYBZl5ohdgwYNJElxcXHavXu33n//ff
n4+NhugihUGMb8/Pzk4+Mj6a+HIxf+uXCMr6+vJDmsAQAAYBYuPWJ38uRJrV+/Xvn5+bY2d3d3NWjQQOnp6QoMDCzyftrC7wMCAmynYIsbExAQIEkOawAAAJiFS4NdRkaGBg8erK1bt9razp07p5SUFNWvX19Wq1U7duzQ+fPnbf3btm1TvXr1VLVqVQUFBal8+fJKTEy09WdlZSklJUVWq1WSHNYAAAAwC5cGO4vFogceeEBvvPGGkpKStG/fPr3yyivKyspSz5491b59e505c0YjR47U/v37lZCQoEWLFqlfv36S/rq2LjY2VlOmTNEXX3yh1NRUvfTSSwoMDFR0dLQkOawBAABgFi6/xm7atGmaOnWqXnrpJZ0+fVrh4eH64IMPdOutt0qS5s2bp7i4OMXExKh69eoaOnSoYmJibNsPGjRI+fn5GjVqlHJycmS1WjV//nx5enpKkqpWreqwBgAAgBm4GYZhuHoSN5o9e/ZIkkJCQq75vt5asUtHMs5e8/1c7NZq/nq+U+h13y8AALBXktxRZh53AgAAgCtDsAMAADAJgh0AAIBJEOwAAABMgmAHAABgEgQ7AAAAkyDYAQAAmATBDgAAwCQIdgAAACZBsAMAADAJgh0AAIBJEOwAAABMgmAHAABgEgQ7AAAAkyDYAQAAmATBDgAAwCQIdgAAACZBsAMAADAJgh0AAIBJEOwAAABMgmAHAABgEgQ7AAAAkyDYAQAAmITLg92pU6c0evRoPfDAA2rSpIm6dOmi5ORkW//WrVv1xBNP6J577lHr1q21fv16u+1zc3M1duxYRUZGKiwsTP/4xz908uRJuzGOagAAAJiBy4Pd4MGDtXPnTk2bNk3x8fG666679PTTT+uXX37Rzz//rH79+ikqKkoJCQnq2LGjhg4dqq1bt9q2HzNmjL755hvNnDlTixcv1i+//KJBgwbZ+p2pAQAAYAYertz5oUOHtGXLFi1dulRNmzaVJL366qvavHmz1q5dqxMnTqhhw4Z66aWXJEn169dXSkqK5s2bp8jISB07dkwfffSRZs+erfDwcEnStGnT1Lp1a+3cuVNhYWFavHjxZWsAAACYhUuP2FWuXFlz585VSEiIrc3NzU1ubm7KyspScnJykfAVERGhHTt2yDAM7dixw9ZWqF69egoICFBSUpIkOawBAABgFi4NdhUrVtSDDz4oLy8vW9tnn32mQ4cOKSoqSmlpaQoMDLTbpkaNGsrOzlZmZqaOHTumypUry9vbu8iYtLQ0SXJYAwAAwCxcfo3dhb777jsNHz5c0dHRat68uXJycuxCnyTb93l5ecrOzi7SL0ne3t7Kzc2VJIc1AAAAzKLMBLuNGzeqd+/eCg0N1ZQpUyT9FdAuDl+F3/v6+srHx6fYcJabmytfX1+nagAAAJhFmQh277//vgYOHKiHHnpIs2fPtp1arVmzptLT0+3Gpqeny8/PTxUqVFBgYKBOnTpVJLilp6crICDAqRoAAABm4fJgt3TpUo0bN07dunXTtGnT7E6bhoeHa/v27Xbjt23bpiZNmsjd3V1NmzZVQUGB7SYKSTpw4ICOHTsmq9XqVA0AAACzcGmyOXDggMaPH69WrVqpX79+ysjI0PHjx3X8+HGdPn1a3bt31/fff68pU6bo559/1oIFC/Tpp5+qT58+kqSAgAA99thjGjVqlBITE/X9999r8ODBatasmUJDQyXJYQ0AAACzcOlz7D777DOdO3dOn3/+uT7//HO7vpiYGE2cOFGzZs3S5MmTtXjxYtWuXVuTJ0+2e3zJuHHjNH78eD3//POSpAceeECjRo2y9d95550OawAAAJiBm8HD3Epsz549kmT3/L1r5a0Vu3Qk4+w138/Fbq3mr+c7hV73/QIAAHslyR1cZAYAAGASBDsAAACTINgBAACYBMEOAADAJAh2AAAAJkGwAwAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJA
h2KFZ5P08VFBiunkaZmAMAADcKD1dPAGWTr5eH3N3dtPzzfTqe+adL5lC9sp86t7K4ZN8AANyICHa4rOOZf+pIxllXTwMAADiBU7EAAAAmQbADAAAwCYIdAACASRDsAAAATIJgBwAAYBIEOwAAAJMg2AEAAJgEwQ4AAMAkCHYAAAAmUaaC3Zw5c9S9e3e7tr179yo2NlahoaFq0aKFlixZYtdfUFCgGTNmKCoqSqGhoXrmmWd0+PDhEtUAAAAwgzIT7D744AO9+eabdm2ZmZnq1auX6tSpo/j4eA0YMEBTpkxRfHy8bcysWbO0dOlSjRs3TsuWLVNBQYH69OmjvLw8p2sAAACYgcvfFXvs2DG99tprSkxMVN26de36VqxYIU9PT73++uvy8PBQ/fr1dejQIc2dO1ft27dXXl6eFixYoCFDhqh58+aSpOnTpysqKkobNmxQmzZtHNYAAAAwC5cfsfvhhx/k6empjz/+WPfcc49dX3Jyspo1ayYPj//lz4iICB08eFAZGRlKTU3V2bNnFRkZaeuvWLGigoODlZSU5FQNAAAAs3D5EbsWLVqoRYsWxfalpaXJYrHYtdWoUUOSdPToUaWlpUmSatasWWRMYZ+jGtWqVbvyDwEAAFAGuPyI3eXk5OTIy8vLrs3b21uSlJubq+zsbEkqdkxubq5TNQAAAMyiTAc7Hx8f200QhQrDmJ+fn3x8fCSp2DG+vr5O1QAAADCLMh3sAgMDlZ6ebtdW+H1AQIDtFGxxYwICApyqAQAAYBZlOthZrVbt2LFD58+ft7Vt27ZN9erVU9WqVRUUFKTy5csrMTHR1p+VlaWUlBRZrVanagAAAJhFmQ527du315kzZzRy5Ejt379fCQkJWrRokfr16yfpr2vrYmNjNWXKFH3xxRdKTU3VSy+9pMDAQEVHRztVAwAAwCxcflfs5VStWlXz5s1TXFycYmJiVL16dQ0dOlQxMTG2MYMGDVJ+fr5GjRqlnJwcWa1WzZ8/X56enk7XAAAAMAM3wzAMV0/iRrNnzx5JUkhIyDXf11srdulIxtlrvp+L3dOgmjpHN3TZ/iXJUucW9Xg0WO7ubi7Zf6GCAsPlcwAA3LxKkjvK9BE73Nx8vTzk7u6m5Z/v0/HMP10yh+qV/dS5lcXxQAAAygCCHcq845l/uuyoIQAAN5IyffMEAAAAnEewAwAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJAh2wGWU9/NUQYHr37pXFuYAACj7ePMEcBm81gwAcCMh2AFO4LVmAIAbAadiAQAATIJgBwAAYBIEO6CM4wYOAICzuMYOKOO4gQMA4CyCHXCD4AYOAIAjnIoFAAAwCYIdAIe4zg8AbgycigXgENf5AcCNgWAHwGlc5wcAZRunYgHcEDgdDACOccQOwA2hLJwOvr1mRT12Xz2X7PtCBQWG3N3dXD0NAGXQTRPsCgoK9NZbb2nlypU6ffq0rFarRo8erdtuu83VUwNQAq48HVz9Fl/CpQiWQFl20wS7WbNmaenSpZo4caICAwM1efJk9enTR2vXrpWXl5erpwfgBnIzh0tuYgHKtpsi2OXl5WnBggUaMmSImjdvLkmaPn26oqKitGHDBrVp08a1EwSAEnJVuCy81tHVR+zKwhyAsuimCHapqak6e/asIiMjbW0VK1ZUcHCwkpKSCHYA4CSudfyfshAuy8IcULa4GYZh+lu8NmzYoIEDB2r37t3y8fGxtb/wwgvKycnRnDlzSlTvu+++k2EY1+UU7tnsczrvgrvwPD3c5evt4bL9MwfmwBzK5hxcvf8L55CTm6/zLvonzKOcu7w9y7l0DuXc3eXjVc4l+8b1lZeXJzc3NzVp0sTh2JviiF12drYkFQli3t7e+uOPP0pcz83t+v3fkb+v53XbV1ncP3NgDsyhbM7B1fuXJB9v1/8TVhbmAPNzc3NzOnvcFH8jC4/S5eXl2R2xy83Nla+vb4
nrhYWFXbW5AQAAXC03xQOKa9asKUlKT0+3a09PT1dAQIArpgQAAHDV3RTBLigoSOXLl1diYqKtLSsrSykpKbJarS6cGQAAwNVzU5yK9fLyUmxsrKZMmaIqVaqoVq1amjx5sgIDAxUdHe3q6QEAAFwVN0Wwk6RBgwYpPz9fo0aNUk5OjqxWq+bPny9PT9dfAAwAAHA13BSPOwEAALgZ3BTX2AEAANwMCHYAAAAmQbADAAAwCYIdAACASRDsAAAATIJgBwAAYBIEOwAAAJMg2JVRBQUFmjFjhqKiohQaGqpnnnlGhw8fdvW04MCcOXPUvXt3u7a9e/cqNjZWoaGhatGihZYsWWLX78xaO6qBa+PUqVMaPXq0HnjgATVp0kRdunRRcnKyrX/r1q164okndM8996h169Zav3693fa5ubkaO3asIiMjFRYWpn/84x86efKk3RhHNXDtnDhxQi+//LIiIiIUFhamvn376ueff7b187trDgcOHFBYWJgSEhJsbaZeWwNl0syZM417773X+Oqrr4y9e/cavXv3NqKjo43c3FxXTw2X8P777xtBQUFGbGysre3kyZPGvffeawwfPtzYv3+/sWrVKiMkJMRYtWqVbYyjtXamBq6NXr16GW3atDGSkpKMX375xRg7dqzRuHFj4+effzb2799vhISEGNOmTTP2799vzJs3zwgODja+/fZb2/avvPKK0bJlSyMpKcnYvXu38fjjjxvdunWz9TtTA9dO586djY4dOxq7d+829u/fbwwcONC4//77jT///JPfXZPIy8sznnjiCcNisRjx8fGGYZj/v8sEuzIoNzfXCAsLMz744ANb2x9//GE0btzYWLt2rQtnhuKkpaUZ/fr1M0JDQ43WrVvbBbvZs2cb999/v3Hu3Dlb29SpU43o6GjDMJxba0c1cG0cPHjQsFgsRnJysq2toKDAaNmypfHmm28ar776qtGhQwe7bQYPHmz07t3bMIy//l4EBQUZ//nPf2z9v/zyi2GxWIzvvvvOMAzDYQ1cO6dOnTIGDx5s/Pjjj7a2vXv3GhaLxdi9eze/uyYxdepUo0ePHnbBzuxry6nYMig1NVVnz55VZGSkra1ixYoKDg5WUlKSC2eG4vzwww/y9PTUxx9/rHvuuceuLzk5Wc2aNZOHx/9eyxwREaGDBw8qIyPDqbV2VAPXRuXKlTV37lyFhITY2tzc3OTm5qasrCwlJyfbrZv017rs2LFDhmFox44dtrZC9erVU0BAgN3aXq4Grp1KlSpp6tSpslgskqSTJ09q0aJFCgwMVIMGDfjdNYGkpCQtX75cEydOtGs3+9oS7MqgtLQ0SVLNmjXt2mvUqGHrQ9nRokULzZw5U7fddluRvrS0NAUGBtq11ahRQ5J09OhRp9baUQ1cGxUrVtSDDz4oLy8vW9tnn32mQ4cOKSoq6pLrkp2drczMTB07dkyVK1eWt7d3kTGO1rawBq6PV199VZGRkVq/fr3i4uLk5+fH7+4NLisrS0OHDtWoUaOKrJHZ15ZgVwZlZ2dLkt0/KJLk7e2t3NxcV0wJpZSTk1PsOkp/XVjvzFo7qoHr47vvvtPw4cMVHR2t5s2bF7suhd/n5eUpOzu7SL/keG0vrIHr46mnnlJ8fLzatGmjAQMG6IcffuB39wY3ZswYhYWFqW3btkX6zL62Ho6H4Hrz8fGR9Nd/2Av/LP31l8XX19dV00Ip+Pj4FPkHuvCX3s/Pz6m1dlQD197GjRs1ZMgQNWnSRFOmTJH013/EL16Xwu99fX2LXTfJfm0d1cD10aBBA0lSXFycdu/erffff5/f3RvYRx99pOTkZK1du7bYfrOvLUfsyqDCw7/p6el27enp6QoICHDFlFBKgYGBxa6jJAUEBDi11o5q4Np6//33NXDgQD300EOaPXu27f/Ka9asWey6+Pn5qUKFCgoMDNSpU6eK/Mf/wrV1VAPXzsmTJ7V+/Xrl5+fb2tzd3dWgQQOlp6fzu3sDi4
+P14kTJ9S8eXOFhYUpLCxMkvTaa6+pT58+pl9bgl0ZFBQUpPLlyysxMdHWlpWVpZSUFFmtVhfODCVltVq1Y8cOnT9/3ta2bds21atXT1WrVnVqrR3VwLWzdOlSjRs3Tt26ddO0adPsTr2Eh4dr+/btduO3bdumJk2ayN3dXU2bNlVBQYHtJgrpr+dpHTt2zLa2jmrg2snIyNDgwYO1detWW9u5c+eUkpKi+vXr87t7A5syZYo++eQTffTRR7YvSRo0aJDi4uLMv7auvi0XxZs2bZrRrFkzY+PGjXbP0MnLy3P11HAZw4YNs3vcSUZGhmG1Wo1hw4YZP/30kxEfH2+EhIQYCQkJtjGO1tqZGrj6fvnlF+Puu+82BgwYYKSnp9t9ZWVlGfv27TPuvvtuY/Lkycb+/fuN+fPnF3kG3eDBg40WLVoY27Ztsz3H7sK/H87UwLXTp08fIzo62ti+fbvx448/GoMHDzasVqvx+++/87trMhc+7sTsa0uwK6Py8/ONSZMmGREREUZoaKjxzDPPGIcPH3b1tODAxcHOMAxj9+7dRqdOnYxGjRoZDz30kPHee+/Z9Tuz1o5q4Op75513DIvFUuzXsGHDDMMwjK+//tpo06aN0ahRI6N169bG+vXr7WqcPXvWGDlypBEeHm6Eh4cbgwcPNk6ePGk3xlENXDtZWVnGa6+9Ztx3331G48aNjd69exv79u2z9fO7ax4XBjvDMPfauhkGD0sCAAAwAy7iAAAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJAh2wE2ge/fuatiwoe0rKChIYWFheuKJJ7RkyRK792Ve6B//+IcaNmyoBQsW2NoMw1CPHj3UqFEj7du3r9jtli1bpoYNG+rDDz+0te3YsUPPPvus7r33XjVq1EjNmzfXiBEjdPjw4RJ9loSEBLvPcqmvC/34448aOnSoHnjgAdu+Bw8erN27d5do34VmzpxZ7D5DQ0P1yCOPaPr06XY/00uNv/Cr8AXiF1qxYoUaNmyoZ5991umfRUhIiFq0aKFXX31VaWlplxx/4MCBYmtu2rSpyM8wMTFRDRs2tHvFkiR9+eWXeuqppxQeHq6QkBC1atVKcXFxOnHiRJG6zq5/4c/qYmfPntWsWbPUrl07hYaGqlmzZnryySe1fPnyIn9/Cz/nG2+8UexnvNQ+ADPwcPUEAFwfwcHBeu211yRJ58+f1x9//KFNmzZpwoQJSk5O1ptvvmn3ftLTp09r48aNslgsWr58uXr16iU3Nze5ubkpLi5O7dq106hRo7Rs2TK77dLS0jR58mRFRUWpS5cukqStW7eqT58+tn/4K1SooF9//VULFixQhw4dtHLlStWpU8epz9G8eXMtX77c9v1//vMfvfPOO3rrrbdUvXr1IuPXrFmjkSNHKjg4WC+99JJq1aqltLQ0rVq1Sl26dNHLL7+sXr16lepneuE8JCkzM1Pr1q3T7NmzlZ+fr5dffvmy4y904XtoC8XHx8tisWjTpk06evSo7eXkF7vws2dnZ+unn37S3LlztXHjRi1fvrzIz9bd3V2ffvqpnnvuuSK1Pvnkk0vO8UKrV6/W8OHD9eSTT6pnz57y9fXV/v37NXfuXH311VeKj49XpUqVJF35+h89elS9evVSZmamunfvrqZNmyo3N1fffvut4uLitG7dOs2aNUsVKlSw2+6DDz5Q69atFR4e7tRnAkzBxW++AHAdxMbGFnnVWaHFixcbFovFWLNmjV370qVLjcaNGxtbt241LBZLkfeXLlmyxLBYLMbChQvt2vv27Ws0a9bMSEtLs7X16NHDePLJJ4vs+9ixY0bjxo2NMWPGlPKTGUZ8fLxhsViKfeXeDz/8YNx9993GiBEjjPPnzxfpf+ONN4yGDRsaW7ZsKdE+Z8yYYVgslkv2d+rUyYiIiHB6fHH2799vWCwWY9OmTUbTpk2N6dOnFxlzuc9++PBhw2q1Gj179i
wyvmvXrkbbtm2LbJObm2s0bdrU+Pvf/243323bthkWi8XYtm2bre3hhx82Bg8eXKTG3r17DYvFYrz77ru2tpKs/8U/q4KCAqNDhw7Ggw8+aBw5cqRIje+++85o1KiRMWTIkCKfMzw83GjVqpWRnZ1tt01p1gO4UXAqFrjJxcbGKiAgQMuWLbNrj4+PV2RkpCIiInT77bcX6Y+NjVV4eLj+9a9/6ffff5ckrVu3Tv/5z380evRoBQQE2MZmZGTIKObthTVq1NCoUaN03333XYNPJs2ePVt+fn4aNWqU3VHFQi+//LJq1qypt99++6rut3z58nJzc7uiGoVHvCIiIvTII49o1apVlzxlXpzatWurc+fO+vbbb/Xrr7/a9T366KP68ccfi5yO3bRpk9zc3PTAAw84rH+pNQ0KCtLw4cPVqFEjh2OdWf+vv/5a33//vW2tLhYWFqannnpKH3/8cZHTusOGDdOvv/6qadOmOfw8gFkQ7ICbnLu7uyIjI/X999/bgsNPP/2kPXv26PHHH5ckPf744/riiy+UkZFh287NzU3jx49XQUGBJk6cqNOnT2vixIn629/+pscee8xuH82bN9fOnTvVvXt3rVq1yu4f4I4dO6ply5ZX/XMVFBRoy5YtioyMlK+vb7FjvLy81LJlS+3YsUOZmZkl3kd+fr7tKy8vT8eOHdO7776rLVu26O9///tlx1/4VVBQUGTcxx9/rDZt2sjT01MxMTE6fvy4vvzyyxLNrzAw7dixo0h7pUqV9Omnn9q1f/LJJ2rVqpU8PT0d1m7evLnWr1+vAQMGaN26dTp27Jitr2fPnoqIiLAbW9r137x5s9zd3fXggw9eckzh37cvvvjCrj0iIkKdO3fWe++9V+RnAJgV19gBULVq1XTu3DmdOnVK1apVU3x8vG655Ra1aNFCkhQTE6OZM2dq1apVdhfy33777XrppZc0YcIEWzAaM2ZMkfovvPCCTp8+rVWrVmn79u2SpMDAQD344IPq2bOn7rjjjqv+mU6dOqUzZ86oVq1alx13++23yzAMHT16VJUrVy7RPu6+++4ibbfeeqsGDhyovn37OjVekrp166bRo0fbvt+0aZOOHz+uJ554QpIUHh6uunXratmyZYqOjnZ6foXX3R0/ftyu3cPDQy1bttS///1v23V22dnZ+uqrr/T22287FYLGjRungoICbdiwQRs3bpQk1alTRw8//LB69epld8T2Stb/t99+0y233KLy5ctfckzh9Xm//fZbkb6hQ4dq8+bNGjFihNasWSMfHx+Hnw24kXHEDoDtNJmbm5vOnTunjz/+WC1btlROTo6ysrLk7++vpk2basWKFUWOLvXo0UNhYWFKSkpSXFycbrnlliL1vby89Prrr+vrr79WXFyc2rZtq4KCAi1fvlzt2rXThg0brtlnc3T0qVy5cpJU7KlCR1atWqVVq1bpvffe08MPP6zy5ctr1KhRGjBgQLH7LRx/8VefPn3sxsXHx6tevXqqU6eOsrKylJWVpdatWxd7WvVyLlzXi118Ovarr76Sn5+f7r33XqdqV6hQQTNmzNDGjRs1evRoPfLII8rKytLChQvVunVr7dy50zb2StbfMAx5eFz+GMTl+v39/RUXF6eDBw9q+vTpTn024EbGETsAOnbsmHx8fHTLLbfoyy+/1IkTJ2yh42KbN2+2Oy3m7u6u++67Tzt37rzs6TLpryNIHTp0UIcOHSRJ27Zt08svv6wxY8aoZcuWxV4HV1qVK1eWn59fsUdxLlR4WvBSd5xeTkhIiO3P4eHh6tmzp1544QUtWrSo2DsxLxx/KSdOnNDXX3+tc+fOyWq1Fulfvnx5kbttL6XwcSeBgYFF+iIiIlS5cmXb3bGffPKJWrdubQu6zqpdu7a6deumbt26qaCgQBs3btQrr7yicePGKSEhwW5sada/Vq1a2rJli7Kzsy95Sr1wDW+99dZi+yMjI9W5c2ctWbJEjzzySIk+H3Cj4Y
gdcJPLz89XYmKimjRponLlyik+Pl633XablixZYve1ePFiVahQochNFI7s3r1b//d//6ctW7YU6YuIiNDTTz+tEydOlOoat8txc3PTQw89pM2bN+vs2bPFjjl//rw2btyoJk2aqEqVKle0P3d3d02YMEEeHh565ZVXin0unTM+/vhj5efn6+233y6yBlarVQkJCcrLy3Oq1rfffis3N7diQ6aHh4eio6P16aef6syZM9q0aVORayMv5bPPPlNERESRmy/c3d0VHR2t9u3b6+eff5Z05evfokUL5efn2073FqfwWsHCSweKM3ToUAUGBmr48OHKyclx+BmBGxXBDrjJLV++XMePH1eXLl10/Phxbd68WY899pjuvfdeu6+IiAi1bt1aX3/9td2F8o7UrVtX2dnZWrJkSZHTuJJ04MABVa9e/YqDVXH69eun7OxsjR49WufPny/SP23aNB06dOiSDwAuqVq1aql///46fPiw3n333VLVSEhIUGhoqFq2bFlkDTp16qSTJ0/q888/d1gnLS1NK1euVPPmzS95NPLRRx9VamqqFi5cqGrVqiksLMypOd555506deqUFi9eXGz/wYMHZbFYJF35+t93331q2rSp/vnPfxb7MOs9e/Zo3rx5evTRR1W3bt1Lzrl8+fJ64403dPDgwcs+TxC40XEqFrhJnDlzRrt27ZL01x2jmZmZ+uabb2zXOUVHR+vdd99Vfn7+JY/cPP7441q5cqVWrFihgQMHOrXfSpUqadiwYXrttdfUtWtXderUSbfddptOnz6tzz//XKtXr9aUKVOu+PEgxWnYsKEmTpyo4cOHq0uXLuratatq166t9PR0JSQkaMuWLRoyZIjDU8gl0bNnT61atUrvvvuuYmJi7G7eKPz5F6devXo6dOiQ9u3bp1dffbXYMa1atZK/v7+WLVtmt0Z79+613bGcnZ2tH3/8UYsWLZKPj4/dTRkXa9asmapXr645c+aoZ8+eTq/BHXfcob59+2rOnDk6cuSI2rVrp8DAQJ04cUJr1qzR1q1btXDhQklXvv7u7u6aOnWq+vbtqw4dOqhHjx5q0qSJCgoK9O233+qDDz5QcHCwxo4d63De9913nzp27KiVK1c69TmBGxHBDrhJpKSkqHPnzpL+Ok3p7+8vi8WiMWPGqGPHjpL+Olp055132o62XKxp06aqXbu2Vq5cqf79+zt9PdaTTz6p22+/XUuWLNG0adN06tQp+fv7q3Hjxlq8eLHTF+yXxmOPPaaGDRtq0aJFmjFjho4fP64qVaooPDxcH374oUJDQ6/q/ry8vDRixAj169dP//znPzVjxgxbX+HPvzhvv/22Nm/erHLlyql169bFjvH19dUjjzyihIQE26lOSXr++edtf/b09FStWrXUqlUr9e3bt9i3cRRyd3fXI488ovfff9/p07CFBg8erLvuuksrV67UG2+8oTNnzqhixYoKDw/XqlWrFBQUZBt7petfs2ZNLV++XB9++KHWrVun+fPnq1y5cqpfv75eeeUVdezY0em/i6+88oq++eYbHT16tESfF7hRuBmluRUMAAAAZQ5H7ACUCYZhFHsd3MXKlSt3TU7bunr/AHA1EOwAlAnbt29Xjx49HI6bMGGC7cG9Zto/AFwNnIoFUCacOXOmyOMzilO7du0SvyHiRtg/AFwNBDsAAACT4Dl2AAAAJkGwAwAAMAmCHQAAgEkQ7AAAAEyCYAcAAGASBDsAAACTINgBAACYBMEOAADAJP4f2r4mrJ80jT8AAAAASUVORK5CYII=",
"text/plain": [
"<Figure size 700x350 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.set(rc={'figure.figsize':(7,3.5), 'axes.grid':True})\n",
"sns.set_style(\"whitegrid\", {'axes.grid' : False})\n",
"fig = sns.histplot(admissions_df['DAYS_TO_READMISSION'], kde=False, bins=15)\n",
"fig = fig.get_figure()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"120.0\n",
"507.0\n",
"23.0\n",
"408.8103342398456\n"
]
},
{
"data": {
"text/plain": [
"4107.0"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"print(admissions_df['DAYS_TO_READMISSION'].quantile(0.5))\n",
"print(admissions_df['DAYS_TO_READMISSION'].quantile(0.75))\n",
"print(admissions_df['DAYS_TO_READMISSION'].quantile(0.25))\n",
"print(admissions_df['DAYS_TO_READMISSION'].mean())\n",
"admissions_df['DAYS_TO_READMISSION'].max()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Notes"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ROW_ID</th>\n",
" <th>SUBJECT_ID</th>\n",
" <th>HADM_ID</th>\n",
" <th>CHARTDATE</th>\n",
" <th>CHARTTIME</th>\n",
" <th>STORETIME</th>\n",
" <th>CATEGORY</th>\n",
" <th>DESCRIPTION</th>\n",
" <th>CGID</th>\n",
" <th>ISERROR</th>\n",
" <th>TEXT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>174</td>\n",
" <td>22532</td>\n",
" <td>167853.0</td>\n",
" <td>2151-08-04</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Discharge summary</td>\n",
" <td>Report</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Admission Date: [**2151-7-16**] Dischar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>175</td>\n",
" <td>13702</td>\n",
" <td>107527.0</td>\n",
" <td>2118-06-14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Discharge summary</td>\n",
" <td>Report</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Admission Date: [**2118-6-2**] Discharg...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>176</td>\n",
" <td>13702</td>\n",
" <td>167118.0</td>\n",
" <td>2119-05-25</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Discharge summary</td>\n",
" <td>Report</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Admission Date: [**2119-5-4**] D...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>177</td>\n",
" <td>13702</td>\n",
" <td>196489.0</td>\n",
" <td>2124-08-18</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Discharge summary</td>\n",
" <td>Report</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Admission Date: [**2124-7-21**] ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>178</td>\n",
" <td>26880</td>\n",
" <td>135453.0</td>\n",
" <td>2162-03-25</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Discharge summary</td>\n",
" <td>Report</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Admission Date: [**2162-3-3**] D...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ROW_ID SUBJECT_ID HADM_ID CHARTDATE CHARTTIME STORETIME \\\n",
"0 174 22532 167853.0 2151-08-04 NaN NaN \n",
"1 175 13702 107527.0 2118-06-14 NaN NaN \n",
"2 176 13702 167118.0 2119-05-25 NaN NaN \n",
"3 177 13702 196489.0 2124-08-18 NaN NaN \n",
"4 178 26880 135453.0 2162-03-25 NaN NaN \n",
"\n",
" CATEGORY DESCRIPTION CGID ISERROR \\\n",
"0 Discharge summary Report NaN NaN \n",
"1 Discharge summary Report NaN NaN \n",
"2 Discharge summary Report NaN NaN \n",
"3 Discharge summary Report NaN NaN \n",
"4 Discharge summary Report NaN NaN \n",
"\n",
" TEXT \n",
"0 Admission Date: [**2151-7-16**] Dischar... \n",
"1 Admission Date: [**2118-6-2**] Discharg... \n",
"2 Admission Date: [**2119-5-4**] D... \n",
"3 Admission Date: [**2124-7-21**] ... \n",
"4 Admission Date: [**2162-3-3**] D... "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notes_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(2083180, 11)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notes_df.shape"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Discharge summary', 'Echo', 'ECG', 'Nursing', 'Physician ',\n",
" 'Rehab Services', 'Case Management ', 'Respiratory ', 'Nutrition',\n",
" 'General', 'Social Work', 'Pharmacy', 'Consult', 'Radiology',\n",
" 'Nursing/other'], dtype=object)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"notes_df['CATEGORY'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(59652, 11)"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"discharge_notes = notes_df[notes_df['CATEGORY'] == \"Discharge summary\"]\n",
"discharge_notes.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"6926 discharge-note rows repeat an admission that already has a note, i.e. some admissions have more than one discharge summary (`HADM_ID` is the admission ID)."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6926"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"discharge_notes.duplicated(['HADM_ID']).sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Keep only the last discharge note (in file order) for each admission."
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# Keep the last discharge note (in file order) for every admission.\n",
"# drop_duplicates(keep='last') is used instead of groupby(...).nth(-1) because\n",
"# nth() returns a different index/column layout from pandas 2.0 onwards,\n",
"# which would make reset_index() add a stray 'index' column.\n",
"discharge_notes_ordered = (\n",
"    discharge_notes\n",
"    .dropna(subset=['SUBJECT_ID', 'HADM_ID'])  # groupby dropped rows with missing keys; do the same\n",
"    .drop_duplicates(subset=['SUBJECT_ID', 'HADM_ID'], keep='last')\n",
"    .sort_values(['SUBJECT_ID', 'HADM_ID'])  # same row order as the grouped result\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"discharge_notes_ordered.duplicated(['HADM_ID']).sum()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(52726, 11)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"discharge_notes_ordered.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Merge Notes and Admissions"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Left join on (SUBJECT_ID, HADM_ID): every admission row is kept, and admissions\n",
"# without a matching discharge note end up with NaN in TEXT.\n",
"admissions_notes = pd.merge(\n",
" admissions_df[['SUBJECT_ID','HADM_ID','ADMITTIME','DISCHTIME','DAYS_TO_READMISSION','NEXT_ADMITTIME','ADMISSION_TYPE','DEATHTIME']],\n",
" discharge_notes_ordered[['SUBJECT_ID', 'HADM_ID', 'TEXT']], \n",
" on = ['SUBJECT_ID', 'HADM_ID'], how='left'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SUBJECT_ID</th>\n",
" <th>HADM_ID</th>\n",
" <th>ADMITTIME</th>\n",
" <th>DISCHTIME</th>\n",
" <th>DAYS_TO_READMISSION</th>\n",
" <th>NEXT_ADMITTIME</th>\n",
" <th>ADMISSION_TYPE</th>\n",
" <th>DEATHTIME</th>\n",
" <th>TEXT</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>163353</td>\n",
" <td>2138-07-17 19:04:00</td>\n",
" <td>2138-07-21 15:48:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NEWBORN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3</td>\n",
" <td>145834</td>\n",
" <td>2101-10-20 19:08:00</td>\n",
" <td>2101-10-31 13:58:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2101-10-20**] Discharg...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>185777</td>\n",
" <td>2191-03-16 00:28:00</td>\n",
" <td>2191-03-23 18:41:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2191-3-16**] Discharge...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>178980</td>\n",
" <td>2103-02-02 04:31:00</td>\n",
" <td>2103-02-04 12:15:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>NEWBORN</td>\n",
" <td>NaT</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>107064</td>\n",
" <td>2175-05-30 07:15:00</td>\n",
" <td>2175-06-15 16:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2175-5-30**] Dischar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58971</th>\n",
" <td>99985</td>\n",
" <td>176670</td>\n",
" <td>2181-01-27 02:47:00</td>\n",
" <td>2181-02-12 17:05:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2181-1-27**] ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58972</th>\n",
" <td>99991</td>\n",
" <td>151118</td>\n",
" <td>2184-12-24 08:30:00</td>\n",
" <td>2185-01-05 12:15:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2184-12-24**] ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58973</th>\n",
" <td>99992</td>\n",
" <td>197084</td>\n",
" <td>2144-07-25 18:03:00</td>\n",
" <td>2144-07-28 17:56:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2144-7-25**] ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58974</th>\n",
" <td>99995</td>\n",
" <td>137810</td>\n",
" <td>2147-02-08 08:00:00</td>\n",
" <td>2147-02-11 13:15:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2147-2-8**] D...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>58975</th>\n",
" <td>99999</td>\n",
" <td>113369</td>\n",
" <td>2117-12-30 07:15:00</td>\n",
" <td>2118-01-04 16:30:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2117-12-30**] ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>58976 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" SUBJECT_ID HADM_ID ADMITTIME DISCHTIME \\\n",
"0 2 163353 2138-07-17 19:04:00 2138-07-21 15:48:00 \n",
"1 3 145834 2101-10-20 19:08:00 2101-10-31 13:58:00 \n",
"2 4 185777 2191-03-16 00:28:00 2191-03-23 18:41:00 \n",
"3 5 178980 2103-02-02 04:31:00 2103-02-04 12:15:00 \n",
"4 6 107064 2175-05-30 07:15:00 2175-06-15 16:00:00 \n",
"... ... ... ... ... \n",
"58971 99985 176670 2181-01-27 02:47:00 2181-02-12 17:05:00 \n",
"58972 99991 151118 2184-12-24 08:30:00 2185-01-05 12:15:00 \n",
"58973 99992 197084 2144-07-25 18:03:00 2144-07-28 17:56:00 \n",
"58974 99995 137810 2147-02-08 08:00:00 2147-02-11 13:15:00 \n",
"58975 99999 113369 2117-12-30 07:15:00 2118-01-04 16:30:00 \n",
"\n",
" DAYS_TO_READMISSION NEXT_ADMITTIME ADMISSION_TYPE DEATHTIME \\\n",
"0 NaN NaT NEWBORN NaT \n",
"1 NaN NaT EMERGENCY NaT \n",
"2 NaN NaT EMERGENCY NaT \n",
"3 NaN NaT NEWBORN NaT \n",
"4 NaN NaT ELECTIVE NaT \n",
"... ... ... ... ... \n",
"58971 NaN NaT EMERGENCY NaT \n",
"58972 NaN NaT ELECTIVE NaT \n",
"58973 NaN NaT EMERGENCY NaT \n",
"58974 NaN NaT ELECTIVE NaT \n",
"58975 NaN NaT ELECTIVE NaT \n",
"\n",
" TEXT \n",
"0 NaN \n",
"1 Admission Date: [**2101-10-20**] Discharg... \n",
"2 Admission Date: [**2191-3-16**] Discharge... \n",
"3 NaN \n",
"4 Admission Date: [**2175-5-30**] Dischar... \n",
"... ... \n",
"58971 Admission Date: [**2181-1-27**] ... \n",
"58972 Admission Date: [**2184-12-24**] ... \n",
"58973 Admission Date: [**2144-7-25**] ... \n",
"58974 Admission Date: [**2147-2-8**] D... \n",
"58975 Admission Date: [**2117-12-30**] ... \n",
"\n",
"[58976 rows x 9 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"admissions_notes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"About 10.6% of admissions have no discharge note."
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.1059753119913185"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(admissions_notes['TEXT'].isnull()) / len(admissions_notes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"About 54% of NEWBORN admissions have no discharge note, compared with roughly 4-5% for the other admission types."
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ADMISSION_TYPE\n",
"ELECTIVE 0.048663\n",
"EMERGENCY 0.037983\n",
"NEWBORN 0.536691\n",
"URGENT 0.042665\n",
"dtype: float64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of admissions without a discharge note, per admission type.\n",
"# Averaging the boolean mask in a single groupby replaces two groupby passes and avoids\n",
"# DataFrameGroupBy.apply, which emits a deprecation warning on pandas 2.2+.\n",
"(\n",
"    admissions_notes['TEXT'].isnull()\n",
"    .groupby(admissions_notes['ADMISSION_TYPE'])\n",
"    .mean()\n",
"    .rename(None)  # leave the result unnamed, like the original ratio\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Remove NEWBORN admissions and create the target variable"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"# Drop NEWBORN admissions; .copy() so the new column is added to an independent frame.\n",
"adm_notes = admissions_notes[admissions_notes['ADMISSION_TYPE'] != 'NEWBORN'].copy()\n",
"# Target: 1 when DAYS_TO_READMISSION is below 30. NaN (no next admission) compares False, so it maps to 0.\n",
"adm_notes['READM_WITHIN_30'] = (adm_notes['DAYS_TO_READMISSION'] < 30).astype('int')"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3004"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(adm_notes['READM_WITHIN_30'])"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(51113, 10)"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adm_notes.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Exclude patients who died during the admission"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose DEATHTIME is missing.\n",
"rows_not_death = adm_notes['DEATHTIME'].isnull()\n",
"df_adm_notes_not_death = adm_notes.loc[rows_not_death].copy()\n",
"# Shuffle all rows; the fixed random_state keeps the order identical across re-runs.\n",
"df_adm_notes_not_death = df_adm_notes_not_death.sample(frac=1, random_state=42)\n",
"df_adm_notes_not_death = df_adm_notes_not_death.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2963"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(df_adm_notes_not_death['READM_WITHIN_30'])"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"45321"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_adm_notes_not_death['READM_WITHIN_30'])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Creation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Exclude patients who died during the admission"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows whose DEATHTIME is missing, then shuffle all of them.\n",
"# The fixed random_state makes the row order (and the exported CSV) reproducible.\n",
"adm_notes = adm_notes.loc[adm_notes['DEATHTIME'].isnull()]\n",
"adm_notes = adm_notes.sample(frac=1, random_state=42)\n",
"adm_notes = adm_notes.reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SUBJECT_ID</th>\n",
" <th>HADM_ID</th>\n",
" <th>ADMITTIME</th>\n",
" <th>DISCHTIME</th>\n",
" <th>DAYS_TO_READMISSION</th>\n",
" <th>NEXT_ADMITTIME</th>\n",
" <th>ADMISSION_TYPE</th>\n",
" <th>DEATHTIME</th>\n",
" <th>TEXT</th>\n",
" <th>READM_WITHIN_30</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6090</td>\n",
" <td>175043</td>\n",
" <td>2170-05-03 07:15:00</td>\n",
" <td>2170-05-06 13:40:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2170-5-3**] D...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>27901</td>\n",
" <td>189210</td>\n",
" <td>2101-06-21 07:15:00</td>\n",
" <td>2101-07-13 15:00:00</td>\n",
" <td>50.0</td>\n",
" <td>2101-09-01 20:44:00</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2101-6-21**] ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>69531</td>\n",
" <td>102759</td>\n",
" <td>2156-08-18 23:41:00</td>\n",
" <td>2156-08-26 16:45:00</td>\n",
" <td>8.0</td>\n",
" <td>2156-09-03 21:11:00</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2156-8-18**] ...</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>8620</td>\n",
" <td>148993</td>\n",
" <td>2190-02-05 17:13:00</td>\n",
" <td>2190-02-09 17:53:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2190-2-5**] Discharge ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>24226</td>\n",
" <td>118785</td>\n",
" <td>2113-04-04 07:30:00</td>\n",
" <td>2113-04-14 11:20:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Name: [**Known lastname 10030**],[**Known fir...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45316</th>\n",
" <td>17882</td>\n",
" <td>157780</td>\n",
" <td>2146-05-01 16:33:00</td>\n",
" <td>2146-05-09 16:20:00</td>\n",
" <td>412.0</td>\n",
" <td>2147-06-25 17:42:00</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2146-5-1**] D...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45317</th>\n",
" <td>2184</td>\n",
" <td>171742</td>\n",
" <td>2154-04-21 19:25:00</td>\n",
" <td>2154-04-25 11:49:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2154-4-21**] Discharge...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45318</th>\n",
" <td>28240</td>\n",
" <td>151747</td>\n",
" <td>2195-06-21 07:27:00</td>\n",
" <td>2195-06-26 14:33:00</td>\n",
" <td>154.0</td>\n",
" <td>2195-11-28 00:15:00</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2195-6-21**] ...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45319</th>\n",
" <td>25201</td>\n",
" <td>124241</td>\n",
" <td>2149-06-02 08:00:00</td>\n",
" <td>2149-06-11 13:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>ELECTIVE</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2149-6-2**] D...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45320</th>\n",
" <td>20855</td>\n",
" <td>108604</td>\n",
" <td>2161-07-31 17:00:00</td>\n",
" <td>2161-08-20 16:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaT</td>\n",
" <td>EMERGENCY</td>\n",
" <td>NaT</td>\n",
" <td>Admission Date: [**2161-7-31**] Dischar...</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>45321 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" SUBJECT_ID HADM_ID ADMITTIME DISCHTIME \\\n",
"0 6090 175043 2170-05-03 07:15:00 2170-05-06 13:40:00 \n",
"1 27901 189210 2101-06-21 07:15:00 2101-07-13 15:00:00 \n",
"2 69531 102759 2156-08-18 23:41:00 2156-08-26 16:45:00 \n",
"3 8620 148993 2190-02-05 17:13:00 2190-02-09 17:53:00 \n",
"4 24226 118785 2113-04-04 07:30:00 2113-04-14 11:20:00 \n",
"... ... ... ... ... \n",
"45316 17882 157780 2146-05-01 16:33:00 2146-05-09 16:20:00 \n",
"45317 2184 171742 2154-04-21 19:25:00 2154-04-25 11:49:00 \n",
"45318 28240 151747 2195-06-21 07:27:00 2195-06-26 14:33:00 \n",
"45319 25201 124241 2149-06-02 08:00:00 2149-06-11 13:00:00 \n",
"45320 20855 108604 2161-07-31 17:00:00 2161-08-20 16:00:00 \n",
"\n",
" DAYS_TO_READMISSION NEXT_ADMITTIME ADMISSION_TYPE DEATHTIME \\\n",
"0 NaN NaT ELECTIVE NaT \n",
"1 50.0 2101-09-01 20:44:00 ELECTIVE NaT \n",
"2 8.0 2156-09-03 21:11:00 EMERGENCY NaT \n",
"3 NaN NaT ELECTIVE NaT \n",
"4 NaN NaT ELECTIVE NaT \n",
"... ... ... ... ... \n",
"45316 412.0 2147-06-25 17:42:00 EMERGENCY NaT \n",
"45317 NaN NaT EMERGENCY NaT \n",
"45318 154.0 2195-11-28 00:15:00 EMERGENCY NaT \n",
"45319 NaN NaT ELECTIVE NaT \n",
"45320 NaN NaT EMERGENCY NaT \n",
"\n",
" TEXT READM_WITHIN_30 \n",
"0 Admission Date: [**2170-5-3**] D... 0 \n",
"1 Admission Date: [**2101-6-21**] ... 0 \n",
"2 Admission Date: [**2156-8-18**] ... 1 \n",
"3 Admission Date: [**2190-2-5**] Discharge ... 0 \n",
"4 Name: [**Known lastname 10030**],[**Known fir... 0 \n",
"... ... ... \n",
"45316 Admission Date: [**2146-5-1**] D... 0 \n",
"45317 Admission Date: [**2154-4-21**] Discharge... 0 \n",
"45318 Admission Date: [**2195-6-21**] ... 0 \n",
"45319 Admission Date: [**2149-6-2**] D... 0 \n",
"45320 Admission Date: [**2161-7-31**] Dischar... 0 \n",
"\n",
"[45321 rows x 10 columns]"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adm_notes"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"adm_notes.to_csv(DIR + 'readmission.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data Cleaning"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Natural Language"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'!\"#$%&\\'()*+,-./:;<=>?@[\\\\]^_`{|}~'"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"string.punctuation"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"def clean_text(texts):\n",
"    \"\"\"Normalise raw note text before tokenisation.\n",
"\n",
"    Missing entries become a single space, newlines and carriage returns are\n",
"    turned into spaces, the text is lower-cased, and every character in\n",
"    string.punctuation plus the digits 0-9 is deleted (not replaced).\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    texts : pandas.Series\n",
"        Note texts; may contain missing values.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        Cleaned texts, in the same order as the input.\n",
"    \"\"\"\n",
"    flattened = texts.fillna(' ').str.replace('\\n', ' ').str.replace('\\r', ' ')\n",
"    strip_table = str.maketrans('', '', string.punctuation + '0123456789')\n",
"\n",
"    cleaned = []\n",
"    for note in flattened:\n",
"        cleaned.append(note.lower().translate(strip_table))\n",
"    return cleaned"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"adm_notes['TEXT'] = clean_text(adm_notes['TEXT'])"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"# NLTK's English stop words plus a few extra terms to drop from the notes.\n",
"stop_words = stopwords.words('english') + [\n",
"    'patient', 'date', 'admission', 'discharge', 'lastname', 'firstname', 'sex',\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"porter = PorterStemmer()"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"def tokenize_stem(text):\n",
"    \"\"\"Tokenise `text`, drop stop words and Porter-stem what is left.\n",
"\n",
"    Relies on the notebook-level `stop_words` list and `porter` stemmer.\n",
"    Returns the stems as a list, in token order.\n",
"    \"\"\"\n",
"    return [\n",
"        porter.stem(token)\n",
"        for token in word_tokenize(text)\n",
"        if token not in stop_words\n",
"    ]"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2963\n",
"45321\n"
]
}
],
"source": [
"print(sum(adm_notes['READM_WITHIN_30'] == 1) )\n",
"print(len(adm_notes['READM_WITHIN_30']))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}