Download this file

2351 lines (2350 with data), 105.5 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise L3 - 1: Diagnose Dataset Level and Select Last Encounter"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instructions\n",
    "- Given the dataset, convert the dataset to a longitudinal level but select only the last encounter for each patient.\n",
    "- Assume that the order of encounter IDs is indicative of the time of the encounter. In other words, a lower-numbered encounter comes before a higher-numbered encounter."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ehr_level_dataset_path = \"./data/ehr_level_exercise_dataset.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Level of Dataset\n",
    "What level is the dataset at? Is it at the line or encounter level?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "      <th>PRINCIPAL_DIAGNOSIS_CODE</th>\n",
       "      <th>PROCEDURE_CODE</th>\n",
       "      <th>MEDICATION_CODE</th>\n",
       "      <th>LAB_CODE</th>\n",
       "      <th>LABEL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>udacity_health_encounter_id_1</td>\n",
       "      <td>udacity_health_patient_id_186</td>\n",
       "      <td>dx_code_15406</td>\n",
       "      <td>['procedure_code_58552', 'procedure_code_39776...</td>\n",
       "      <td>['medication_code_2350', 'medication_code_8630...</td>\n",
       "      <td>['lab_code_8835', 'lab_code_9859', 'lab_code_9...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>udacity_health_encounter_id_10</td>\n",
       "      <td>udacity_health_patient_id_188</td>\n",
       "      <td>dx_code_74047</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_7789', 'medication_code_3560...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[1, 1, 1, 1]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>udacity_health_encounter_id_100</td>\n",
       "      <td>udacity_health_patient_id_585</td>\n",
       "      <td>dx_code_71465</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_7982', 'medication_code_2452...</td>\n",
       "      <td>['lab_code_4198', 'lab_code_6603', 'lab_code_3...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>udacity_health_encounter_id_1000</td>\n",
       "      <td>udacity_health_patient_id_525</td>\n",
       "      <td>dx_code_61569</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_4036']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>udacity_health_encounter_id_1001</td>\n",
       "      <td>udacity_health_patient_id_950</td>\n",
       "      <td>dx_code_90172</td>\n",
       "      <td>['procedure_code_30555']</td>\n",
       "      <td>['medication_code_6755', 'medication_code_5045']</td>\n",
       "      <td>['lab_code_9112']</td>\n",
       "      <td>[0, 0, 0, 0]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       ENCOUNTER_ID                     PATIENT_ID  \\\n",
       "0     udacity_health_encounter_id_1  udacity_health_patient_id_186   \n",
       "1    udacity_health_encounter_id_10  udacity_health_patient_id_188   \n",
       "2   udacity_health_encounter_id_100  udacity_health_patient_id_585   \n",
       "3  udacity_health_encounter_id_1000  udacity_health_patient_id_525   \n",
       "4  udacity_health_encounter_id_1001  udacity_health_patient_id_950   \n",
       "\n",
       "  PRINCIPAL_DIAGNOSIS_CODE                                     PROCEDURE_CODE  \\\n",
       "0            dx_code_15406  ['procedure_code_58552', 'procedure_code_39776...   \n",
       "1            dx_code_74047                                                 []   \n",
       "2            dx_code_71465                                                 []   \n",
       "3            dx_code_61569                                                 []   \n",
       "4            dx_code_90172                           ['procedure_code_30555']   \n",
       "\n",
       "                                     MEDICATION_CODE  \\\n",
       "0  ['medication_code_2350', 'medication_code_8630...   \n",
       "1  ['medication_code_7789', 'medication_code_3560...   \n",
       "2  ['medication_code_7982', 'medication_code_2452...   \n",
       "3                           ['medication_code_4036']   \n",
       "4   ['medication_code_6755', 'medication_code_5045']   \n",
       "\n",
       "                                            LAB_CODE  \\\n",
       "0  ['lab_code_8835', 'lab_code_9859', 'lab_code_9...   \n",
       "1                                                 []   \n",
       "2  ['lab_code_4198', 'lab_code_6603', 'lab_code_3...   \n",
       "3                                                 []   \n",
       "4                                  ['lab_code_9112']   \n",
       "\n",
       "                                               LABEL  \n",
       "0  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  \n",
       "1                                       [1, 1, 1, 1]  \n",
       "2                              [0, 0, 0, 0, 0, 0, 0]  \n",
       "3                                                [0]  \n",
       "4                                       [0, 0, 0, 0]  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ehr_level_df = pd.read_csv(ehr_level_dataset_path)\n",
    "ehr_level_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Tests**\n",
    "- Line: Total number of rows > Number of Unique Encounters\n",
    "- Encounter level: Total Number of Rows = Number of Unique Encounters\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset is not at the line level\n"
     ]
    }
   ],
   "source": [
    "# Line Test\n",
    "# A line-level dataset repeats ENCOUNTER_ID across rows, so it has more rows than\n",
    "# unique encounters. Compare explicitly instead of asserting inside a bare except,\n",
    "# so that real errors (e.g. a missing column) surface instead of being misreported.\n",
    "n_rows = len(ehr_level_df)\n",
    "n_unique_encounters = ehr_level_df['ENCOUNTER_ID'].nunique()\n",
    "if n_rows > n_unique_encounters:\n",
    "    print(\"Dataset could be at the line level\")\n",
    "else:\n",
    "    print(\"Dataset is not at the line level\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset could be at the encounter level\n"
     ]
    }
   ],
   "source": [
    "# Encounter Test\n",
    "# An encounter-level dataset has exactly one row per ENCOUNTER_ID. Compare explicitly\n",
    "# instead of asserting inside a bare except, so that real errors (e.g. a missing\n",
    "# column) surface instead of being misreported.\n",
    "n_rows = len(ehr_level_df)\n",
    "n_unique_encounters = ehr_level_df['ENCOUNTER_ID'].nunique()\n",
    "if n_rows == n_unique_encounters:\n",
    "    print(\"Dataset could be at the encounter level\")\n",
    "else:\n",
    "    print(\"Dataset is not at the encounter level\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Answer:** The dataset is at the encounter level. You can probably guess this from the arrays in the code-set columns, but we ran a few simple tests to confirm."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select Last Encounter for each Patient"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In many cases you may only want a snapshot of a patient's history for your modeling objective. In some cases it might be important to see the changes over time, but in other cases you only want the most recent case or, depending on the model, the first case. It is really important to know the context in which the model will be deployed in production and the point in time at which you will be getting data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# select last encounter for each patient\n",
    "#convert encounter id column to a numerical value\n",
    "def convert_encounter_id_to_number(df, encounter_id):\n",
    "    df[\"ENCOUNTER_ID_NUMBER\"] = df[encounter_id].str.replace('udacity_health_encounter_id_', '').astype(int)\n",
    "    return df\n",
    "\n",
    "def select_last_encounter(df, patient_id, encounter_id):\n",
    "    df = df.sort_values(encounter_id)\n",
    "    last_encounter_values = df.groupby(patient_id)[encounter_id].tail(1).values\n",
    "    return df[df[encounter_id].isin(last_encounter_values)] "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "ehr_encounter_number_df = convert_encounter_id_to_number(ehr_level_df, \"ENCOUNTER_ID\")\n",
    "last_encounter_df = select_last_encounter(ehr_encounter_number_df, \"PATIENT_ID\", \"ENCOUNTER_ID_NUMBER\" )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#take subset of output\n",
    "test_last_encounter_df = last_encounter_df[['ENCOUNTER_ID', 'ENCOUNTER_ID_NUMBER', 'PATIENT_ID']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Test cases \n",
    "- PATIENT_IDS - udacity_health_patient_id_309, udacity_health_patient_id_418, udacity_health_patient_id_908"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "      <th>PRINCIPAL_DIAGNOSIS_CODE</th>\n",
       "      <th>PROCEDURE_CODE</th>\n",
       "      <th>MEDICATION_CODE</th>\n",
       "      <th>LAB_CODE</th>\n",
       "      <th>LABEL</th>\n",
       "      <th>ENCOUNTER_ID_NUMBER</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>94</th>\n",
       "      <td>udacity_health_encounter_id_1091</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_62502</td>\n",
       "      <td>['procedure_code_69597']</td>\n",
       "      <td>['medication_code_9253', 'medication_code_7444...</td>\n",
       "      <td>['lab_code_9415', 'lab_code_8179']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>1091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>450</th>\n",
       "      <td>udacity_health_encounter_id_1436</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_37825</td>\n",
       "      <td>['procedure_code_14044', 'procedure_code_47911...</td>\n",
       "      <td>['medication_code_4081', 'medication_code_6050...</td>\n",
       "      <td>['lab_code_6246', 'lab_code_7197']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>1436</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>605</th>\n",
       "      <td>udacity_health_encounter_id_1582</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_79663</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_4450', 'medication_code_4900...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0]</td>\n",
       "      <td>1582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>665</th>\n",
       "      <td>udacity_health_encounter_id_1643</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_60773</td>\n",
       "      <td>['procedure_code_39999', 'procedure_code_31842...</td>\n",
       "      <td>['medication_code_413', 'medication_code_5966'...</td>\n",
       "      <td>['lab_code_4295', 'lab_code_2383']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>1643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>684</th>\n",
       "      <td>udacity_health_encounter_id_1662</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_29272</td>\n",
       "      <td>['procedure_code_12589', 'procedure_code_3005'...</td>\n",
       "      <td>['medication_code_9845', 'medication_code_6602...</td>\n",
       "      <td>['lab_code_4330', 'lab_code_3499', 'lab_code_1...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>1662</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>867</th>\n",
       "      <td>udacity_health_encounter_id_1837</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_56368</td>\n",
       "      <td>['procedure_code_14069', 'procedure_code_15230...</td>\n",
       "      <td>['medication_code_5820', 'medication_code_8028...</td>\n",
       "      <td>['lab_code_8964']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>1837</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1024</th>\n",
       "      <td>udacity_health_encounter_id_1985</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_38581</td>\n",
       "      <td>['procedure_code_1431', 'procedure_code_71061']</td>\n",
       "      <td>['medication_code_3134', 'medication_code_4868...</td>\n",
       "      <td>['lab_code_7702']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>1985</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1042</th>\n",
       "      <td>udacity_health_encounter_id_2000</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_91230</td>\n",
       "      <td>[]</td>\n",
       "      <td>[]</td>\n",
       "      <td>['lab_code_8062']</td>\n",
       "      <td>[0]</td>\n",
       "      <td>2000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1196</th>\n",
       "      <td>udacity_health_encounter_id_2142</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_58527</td>\n",
       "      <td>['procedure_code_56979', 'procedure_code_24797...</td>\n",
       "      <td>['medication_code_7631', 'medication_code_7508...</td>\n",
       "      <td>['lab_code_3908', 'lab_code_1500']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>2142</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1289</th>\n",
       "      <td>udacity_health_encounter_id_2229</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_46826</td>\n",
       "      <td>['procedure_code_50916', 'procedure_code_20032...</td>\n",
       "      <td>['medication_code_9204', 'medication_code_1282...</td>\n",
       "      <td>['lab_code_5607', 'lab_code_5139']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>2229</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1340</th>\n",
       "      <td>udacity_health_encounter_id_2281</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_39484</td>\n",
       "      <td>['procedure_code_33400', 'procedure_code_56', ...</td>\n",
       "      <td>['medication_code_1938', 'medication_code_4076...</td>\n",
       "      <td>['lab_code_9178', 'lab_code_3557', 'lab_code_2...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>2281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1596</th>\n",
       "      <td>udacity_health_encounter_id_2531</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_75423</td>\n",
       "      <td>['procedure_code_15600', 'procedure_code_19008...</td>\n",
       "      <td>['medication_code_7265', 'medication_code_2247...</td>\n",
       "      <td>['lab_code_8336', 'lab_code_7292', 'lab_code_8...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>2531</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1702</th>\n",
       "      <td>udacity_health_encounter_id_2631</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_38210</td>\n",
       "      <td>['procedure_code_121']</td>\n",
       "      <td>['medication_code_7480', 'medication_code_7917...</td>\n",
       "      <td>['lab_code_4777']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>2631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2069</th>\n",
       "      <td>udacity_health_encounter_id_2989</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_80145</td>\n",
       "      <td>['procedure_code_28893', 'procedure_code_9124'...</td>\n",
       "      <td>['medication_code_43', 'medication_code_5602',...</td>\n",
       "      <td>['lab_code_3392', 'lab_code_9665', 'lab_code_4...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>2989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2291</th>\n",
       "      <td>udacity_health_encounter_id_3209</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_74710</td>\n",
       "      <td>['procedure_code_41970', 'procedure_code_64032...</td>\n",
       "      <td>['medication_code_7596', 'medication_code_7481...</td>\n",
       "      <td>['lab_code_9105']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>3209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2507</th>\n",
       "      <td>udacity_health_encounter_id_3416</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_43343</td>\n",
       "      <td>['procedure_code_11834', 'procedure_code_12304...</td>\n",
       "      <td>['medication_code_3767', 'medication_code_7630...</td>\n",
       "      <td>['lab_code_774', 'lab_code_9591', 'lab_code_31...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>3416</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2696</th>\n",
       "      <td>udacity_health_encounter_id_360</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_94868</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_193', 'medication_code_3556'...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0]</td>\n",
       "      <td>360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2890</th>\n",
       "      <td>udacity_health_encounter_id_3792</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_37738</td>\n",
       "      <td>['procedure_code_11852', 'procedure_code_45278...</td>\n",
       "      <td>['medication_code_7882', 'medication_code_6562...</td>\n",
       "      <td>['lab_code_3787', 'lab_code_6452', 'lab_code_6...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>3792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2960</th>\n",
       "      <td>udacity_health_encounter_id_3859</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_17144</td>\n",
       "      <td>['procedure_code_69917']</td>\n",
       "      <td>['medication_code_2152', 'medication_code_7078...</td>\n",
       "      <td>['lab_code_5682']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>3859</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3064</th>\n",
       "      <td>udacity_health_encounter_id_3961</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_61465</td>\n",
       "      <td>['procedure_code_15248', 'procedure_code_65724...</td>\n",
       "      <td>['medication_code_701', 'medication_code_8324'...</td>\n",
       "      <td>['lab_code_7691', 'lab_code_4598', 'lab_code_1...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>3961</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3167</th>\n",
       "      <td>udacity_health_encounter_id_4058</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_73636</td>\n",
       "      <td>['procedure_code_5646', 'procedure_code_1381',...</td>\n",
       "      <td>['medication_code_4201', 'medication_code_5328...</td>\n",
       "      <td>['lab_code_9083', 'lab_code_1392', 'lab_code_1...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>4058</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3280</th>\n",
       "      <td>udacity_health_encounter_id_4168</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_9795</td>\n",
       "      <td>['procedure_code_44804', 'procedure_code_10917...</td>\n",
       "      <td>['medication_code_2738', 'medication_code_9919...</td>\n",
       "      <td>['lab_code_4690', 'lab_code_5570', 'lab_code_2...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>4168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3353</th>\n",
       "      <td>udacity_health_encounter_id_4239</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_28764</td>\n",
       "      <td>['procedure_code_31873', 'procedure_code_29930...</td>\n",
       "      <td>['medication_code_7607', 'medication_code_6643...</td>\n",
       "      <td>['lab_code_8492', 'lab_code_3309', 'lab_code_6...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>4239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3561</th>\n",
       "      <td>udacity_health_encounter_id_4439</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_31393</td>\n",
       "      <td>['procedure_code_69203', 'procedure_code_53820...</td>\n",
       "      <td>['medication_code_9780', 'medication_code_7008...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>4439</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3585</th>\n",
       "      <td>udacity_health_encounter_id_4462</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_53624</td>\n",
       "      <td>['procedure_code_64278', 'procedure_code_19230']</td>\n",
       "      <td>['medication_code_8759', 'medication_code_59',...</td>\n",
       "      <td>['lab_code_3790', 'lab_code_591', 'lab_code_89...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>4462</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3718</th>\n",
       "      <td>udacity_health_encounter_id_4589</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_67438</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_8103']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0]</td>\n",
       "      <td>4589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3752</th>\n",
       "      <td>udacity_health_encounter_id_4619</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_79460</td>\n",
       "      <td>['procedure_code_8648', 'procedure_code_22388'...</td>\n",
       "      <td>['medication_code_4773', 'medication_code_5839...</td>\n",
       "      <td>['lab_code_5055']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>4619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3916</th>\n",
       "      <td>udacity_health_encounter_id_478</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_43778</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_1646', 'medication_code_4346']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0]</td>\n",
       "      <td>478</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4552</th>\n",
       "      <td>udacity_health_encounter_id_5398</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_58513</td>\n",
       "      <td>['procedure_code_3629', 'procedure_code_56509'...</td>\n",
       "      <td>['medication_code_5697', 'medication_code_9845...</td>\n",
       "      <td>['lab_code_4909']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>5398</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4687</th>\n",
       "      <td>udacity_health_encounter_id_553</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_88408</td>\n",
       "      <td>['procedure_code_19471', 'procedure_code_17060...</td>\n",
       "      <td>['medication_code_1541', 'medication_code_5218...</td>\n",
       "      <td>['lab_code_4271', 'lab_code_7946', 'lab_code_9...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>553</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4771</th>\n",
       "      <td>udacity_health_encounter_id_5611</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_46410</td>\n",
       "      <td>['procedure_code_71656', 'procedure_code_29912...</td>\n",
       "      <td>['medication_code_3365', 'medication_code_862'...</td>\n",
       "      <td>['lab_code_8280', 'lab_code_9803', 'lab_code_1...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>5611</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4785</th>\n",
       "      <td>udacity_health_encounter_id_5624</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_96267</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_1343']</td>\n",
       "      <td>['lab_code_9589']</td>\n",
       "      <td>[0, 0]</td>\n",
       "      <td>5624</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4796</th>\n",
       "      <td>udacity_health_encounter_id_5635</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_67422</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_2864']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0]</td>\n",
       "      <td>5635</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4862</th>\n",
       "      <td>udacity_health_encounter_id_5701</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_4007</td>\n",
       "      <td>['procedure_code_8263', 'procedure_code_7509']</td>\n",
       "      <td>['medication_code_3925', 'medication_code_3163']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0, 0]</td>\n",
       "      <td>5701</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4881</th>\n",
       "      <td>udacity_health_encounter_id_5721</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_11147</td>\n",
       "      <td>['procedure_code_68127', 'procedure_code_3768'...</td>\n",
       "      <td>['medication_code_4722', 'medication_code_2681...</td>\n",
       "      <td>['lab_code_8853', 'lab_code_3111', 'lab_code_2...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>5721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5132</th>\n",
       "      <td>udacity_health_encounter_id_597</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_25907</td>\n",
       "      <td>['procedure_code_41138', 'procedure_code_28596']</td>\n",
       "      <td>['medication_code_4777', 'medication_code_4318...</td>\n",
       "      <td>['lab_code_1043']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>597</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5306</th>\n",
       "      <td>udacity_health_encounter_id_6139</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_40784</td>\n",
       "      <td>['procedure_code_48973', 'procedure_code_7953'...</td>\n",
       "      <td>['medication_code_3191', 'medication_code_105'...</td>\n",
       "      <td>['lab_code_5078', 'lab_code_2361', 'lab_code_9...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>6139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5385</th>\n",
       "      <td>udacity_health_encounter_id_622</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_25556</td>\n",
       "      <td>['procedure_code_10462']</td>\n",
       "      <td>['medication_code_3626', 'medication_code_311']</td>\n",
       "      <td>['lab_code_4960']</td>\n",
       "      <td>[0, 0, 0, 0]</td>\n",
       "      <td>622</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5472</th>\n",
       "      <td>udacity_health_encounter_id_6305</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_93595</td>\n",
       "      <td>['procedure_code_6005', 'procedure_code_13203'...</td>\n",
       "      <td>['medication_code_4513']</td>\n",
       "      <td>['lab_code_7349', 'lab_code_1043', 'lab_code_9...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>6305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5564</th>\n",
       "      <td>udacity_health_encounter_id_6394</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_17875</td>\n",
       "      <td>['procedure_code_42424']</td>\n",
       "      <td>[]</td>\n",
       "      <td>['lab_code_5222']</td>\n",
       "      <td>[0, 0]</td>\n",
       "      <td>6394</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5669</th>\n",
       "      <td>udacity_health_encounter_id_6497</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_36095</td>\n",
       "      <td>['procedure_code_33881', 'procedure_code_6396'...</td>\n",
       "      <td>['medication_code_6849', 'medication_code_2364...</td>\n",
       "      <td>['lab_code_2570', 'lab_code_2383', 'lab_code_8...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>6497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5684</th>\n",
       "      <td>udacity_health_encounter_id_6512</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_92784</td>\n",
       "      <td>['procedure_code_31090', 'procedure_code_28839...</td>\n",
       "      <td>['medication_code_5274', 'medication_code_6976...</td>\n",
       "      <td>['lab_code_4639']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>6512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5829</th>\n",
       "      <td>udacity_health_encounter_id_6662</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_26279</td>\n",
       "      <td>['procedure_code_47253', 'procedure_code_52416...</td>\n",
       "      <td>['medication_code_2602', 'medication_code_3320...</td>\n",
       "      <td>['lab_code_5441']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>6662</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5846</th>\n",
       "      <td>udacity_health_encounter_id_668</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_22666</td>\n",
       "      <td>['procedure_code_10507', 'procedure_code_39636...</td>\n",
       "      <td>['medication_code_4676', 'medication_code_4760...</td>\n",
       "      <td>['lab_code_7518', 'lab_code_1774', 'lab_code_5...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5946</th>\n",
       "      <td>udacity_health_encounter_id_6777</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_78269</td>\n",
       "      <td>['procedure_code_58888', 'procedure_code_72745']</td>\n",
       "      <td>['medication_code_227', 'medication_code_7799']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0, 0]</td>\n",
       "      <td>6777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6508</th>\n",
       "      <td>udacity_health_encounter_id_7327</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_18813</td>\n",
       "      <td>['procedure_code_9329', 'procedure_code_28542'...</td>\n",
       "      <td>['medication_code_4006', 'medication_code_485'...</td>\n",
       "      <td>['lab_code_71', 'lab_code_7769', 'lab_code_659...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>7327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6892</th>\n",
       "      <td>udacity_health_encounter_id_7696</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_24785</td>\n",
       "      <td>['procedure_code_31137', 'procedure_code_23811...</td>\n",
       "      <td>['medication_code_2626', 'medication_code_6310...</td>\n",
       "      <td>['lab_code_8317', 'lab_code_2116', 'lab_code_2...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>7696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6969</th>\n",
       "      <td>udacity_health_encounter_id_7772</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_949</td>\n",
       "      <td>['procedure_code_45976', 'procedure_code_26419...</td>\n",
       "      <td>['medication_code_9684', 'medication_code_8486...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>7772</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7217</th>\n",
       "      <td>udacity_health_encounter_id_995</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "      <td>dx_code_4976</td>\n",
       "      <td>['procedure_code_8156', 'procedure_code_23220']</td>\n",
       "      <td>['medication_code_5388', 'medication_code_5533...</td>\n",
       "      <td>['lab_code_2049']</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>995</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ENCOUNTER_ID                     PATIENT_ID  \\\n",
       "94    udacity_health_encounter_id_1091  udacity_health_patient_id_309   \n",
       "450   udacity_health_encounter_id_1436  udacity_health_patient_id_309   \n",
       "605   udacity_health_encounter_id_1582  udacity_health_patient_id_309   \n",
       "665   udacity_health_encounter_id_1643  udacity_health_patient_id_309   \n",
       "684   udacity_health_encounter_id_1662  udacity_health_patient_id_309   \n",
       "867   udacity_health_encounter_id_1837  udacity_health_patient_id_309   \n",
       "1024  udacity_health_encounter_id_1985  udacity_health_patient_id_309   \n",
       "1042  udacity_health_encounter_id_2000  udacity_health_patient_id_309   \n",
       "1196  udacity_health_encounter_id_2142  udacity_health_patient_id_309   \n",
       "1289  udacity_health_encounter_id_2229  udacity_health_patient_id_309   \n",
       "1340  udacity_health_encounter_id_2281  udacity_health_patient_id_309   \n",
       "1596  udacity_health_encounter_id_2531  udacity_health_patient_id_309   \n",
       "1702  udacity_health_encounter_id_2631  udacity_health_patient_id_309   \n",
       "2069  udacity_health_encounter_id_2989  udacity_health_patient_id_309   \n",
       "2291  udacity_health_encounter_id_3209  udacity_health_patient_id_309   \n",
       "2507  udacity_health_encounter_id_3416  udacity_health_patient_id_309   \n",
       "2696   udacity_health_encounter_id_360  udacity_health_patient_id_309   \n",
       "2890  udacity_health_encounter_id_3792  udacity_health_patient_id_309   \n",
       "2960  udacity_health_encounter_id_3859  udacity_health_patient_id_309   \n",
       "3064  udacity_health_encounter_id_3961  udacity_health_patient_id_309   \n",
       "3167  udacity_health_encounter_id_4058  udacity_health_patient_id_309   \n",
       "3280  udacity_health_encounter_id_4168  udacity_health_patient_id_309   \n",
       "3353  udacity_health_encounter_id_4239  udacity_health_patient_id_309   \n",
       "3561  udacity_health_encounter_id_4439  udacity_health_patient_id_309   \n",
       "3585  udacity_health_encounter_id_4462  udacity_health_patient_id_309   \n",
       "3718  udacity_health_encounter_id_4589  udacity_health_patient_id_309   \n",
       "3752  udacity_health_encounter_id_4619  udacity_health_patient_id_309   \n",
       "3916   udacity_health_encounter_id_478  udacity_health_patient_id_309   \n",
       "4552  udacity_health_encounter_id_5398  udacity_health_patient_id_309   \n",
       "4687   udacity_health_encounter_id_553  udacity_health_patient_id_309   \n",
       "4771  udacity_health_encounter_id_5611  udacity_health_patient_id_309   \n",
       "4785  udacity_health_encounter_id_5624  udacity_health_patient_id_309   \n",
       "4796  udacity_health_encounter_id_5635  udacity_health_patient_id_309   \n",
       "4862  udacity_health_encounter_id_5701  udacity_health_patient_id_309   \n",
       "4881  udacity_health_encounter_id_5721  udacity_health_patient_id_309   \n",
       "5132   udacity_health_encounter_id_597  udacity_health_patient_id_309   \n",
       "5306  udacity_health_encounter_id_6139  udacity_health_patient_id_309   \n",
       "5385   udacity_health_encounter_id_622  udacity_health_patient_id_309   \n",
       "5472  udacity_health_encounter_id_6305  udacity_health_patient_id_309   \n",
       "5564  udacity_health_encounter_id_6394  udacity_health_patient_id_309   \n",
       "5669  udacity_health_encounter_id_6497  udacity_health_patient_id_309   \n",
       "5684  udacity_health_encounter_id_6512  udacity_health_patient_id_309   \n",
       "5829  udacity_health_encounter_id_6662  udacity_health_patient_id_309   \n",
       "5846   udacity_health_encounter_id_668  udacity_health_patient_id_309   \n",
       "5946  udacity_health_encounter_id_6777  udacity_health_patient_id_309   \n",
       "6508  udacity_health_encounter_id_7327  udacity_health_patient_id_309   \n",
       "6892  udacity_health_encounter_id_7696  udacity_health_patient_id_309   \n",
       "6969  udacity_health_encounter_id_7772  udacity_health_patient_id_309   \n",
       "7217   udacity_health_encounter_id_995  udacity_health_patient_id_309   \n",
       "\n",
       "     PRINCIPAL_DIAGNOSIS_CODE  \\\n",
       "94              dx_code_62502   \n",
       "450             dx_code_37825   \n",
       "605             dx_code_79663   \n",
       "665             dx_code_60773   \n",
       "684             dx_code_29272   \n",
       "867             dx_code_56368   \n",
       "1024            dx_code_38581   \n",
       "1042            dx_code_91230   \n",
       "1196            dx_code_58527   \n",
       "1289            dx_code_46826   \n",
       "1340            dx_code_39484   \n",
       "1596            dx_code_75423   \n",
       "1702            dx_code_38210   \n",
       "2069            dx_code_80145   \n",
       "2291            dx_code_74710   \n",
       "2507            dx_code_43343   \n",
       "2696            dx_code_94868   \n",
       "2890            dx_code_37738   \n",
       "2960            dx_code_17144   \n",
       "3064            dx_code_61465   \n",
       "3167            dx_code_73636   \n",
       "3280             dx_code_9795   \n",
       "3353            dx_code_28764   \n",
       "3561            dx_code_31393   \n",
       "3585            dx_code_53624   \n",
       "3718            dx_code_67438   \n",
       "3752            dx_code_79460   \n",
       "3916            dx_code_43778   \n",
       "4552            dx_code_58513   \n",
       "4687            dx_code_88408   \n",
       "4771            dx_code_46410   \n",
       "4785            dx_code_96267   \n",
       "4796            dx_code_67422   \n",
       "4862             dx_code_4007   \n",
       "4881            dx_code_11147   \n",
       "5132            dx_code_25907   \n",
       "5306            dx_code_40784   \n",
       "5385            dx_code_25556   \n",
       "5472            dx_code_93595   \n",
       "5564            dx_code_17875   \n",
       "5669            dx_code_36095   \n",
       "5684            dx_code_92784   \n",
       "5829            dx_code_26279   \n",
       "5846            dx_code_22666   \n",
       "5946            dx_code_78269   \n",
       "6508            dx_code_18813   \n",
       "6892            dx_code_24785   \n",
       "6969              dx_code_949   \n",
       "7217             dx_code_4976   \n",
       "\n",
       "                                         PROCEDURE_CODE  \\\n",
       "94                             ['procedure_code_69597']   \n",
       "450   ['procedure_code_14044', 'procedure_code_47911...   \n",
       "605                                                  []   \n",
       "665   ['procedure_code_39999', 'procedure_code_31842...   \n",
       "684   ['procedure_code_12589', 'procedure_code_3005'...   \n",
       "867   ['procedure_code_14069', 'procedure_code_15230...   \n",
       "1024    ['procedure_code_1431', 'procedure_code_71061']   \n",
       "1042                                                 []   \n",
       "1196  ['procedure_code_56979', 'procedure_code_24797...   \n",
       "1289  ['procedure_code_50916', 'procedure_code_20032...   \n",
       "1340  ['procedure_code_33400', 'procedure_code_56', ...   \n",
       "1596  ['procedure_code_15600', 'procedure_code_19008...   \n",
       "1702                             ['procedure_code_121']   \n",
       "2069  ['procedure_code_28893', 'procedure_code_9124'...   \n",
       "2291  ['procedure_code_41970', 'procedure_code_64032...   \n",
       "2507  ['procedure_code_11834', 'procedure_code_12304...   \n",
       "2696                                                 []   \n",
       "2890  ['procedure_code_11852', 'procedure_code_45278...   \n",
       "2960                           ['procedure_code_69917']   \n",
       "3064  ['procedure_code_15248', 'procedure_code_65724...   \n",
       "3167  ['procedure_code_5646', 'procedure_code_1381',...   \n",
       "3280  ['procedure_code_44804', 'procedure_code_10917...   \n",
       "3353  ['procedure_code_31873', 'procedure_code_29930...   \n",
       "3561  ['procedure_code_69203', 'procedure_code_53820...   \n",
       "3585   ['procedure_code_64278', 'procedure_code_19230']   \n",
       "3718                                                 []   \n",
       "3752  ['procedure_code_8648', 'procedure_code_22388'...   \n",
       "3916                                                 []   \n",
       "4552  ['procedure_code_3629', 'procedure_code_56509'...   \n",
       "4687  ['procedure_code_19471', 'procedure_code_17060...   \n",
       "4771  ['procedure_code_71656', 'procedure_code_29912...   \n",
       "4785                                                 []   \n",
       "4796                                                 []   \n",
       "4862     ['procedure_code_8263', 'procedure_code_7509']   \n",
       "4881  ['procedure_code_68127', 'procedure_code_3768'...   \n",
       "5132   ['procedure_code_41138', 'procedure_code_28596']   \n",
       "5306  ['procedure_code_48973', 'procedure_code_7953'...   \n",
       "5385                           ['procedure_code_10462']   \n",
       "5472  ['procedure_code_6005', 'procedure_code_13203'...   \n",
       "5564                           ['procedure_code_42424']   \n",
       "5669  ['procedure_code_33881', 'procedure_code_6396'...   \n",
       "5684  ['procedure_code_31090', 'procedure_code_28839...   \n",
       "5829  ['procedure_code_47253', 'procedure_code_52416...   \n",
       "5846  ['procedure_code_10507', 'procedure_code_39636...   \n",
       "5946   ['procedure_code_58888', 'procedure_code_72745']   \n",
       "6508  ['procedure_code_9329', 'procedure_code_28542'...   \n",
       "6892  ['procedure_code_31137', 'procedure_code_23811...   \n",
       "6969  ['procedure_code_45976', 'procedure_code_26419...   \n",
       "7217    ['procedure_code_8156', 'procedure_code_23220']   \n",
       "\n",
       "                                        MEDICATION_CODE  \\\n",
       "94    ['medication_code_9253', 'medication_code_7444...   \n",
       "450   ['medication_code_4081', 'medication_code_6050...   \n",
       "605   ['medication_code_4450', 'medication_code_4900...   \n",
       "665   ['medication_code_413', 'medication_code_5966'...   \n",
       "684   ['medication_code_9845', 'medication_code_6602...   \n",
       "867   ['medication_code_5820', 'medication_code_8028...   \n",
       "1024  ['medication_code_3134', 'medication_code_4868...   \n",
       "1042                                                 []   \n",
       "1196  ['medication_code_7631', 'medication_code_7508...   \n",
       "1289  ['medication_code_9204', 'medication_code_1282...   \n",
       "1340  ['medication_code_1938', 'medication_code_4076...   \n",
       "1596  ['medication_code_7265', 'medication_code_2247...   \n",
       "1702  ['medication_code_7480', 'medication_code_7917...   \n",
       "2069  ['medication_code_43', 'medication_code_5602',...   \n",
       "2291  ['medication_code_7596', 'medication_code_7481...   \n",
       "2507  ['medication_code_3767', 'medication_code_7630...   \n",
       "2696  ['medication_code_193', 'medication_code_3556'...   \n",
       "2890  ['medication_code_7882', 'medication_code_6562...   \n",
       "2960  ['medication_code_2152', 'medication_code_7078...   \n",
       "3064  ['medication_code_701', 'medication_code_8324'...   \n",
       "3167  ['medication_code_4201', 'medication_code_5328...   \n",
       "3280  ['medication_code_2738', 'medication_code_9919...   \n",
       "3353  ['medication_code_7607', 'medication_code_6643...   \n",
       "3561  ['medication_code_9780', 'medication_code_7008...   \n",
       "3585  ['medication_code_8759', 'medication_code_59',...   \n",
       "3718                           ['medication_code_8103']   \n",
       "3752  ['medication_code_4773', 'medication_code_5839...   \n",
       "3916   ['medication_code_1646', 'medication_code_4346']   \n",
       "4552  ['medication_code_5697', 'medication_code_9845...   \n",
       "4687  ['medication_code_1541', 'medication_code_5218...   \n",
       "4771  ['medication_code_3365', 'medication_code_862'...   \n",
       "4785                           ['medication_code_1343']   \n",
       "4796                           ['medication_code_2864']   \n",
       "4862   ['medication_code_3925', 'medication_code_3163']   \n",
       "4881  ['medication_code_4722', 'medication_code_2681...   \n",
       "5132  ['medication_code_4777', 'medication_code_4318...   \n",
       "5306  ['medication_code_3191', 'medication_code_105'...   \n",
       "5385    ['medication_code_3626', 'medication_code_311']   \n",
       "5472                           ['medication_code_4513']   \n",
       "5564                                                 []   \n",
       "5669  ['medication_code_6849', 'medication_code_2364...   \n",
       "5684  ['medication_code_5274', 'medication_code_6976...   \n",
       "5829  ['medication_code_2602', 'medication_code_3320...   \n",
       "5846  ['medication_code_4676', 'medication_code_4760...   \n",
       "5946    ['medication_code_227', 'medication_code_7799']   \n",
       "6508  ['medication_code_4006', 'medication_code_485'...   \n",
       "6892  ['medication_code_2626', 'medication_code_6310...   \n",
       "6969  ['medication_code_9684', 'medication_code_8486...   \n",
       "7217  ['medication_code_5388', 'medication_code_5533...   \n",
       "\n",
       "                                               LAB_CODE  \\\n",
       "94                   ['lab_code_9415', 'lab_code_8179']   \n",
       "450                  ['lab_code_6246', 'lab_code_7197']   \n",
       "605                                                  []   \n",
       "665                  ['lab_code_4295', 'lab_code_2383']   \n",
       "684   ['lab_code_4330', 'lab_code_3499', 'lab_code_1...   \n",
       "867                                   ['lab_code_8964']   \n",
       "1024                                  ['lab_code_7702']   \n",
       "1042                                  ['lab_code_8062']   \n",
       "1196                 ['lab_code_3908', 'lab_code_1500']   \n",
       "1289                 ['lab_code_5607', 'lab_code_5139']   \n",
       "1340  ['lab_code_9178', 'lab_code_3557', 'lab_code_2...   \n",
       "1596  ['lab_code_8336', 'lab_code_7292', 'lab_code_8...   \n",
       "1702                                  ['lab_code_4777']   \n",
       "2069  ['lab_code_3392', 'lab_code_9665', 'lab_code_4...   \n",
       "2291                                  ['lab_code_9105']   \n",
       "2507  ['lab_code_774', 'lab_code_9591', 'lab_code_31...   \n",
       "2696                                                 []   \n",
       "2890  ['lab_code_3787', 'lab_code_6452', 'lab_code_6...   \n",
       "2960                                  ['lab_code_5682']   \n",
       "3064  ['lab_code_7691', 'lab_code_4598', 'lab_code_1...   \n",
       "3167  ['lab_code_9083', 'lab_code_1392', 'lab_code_1...   \n",
       "3280  ['lab_code_4690', 'lab_code_5570', 'lab_code_2...   \n",
       "3353  ['lab_code_8492', 'lab_code_3309', 'lab_code_6...   \n",
       "3561                                                 []   \n",
       "3585  ['lab_code_3790', 'lab_code_591', 'lab_code_89...   \n",
       "3718                                                 []   \n",
       "3752                                  ['lab_code_5055']   \n",
       "3916                                                 []   \n",
       "4552                                  ['lab_code_4909']   \n",
       "4687  ['lab_code_4271', 'lab_code_7946', 'lab_code_9...   \n",
       "4771  ['lab_code_8280', 'lab_code_9803', 'lab_code_1...   \n",
       "4785                                  ['lab_code_9589']   \n",
       "4796                                                 []   \n",
       "4862                                                 []   \n",
       "4881  ['lab_code_8853', 'lab_code_3111', 'lab_code_2...   \n",
       "5132                                  ['lab_code_1043']   \n",
       "5306  ['lab_code_5078', 'lab_code_2361', 'lab_code_9...   \n",
       "5385                                  ['lab_code_4960']   \n",
       "5472  ['lab_code_7349', 'lab_code_1043', 'lab_code_9...   \n",
       "5564                                  ['lab_code_5222']   \n",
       "5669  ['lab_code_2570', 'lab_code_2383', 'lab_code_8...   \n",
       "5684                                  ['lab_code_4639']   \n",
       "5829                                  ['lab_code_5441']   \n",
       "5846  ['lab_code_7518', 'lab_code_1774', 'lab_code_5...   \n",
       "5946                                                 []   \n",
       "6508  ['lab_code_71', 'lab_code_7769', 'lab_code_659...   \n",
       "6892  ['lab_code_8317', 'lab_code_2116', 'lab_code_2...   \n",
       "6969                                                 []   \n",
       "7217                                  ['lab_code_2049']   \n",
       "\n",
       "                                                  LABEL  ENCOUNTER_ID_NUMBER  \n",
       "94                                [0, 0, 0, 0, 0, 0, 0]                 1091  \n",
       "450   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 1436  \n",
       "605                                           [0, 0, 0]                 1582  \n",
       "665   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 1643  \n",
       "684   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 1662  \n",
       "867             [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 1837  \n",
       "1024                        [0, 0, 0, 0, 0, 0, 0, 0, 0]                 1985  \n",
       "1042                                                [0]                 2000  \n",
       "1196  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 2142  \n",
       "1289      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 2229  \n",
       "1340   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 2281  \n",
       "1596  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 2531  \n",
       "1702                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 2631  \n",
       "2069  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 2989  \n",
       "2291  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 3209  \n",
       "2507  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 3416  \n",
       "2696                                          [0, 0, 0]                  360  \n",
       "2890  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 3792  \n",
       "2960                              [0, 0, 0, 0, 0, 0, 0]                 3859  \n",
       "3064  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 3961  \n",
       "3167  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 4058  \n",
       "3280  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 4168  \n",
       "3353  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 4239  \n",
       "3561                        [0, 0, 0, 0, 0, 0, 0, 0, 0]                 4439  \n",
       "3585  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 4462  \n",
       "3718                                                [0]                 4589  \n",
       "3752      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 4619  \n",
       "3916                                             [0, 0]                  478  \n",
       "4552                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 5398  \n",
       "4687            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                  553  \n",
       "4771   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 5611  \n",
       "4785                                             [0, 0]                 5624  \n",
       "4796                                                [0]                 5635  \n",
       "4862                                       [0, 0, 0, 0]                 5701  \n",
       "4881  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 5721  \n",
       "5132                                 [0, 0, 0, 0, 0, 0]                  597  \n",
       "5306  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 6139  \n",
       "5385                                       [0, 0, 0, 0]                  622  \n",
       "5472                              [0, 0, 0, 0, 0, 0, 0]                 6305  \n",
       "5564                                             [0, 0]                 6394  \n",
       "5669         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 6497  \n",
       "5684                        [0, 0, 0, 0, 0, 0, 0, 0, 0]                 6512  \n",
       "5829            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 6662  \n",
       "5846      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                  668  \n",
       "5946                                       [0, 0, 0, 0]                 6777  \n",
       "6508  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 7327  \n",
       "6892  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 7696  \n",
       "6969                              [0, 0, 0, 0, 0, 0, 0]                 7772  \n",
       "7217                              [0, 0, 0, 0, 0, 0, 0]                  995  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ehr_level_df[ehr_level_df['PATIENT_ID']=='udacity_health_patient_id_309']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For patient ID 309, the highest `ENCOUNTER_ID_NUMBER` in the table above is 7772, so the selected (last) encounter should be 7772."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>ENCOUNTER_ID_NUMBER</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6969</th>\n",
       "      <td>udacity_health_encounter_id_7772</td>\n",
       "      <td>7772</td>\n",
       "      <td>udacity_health_patient_id_309</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ENCOUNTER_ID  ENCOUNTER_ID_NUMBER  \\\n",
       "6969  udacity_health_encounter_id_7772                 7772   \n",
       "\n",
       "                         PATIENT_ID  \n",
       "6969  udacity_health_patient_id_309  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_last_encounter_df[test_last_encounter_df['PATIENT_ID']=='udacity_health_patient_id_309']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "      <th>PRINCIPAL_DIAGNOSIS_CODE</th>\n",
       "      <th>PROCEDURE_CODE</th>\n",
       "      <th>MEDICATION_CODE</th>\n",
       "      <th>LAB_CODE</th>\n",
       "      <th>LABEL</th>\n",
       "      <th>ENCOUNTER_ID_NUMBER</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>263</th>\n",
       "      <td>udacity_health_encounter_id_1258</td>\n",
       "      <td>udacity_health_patient_id_418</td>\n",
       "      <td>dx_code_87320</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_7475']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0]</td>\n",
       "      <td>1258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1230</th>\n",
       "      <td>udacity_health_encounter_id_2173</td>\n",
       "      <td>udacity_health_patient_id_418</td>\n",
       "      <td>dx_code_72104</td>\n",
       "      <td>['procedure_code_37393', 'procedure_code_48757...</td>\n",
       "      <td>['medication_code_3809', 'medication_code_9141...</td>\n",
       "      <td>['lab_code_2839', 'lab_code_2665', 'lab_code_3...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
       "      <td>2173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2448</th>\n",
       "      <td>udacity_health_encounter_id_3362</td>\n",
       "      <td>udacity_health_patient_id_418</td>\n",
       "      <td>dx_code_77979</td>\n",
       "      <td>['procedure_code_64397']</td>\n",
       "      <td>['medication_code_2807', 'medication_code_6979']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0]</td>\n",
       "      <td>3362</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ENCOUNTER_ID                     PATIENT_ID  \\\n",
       "263   udacity_health_encounter_id_1258  udacity_health_patient_id_418   \n",
       "1230  udacity_health_encounter_id_2173  udacity_health_patient_id_418   \n",
       "2448  udacity_health_encounter_id_3362  udacity_health_patient_id_418   \n",
       "\n",
       "     PRINCIPAL_DIAGNOSIS_CODE  \\\n",
       "263             dx_code_87320   \n",
       "1230            dx_code_72104   \n",
       "2448            dx_code_77979   \n",
       "\n",
       "                                         PROCEDURE_CODE  \\\n",
       "263                                                  []   \n",
       "1230  ['procedure_code_37393', 'procedure_code_48757...   \n",
       "2448                           ['procedure_code_64397']   \n",
       "\n",
       "                                        MEDICATION_CODE  \\\n",
       "263                            ['medication_code_7475']   \n",
       "1230  ['medication_code_3809', 'medication_code_9141...   \n",
       "2448   ['medication_code_2807', 'medication_code_6979']   \n",
       "\n",
       "                                               LAB_CODE  \\\n",
       "263                                                  []   \n",
       "1230  ['lab_code_2839', 'lab_code_2665', 'lab_code_3...   \n",
       "2448                                                 []   \n",
       "\n",
       "                                                  LABEL  ENCOUNTER_ID_NUMBER  \n",
       "263                                                 [0]                 1258  \n",
       "1230  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...                 2173  \n",
       "2448                                          [0, 0, 0]                 3362  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ehr_level_df[ehr_level_df['PATIENT_ID']=='udacity_health_patient_id_418']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For patient id 418, the selected encounter should be 3362."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>ENCOUNTER_ID_NUMBER</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2448</th>\n",
       "      <td>udacity_health_encounter_id_3362</td>\n",
       "      <td>3362</td>\n",
       "      <td>udacity_health_patient_id_418</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ENCOUNTER_ID  ENCOUNTER_ID_NUMBER  \\\n",
       "2448  udacity_health_encounter_id_3362                 3362   \n",
       "\n",
       "                         PATIENT_ID  \n",
       "2448  udacity_health_patient_id_418  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_last_encounter_df[test_last_encounter_df['PATIENT_ID']=='udacity_health_patient_id_418']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "      <th>PRINCIPAL_DIAGNOSIS_CODE</th>\n",
       "      <th>PROCEDURE_CODE</th>\n",
       "      <th>MEDICATION_CODE</th>\n",
       "      <th>LAB_CODE</th>\n",
       "      <th>LABEL</th>\n",
       "      <th>ENCOUNTER_ID_NUMBER</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1678</th>\n",
       "      <td>udacity_health_encounter_id_2608</td>\n",
       "      <td>udacity_health_patient_id_908</td>\n",
       "      <td>dx_code_52877</td>\n",
       "      <td>[]</td>\n",
       "      <td>['medication_code_6189']</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0]</td>\n",
       "      <td>2608</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2796</th>\n",
       "      <td>udacity_health_encounter_id_3698</td>\n",
       "      <td>udacity_health_patient_id_908</td>\n",
       "      <td>dx_code_52688</td>\n",
       "      <td>['procedure_code_60912']</td>\n",
       "      <td>[]</td>\n",
       "      <td>['lab_code_4691', 'lab_code_9380']</td>\n",
       "      <td>[0, 0, 0]</td>\n",
       "      <td>3698</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5301</th>\n",
       "      <td>udacity_health_encounter_id_6132</td>\n",
       "      <td>udacity_health_patient_id_908</td>\n",
       "      <td>dx_code_36066</td>\n",
       "      <td>['procedure_code_5724', 'procedure_code_55191'...</td>\n",
       "      <td>['medication_code_9532', 'medication_code_3398...</td>\n",
       "      <td>['lab_code_9732', 'lab_code_2808', 'lab_code_3...</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>6132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7131</th>\n",
       "      <td>udacity_health_encounter_id_910</td>\n",
       "      <td>udacity_health_patient_id_908</td>\n",
       "      <td>dx_code_98539</td>\n",
       "      <td>['procedure_code_8188', 'procedure_code_59872'...</td>\n",
       "      <td>['medication_code_5112', 'medication_code_3537...</td>\n",
       "      <td>[]</td>\n",
       "      <td>[0, 0, 0, 0, 0, 0, 0]</td>\n",
       "      <td>910</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ENCOUNTER_ID                     PATIENT_ID  \\\n",
       "1678  udacity_health_encounter_id_2608  udacity_health_patient_id_908   \n",
       "2796  udacity_health_encounter_id_3698  udacity_health_patient_id_908   \n",
       "5301  udacity_health_encounter_id_6132  udacity_health_patient_id_908   \n",
       "7131   udacity_health_encounter_id_910  udacity_health_patient_id_908   \n",
       "\n",
       "     PRINCIPAL_DIAGNOSIS_CODE  \\\n",
       "1678            dx_code_52877   \n",
       "2796            dx_code_52688   \n",
       "5301            dx_code_36066   \n",
       "7131            dx_code_98539   \n",
       "\n",
       "                                         PROCEDURE_CODE  \\\n",
       "1678                                                 []   \n",
       "2796                           ['procedure_code_60912']   \n",
       "5301  ['procedure_code_5724', 'procedure_code_55191'...   \n",
       "7131  ['procedure_code_8188', 'procedure_code_59872'...   \n",
       "\n",
       "                                        MEDICATION_CODE  \\\n",
       "1678                           ['medication_code_6189']   \n",
       "2796                                                 []   \n",
       "5301  ['medication_code_9532', 'medication_code_3398...   \n",
       "7131  ['medication_code_5112', 'medication_code_3537...   \n",
       "\n",
       "                                               LAB_CODE  \\\n",
       "1678                                                 []   \n",
       "2796                 ['lab_code_4691', 'lab_code_9380']   \n",
       "5301  ['lab_code_9732', 'lab_code_2808', 'lab_code_3...   \n",
       "7131                                                 []   \n",
       "\n",
       "                                           LABEL  ENCOUNTER_ID_NUMBER  \n",
       "1678                                         [0]                 2608  \n",
       "2796                                   [0, 0, 0]                 3698  \n",
       "5301  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]                 6132  \n",
       "7131                       [0, 0, 0, 0, 0, 0, 0]                  910  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ehr_level_df[ehr_level_df['PATIENT_ID']=='udacity_health_patient_id_908']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For patient id 908, the selected encounter should be 6132."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ENCOUNTER_ID</th>\n",
       "      <th>ENCOUNTER_ID_NUMBER</th>\n",
       "      <th>PATIENT_ID</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5301</th>\n",
       "      <td>udacity_health_encounter_id_6132</td>\n",
       "      <td>6132</td>\n",
       "      <td>udacity_health_patient_id_908</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          ENCOUNTER_ID  ENCOUNTER_ID_NUMBER  \\\n",
       "5301  udacity_health_encounter_id_6132                 6132   \n",
       "\n",
       "                         PATIENT_ID  \n",
       "5301  udacity_health_patient_id_908  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_last_encounter_df[test_last_encounter_df['PATIENT_ID']=='udacity_health_patient_id_908']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise  L3 - 2: Dataset Splitting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instructions\n",
    "- Split the provided dataset into train and test partitions, making sure that no patient's encounter records appear in both partitions.\n",
    "- Be sure to run the following three tests:\n",
    "    - Each patient's data appears in only one partition\n",
    "    - Total number of unique patients across all partitions = total number of unique patients in the original full dataset\n",
    "    - Total number of rows in the original dataset = sum of rows across the partitions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "splitting_exercise_dataset_path = \"./data/SYNTHETIC_EHR_DATASET.csv\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Solution"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This is largely a review of two parts in this lesson and you can use most of the same code for each step. The key is to identify the level of the dataset and then to convert it to the encounter level before you do your splits. Then perform the splitting and run the tests."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Convert to Encounter Level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the line-level records to the encounter level (one row per encounter) before splitting,\n",
    "# so that every record of an encounter ends up in the same partition.\n",
    "ehr_pre_split_df = pd.read_csv(splitting_exercise_dataset_path)\n",
    "grouping_field_list = ['ENCOUNTER_ID', 'PATIENT_ID', 'PRINCIPAL_DIAGNOSIS_CODE']\n",
    "non_grouped_field_list = [c for c in ehr_pre_split_df.columns if c not in grouping_field_list]\n",
    "# pd.notna drops every kind of missing value (float NaN, None, pd.NA);\n",
    "# an identity test against np.nan only catches the np.nan singleton itself.\n",
    "ehr_encounter_df = (\n",
    "    ehr_pre_split_df.groupby(grouping_field_list)[non_grouped_field_list]\n",
    "    .agg(lambda x: [y for y in x if pd.notna(y)])\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Split at Patient Level"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "PATIENT_ID_FIELD = 'PATIENT_ID'\n",
    "TEST_PERCENTAGE = 0.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_dataset_patient_level(df, key, test_percentage=0.2, random_state=None):\n",
    "    \"\"\"Split ``df`` into train/test partitions so that no ``key`` value lands in both.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame holding one or more rows per ``key`` value.\n",
    "        key: name of the column whose values must stay together (e.g. the patient ID).\n",
    "        test_percentage: fraction of the unique ``key`` values assigned to the test partition.\n",
    "        random_state: optional integer seed. When given, the shuffle (and therefore the\n",
    "            split) is reproducible; when None, NumPy's global random state is used.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(train, test)`` of DataFrames, each with its index reset.\n",
    "    \"\"\"\n",
    "    if random_state is None:\n",
    "        row_order = np.random.permutation(len(df))\n",
    "    else:\n",
    "        row_order = np.random.RandomState(random_state).permutation(len(df))\n",
    "    df = df.iloc[row_order]\n",
    "    # unique() keeps first-seen order, so shuffled rows yield a shuffled list of key values\n",
    "    unique_values = df[key].unique()\n",
    "    sample_size = round(len(unique_values) * (1 - test_percentage))\n",
    "    in_train = df[key].isin(unique_values[:sample_size])\n",
    "    # the test partition is the exact complement of train, so every row is used exactly once\n",
    "    train = df[in_train].reset_index(drop=True)\n",
    "    test = df[~in_train].reset_index(drop=True)\n",
    "    return train, test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_df, test_df = split_dataset_patient_level(ehr_encounter_df, PATIENT_ID_FIELD, TEST_PERCENTAGE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test passed for patient data in only one partition\n"
     ]
    }
   ],
   "source": [
    "# no patient ID may show up in both the train and the test partition\n",
    "assert set(train_df[PATIENT_ID_FIELD].unique()).isdisjoint(test_df[PATIENT_ID_FIELD].unique())\n",
    "print(\"Test passed for patient data in only one partition\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test passed for number of unique patients being equal!\n"
     ]
    }
   ],
   "source": [
    "# the per-partition unique patient counts must add up to the count in the full dataset\n",
    "assert ehr_encounter_df[PATIENT_ID_FIELD].nunique() == sum(\n",
    "    part[PATIENT_ID_FIELD].nunique() for part in (train_df, test_df)\n",
    ")\n",
    "print(\"Test passed for number of unique patients being equal!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test passed for number of total rows equal!\n"
     ]
    }
   ],
   "source": [
    "# splitting must neither drop nor duplicate rows\n",
    "assert ehr_encounter_df.shape[0] == train_df.shape[0] + test_df.shape[0]\n",
    "print(\"Test passed for number of total rows equal!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Optional\n",
    "- Check the label distribution and use scikit-learn - https://scikit-learn.org/stable/auto_examples/model_selection/plot_cv_indices.html#sphx-glr-auto-examples-model-selection-plot-cv-indices-py"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise  L3 - 3: Build Bucketed Numeric Feature with TF"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instructions\n",
    "- Given the Swiss heart disease dataset that we worked with earlier, build a bucketed numeric feature from the age feature. \n",
    "- For this exercise, use the TensorFlow CSV function for loading the dataset directly into a TF tensor - https://www.tensorflow.org/api_docs/python/tf/data/experimental/make_csv_dataset. This approach will be useful when you have much larger datasets and also allows you to bypass loading the dataset in Pandas.\n",
    "- More information on the TensorFlow bucketized feature can be found here: https://www.tensorflow.org/api_docs/python/tf/feature_column/bucketized_column. Bucketed features take as input the numeric feature that we covered in the lesson. For the numeric feature, you do not need to normalize it like we did in the lesson."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "swiss_dataset_path = \"./data/lesson_exercise_swiss_dataset.csv\"\n",
    "BATCH_SIZE =128\n",
    "PREDICTOR_FIELD = 'num_label'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ETL with TF dataset make csv function\n",
    "swiss_tf_dataset = tf.data.experimental.make_csv_dataset( swiss_dataset_path, batch_size=BATCH_SIZE, \n",
    "                                                         num_epochs=1, label_name=PREDICTOR_FIELD, header=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "OrderedDict([('age',\n",
       "              <tf.Tensor: shape=(123,), dtype=int32, numpy=\n",
       "              array([65, 70, 38, 52, 51, 60, 56, 53, 51, 56, 61, 59, 45, 62, 50, 54, 61,\n",
       "                     38, 61, 60, 53, 64, 65, 38, 58, 66, 62, 58, 68, 58, 52, 55, 32, 61,\n",
       "                     48, 54, 50, 43, 50, 51, 47, 65, 42, 53, 67, 55, 43, 61, 73, 54, 69,\n",
       "                     46, 64, 65, 60, 35, 52, 63, 56, 38, 61, 59, 61, 57, 57, 66, 62, 51,\n",
       "                     42, 36, 59, 57, 72, 64, 53, 60, 56, 47, 60, 63, 47, 53, 63, 63, 56,\n",
       "                     62, 56, 40, 59, 53, 70, 50, 51, 43, 41, 62, 34, 68, 62, 61, 60, 55,\n",
       "                     74, 69, 55, 56, 57, 52, 38, 53, 38, 62, 53, 61, 56, 57, 51, 46, 59,\n",
       "                     63, 51, 57, 47], dtype=int32)>)])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "swiss_dataset_batch = next(iter(swiss_tf_dataset))[0]\n",
    "swiss_dataset_batch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create TF numeric feature\n",
    "tf_numeric_age_feature = tf.feature_column.numeric_column(key='age', default_value=0, dtype=tf.float64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "#boundaries for the different age buckets\n",
    "b_list = [ 0, 18, 25, 40, 55, 65, 80, 100]\n",
    "#create TF bucket feature from numeric feature\n",
    "tf_bucket_age_feature = tf.feature_column.bucketized_column(source_column=tf_numeric_age_feature, boundaries= b_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "def demo(feature_column, example_batch):\n",
    "    \"\"\"Print the dense output a feature column produces for a batch, then its first row.\n",
    "\n",
    "    Args:\n",
    "        feature_column: feature column handed to ``tf.keras.layers.DenseFeatures``.\n",
    "        example_batch: batch of input features fed to the layer\n",
    "            (presumably a dict keyed by feature name - confirm against the caller).\n",
    "    \"\"\"\n",
    "    feature_layer = tf.keras.layers.DenseFeatures(feature_column)\n",
    "    # apply the layer once and reuse the result instead of transforming the batch twice\n",
    "    transformed_batch = feature_layer(example_batch)\n",
    "    print(transformed_batch)\n",
    "    print(\"\\nExample of one transformed row:\")\n",
    "    print(transformed_batch.numpy()[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example bucket field:\n",
      "BucketizedColumn(source_column=NumericColumn(key='age', shape=(1,), default_value=(0,), dtype=tf.float64, normalizer_fn=None), boundaries=(0, 18, 25, 40, 55, 65, 80, 100))\n",
      "\n",
      "tf.Tensor(\n",
      "[[0. 0. 0. ... 1. 0. 0.]\n",
      " [0. 0. 0. ... 1. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]], shape=(123, 9), dtype=float32)\n",
      "\n",
      "Example of one transformed row:\n",
      "[0. 0. 0. 0. 0. 0. 1. 0. 0.]\n"
     ]
    }
   ],
   "source": [
    "print(\"Example bucket field:\\n{}\\n\".format(tf_bucket_age_feature))\n",
    "demo(tf_bucket_age_feature, swiss_dataset_batch)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise  L3 - 4:  Build Embedding Categorical Feature with TF"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Instructions\n",
    "- Build a 10 dimension embedding feature for the PRINCIPAL_DIAGNOSIS_CODE field\n",
    "- Here is the link to the TensorFlow embedding column documentation - https://www.tensorflow.org/api_docs/python/tf/feature_column/embedding_column\n",
    "- Some functions provided below to assist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "ehr_line_df = pd.read_csv(\"./data/SYNTHETIC_EHR_DATASET.csv\")\n",
    "cat_example_df = ehr_line_df[['ENCOUNTER_ID', 'PRINCIPAL_DIAGNOSIS_CODE', 'LABEL']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "#adapted from https://www.tensorflow.org/tutorials/structured_data/feature_columns\n",
    "def df_to_dataset(df, predictor, batch_size=32):\n",
    "    \"\"\"Build a shuffled, batched ``tf.data.Dataset`` of (features, label) pairs from ``df``.\n",
    "\n",
    "    The input frame is copied first, so the caller's DataFrame keeps its ``predictor`` column.\n",
    "    \"\"\"\n",
    "    features = df.copy()\n",
    "    # pop() removes the label column from the copy, leaving only the feature columns\n",
    "    target = features.pop(predictor)\n",
    "    return (\n",
    "        tf.data.Dataset.from_tensor_slices((dict(features), target))\n",
    "        .shuffle(buffer_size=len(features))\n",
    "        .batch(batch_size)\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "BATCH_SIZE = 64\n",
    "PREDICTOR_FIELD = 'LABEL'\n",
    "categorical_tf_ds = df_to_dataset(cat_example_df, PREDICTOR_FIELD, batch_size=BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# build vocab for categorical features\n",
    "def write_vocabulary_file(vocab_list, field_name, default_value, vocab_dir='./vocab/'):\n",
    "    \"\"\"Write a one-term-per-line vocabulary file for a categorical field.\n",
    "\n",
    "    Args:\n",
    "        vocab_list: iterable of vocabulary terms for the field.\n",
    "        field_name: field name; the file is named '<field_name>_vocab.txt'.\n",
    "        default_value: placeholder term written as the first row of the file.\n",
    "        vocab_dir: directory that receives the file; it is created when missing.\n",
    "\n",
    "    Returns:\n",
    "        Path of the vocabulary file that was written.\n",
    "    \"\"\"\n",
    "    import os  # local import so the function does not rely on a later cell importing os\n",
    "    if vocab_dir:\n",
    "        os.makedirs(vocab_dir, exist_ok=True)\n",
    "    output_file_path = os.path.join(vocab_dir, str(field_name) + \"_vocab.txt\")\n",
    "    # put default value in first row as TF requires; a plain list keeps the default intact,\n",
    "    # whereas np.insert would cut it down to the item width of a fixed-width string array\n",
    "    vocab_terms = [default_value] + list(vocab_list)\n",
    "    pd.Series(vocab_terms).to_csv(output_file_path, index=False, header=False)\n",
    "    return output_file_path\n",
    "\n",
    "def build_vocab_files(df, categorical_column_list, default_value='00'):\n",
    "    \"\"\"Write one vocabulary file per categorical column and return the paths in column order.\"\"\"\n",
    "    return [\n",
    "        write_vocabulary_file(df[column].unique(), column, default_value)\n",
    "        for column in categorical_column_list\n",
    "    ]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Solution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# create the vocabulary directory when it does not exist yet (no-op otherwise)\n",
    "os.makedirs(\"./vocab/\", exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'./vocab/PRINCIPAL_DIAGNOSIS_CODE_vocab.txt'"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "categorical_field_list = [\"PRINCIPAL_DIAGNOSIS_CODE\"]\n",
    "vocab_files_list = build_vocab_files(cat_example_df, categorical_field_list)\n",
    "vocab_files_list[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:vocabulary_size = 6753 in PRINCIPAL_DIAGNOSIS_CODE is inferred from the number of elements in the vocabulary_file ./vocab/PRINCIPAL_DIAGNOSIS_CODE_vocab.txt.\n"
     ]
    }
   ],
   "source": [
    "principal_diagnosis_vocab = tf.feature_column.categorical_column_with_vocabulary_file(\n",
    "            key=\"PRINCIPAL_DIAGNOSIS_CODE\", vocabulary_file = vocab_files_list[0], num_oov_buckets=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "dims = 10\n",
    "cat_embedded = tf.feature_column.embedding_column(principal_diagnosis_vocab, dimension=dims)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "categorical_tf_ds_batch = next(iter(categorical_tf_ds))[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tf.Tensor(\n",
      "[[ 2.21173733e-01 -1.72159821e-02  1.95488617e-01  3.94141227e-01\n",
      "  -1.67326014e-02 -2.14374363e-01  1.35068595e-01 -2.46864319e-01\n",
      "  -2.92306662e-01  1.10351130e-01]\n",
      " [-3.70135516e-01  2.29885086e-01  5.77204525e-01 -3.33219431e-02\n",
      "  -5.73716164e-01 -2.24834934e-01 -5.42964377e-02  2.17185676e-01\n",
      "   1.66468963e-01 -2.94320621e-02]\n",
      " [ 2.23573312e-01 -5.65836057e-02 -8.03322047e-02  9.44105014e-02\n",
      "  -4.51478630e-01 -1.10069960e-01  5.88697612e-01  1.56490564e-01\n",
      "  -4.58488345e-01  5.05169749e-01]\n",
      " [ 1.90142587e-01  6.30900711e-02  1.27180293e-01 -2.40399882e-01\n",
      "  -3.38476181e-01  1.58759262e-02 -1.73723385e-01  1.54326290e-01\n",
      "  -1.36973143e-01 -8.69284198e-02]\n",
      " [ 1.09519929e-01  2.49608710e-01 -2.60479212e-01  1.12497471e-01\n",
      "   1.42827034e-01  4.49611813e-01  8.05474147e-02  5.62058032e-01\n",
      "   4.47835207e-01 -5.23728132e-02]\n",
      " [-3.56267035e-01 -2.91776627e-01  4.70154822e-01  1.82453468e-01\n",
      "  -2.62245953e-01 -3.69706720e-01 -2.10007370e-01  2.95164227e-01\n",
      "  -1.79117620e-01  1.87984556e-02]\n",
      " [ 5.47165930e-01 -3.71892065e-01  5.82392216e-01  8.78551975e-02\n",
      "  -2.24966273e-01 -2.86136597e-01  7.21387519e-03 -5.66901900e-02\n",
      "   3.21903884e-01 -2.59898696e-02]\n",
      " [ 1.45990640e-01 -2.62489647e-01 -9.33190808e-02  1.46773815e-01\n",
      "  -3.21463913e-01  6.96207583e-02  4.30907428e-01 -1.83946386e-01\n",
      "   4.00100976e-01  1.49181694e-01]\n",
      " [ 1.65900216e-01  6.07954226e-02  4.39754426e-01  3.47980261e-01\n",
      "  -6.31673709e-02  2.34097913e-01  6.23354614e-01  9.89933461e-02\n",
      "  -3.81052673e-01  2.55292118e-01]\n",
      " [-3.63221377e-01  6.52561039e-02 -3.43514942e-02  4.38841879e-01\n",
      "   3.45863223e-01 -5.30374110e-01  2.96102375e-01 -3.76442969e-01\n",
      "  -4.73127931e-01  2.56435245e-01]\n",
      " [ 3.05163682e-01  1.24479391e-01  2.79286265e-01 -3.03077459e-01\n",
      "   1.69501066e-01 -9.67929587e-02 -5.79946399e-01  3.07954252e-01\n",
      "   1.04335368e-01  4.40943211e-01]\n",
      " [ 7.73213804e-03 -5.89087903e-01 -3.68716210e-01 -1.59685805e-01\n",
      "   3.60390127e-01  3.90282094e-01  5.56753218e-01 -5.86735643e-02\n",
      "   2.07081214e-01 -2.53996730e-01]\n",
      " [-2.05367729e-01  3.40942740e-01  2.31850058e-01 -2.97459364e-01\n",
      "  -3.11965346e-01 -2.48455569e-01  2.55972236e-01 -2.73559153e-01\n",
      "  -2.78003991e-01 -6.05599105e-01]\n",
      " [ 3.62973027e-02 -5.52548230e-01  1.54541776e-01 -4.83170375e-02\n",
      "   1.82704136e-01 -4.91367131e-02  3.41807932e-01 -3.83855760e-01\n",
      "  -3.52012664e-01 -2.12510675e-02]\n",
      " [-1.12446137e-01 -4.90654647e-01 -2.00538069e-01  5.31839609e-01\n",
      "  -3.24222744e-02 -2.23938525e-01  2.06225201e-01  3.53894889e-01\n",
      "  -1.47967964e-01 -2.02423260e-01]\n",
      " [ 3.21457535e-01  5.93174957e-02 -3.69869232e-01 -4.09282655e-01\n",
      "   9.14927498e-02  3.32246929e-01 -2.48576999e-01  4.00003642e-01\n",
      "   5.06345451e-01  2.01818004e-01]\n",
      " [-1.47743538e-01  3.67682010e-01 -8.93095881e-02  9.23893377e-02\n",
      "   3.89735103e-01 -3.53251211e-02 -2.62227654e-01  6.44264668e-02\n",
      "  -2.38305166e-01 -1.52197331e-01]\n",
      " [-4.72036660e-01 -3.21255885e-02  7.68640414e-02  3.21973473e-01\n",
      "   2.66465507e-02 -2.05376044e-01  3.84855568e-02  3.20453160e-02\n",
      "  -2.44045362e-01 -4.75476623e-01]\n",
      " [-3.80438238e-01 -4.09091324e-01  2.10142478e-01  4.05305773e-01\n",
      "   6.14984371e-02  3.84018242e-01  2.38912642e-01 -2.40870863e-01\n",
      "  -5.01134217e-01 -6.38686344e-02]\n",
      " [-2.01972052e-01  5.70804775e-02 -6.07003570e-02 -1.96827278e-01\n",
      "  -5.08159399e-01 -1.03628360e-01  1.59624040e-01 -2.37674490e-01\n",
      "   2.31359191e-02 -2.06301585e-01]\n",
      " [-6.49687648e-02 -5.00375219e-02 -3.93785596e-01 -1.91538874e-02\n",
      "   3.34195584e-01 -3.29131246e-01 -3.68286878e-01 -2.70653337e-01\n",
      "   4.14935440e-01  2.34607071e-01]\n",
      " [-5.15714847e-02 -3.64539772e-01  1.95512101e-02  1.44576281e-01\n",
      "   3.24142128e-01  3.27825323e-02  3.87537144e-02 -1.59311146e-01\n",
      "   1.18068174e-01 -2.08626002e-01]\n",
      " [ 2.09214464e-01  4.56928276e-03 -1.04735773e-02  1.77772090e-01\n",
      "  -2.06444017e-03  5.46324849e-01  1.41713530e-01  4.14380163e-01\n",
      "  -7.64702037e-02  3.37981820e-01]\n",
      " [-1.18957266e-01  4.86897141e-01  3.07010785e-02  9.68524665e-02\n",
      "   6.66879192e-02  1.35202482e-01 -1.46177948e-01 -2.02172980e-01\n",
      "  -5.08908443e-02  1.97674543e-01]\n",
      " [-1.68040857e-01 -2.06194833e-01 -3.15807194e-01 -5.24919212e-01\n",
      "   2.60535568e-01  3.90114598e-02 -4.19732146e-02  3.25531751e-01\n",
      "  -1.92902520e-01  1.62280828e-01]\n",
      " [ 3.05758744e-01  2.48721447e-02  2.02467248e-01  1.53078869e-01\n",
      "  -2.89428353e-01 -5.38650958e-04  2.89290726e-01 -2.21982226e-01\n",
      "  -4.03302729e-01  7.56065622e-02]\n",
      " [-9.32436287e-02  2.44013861e-01  1.51964977e-01  1.73874572e-01\n",
      "   1.94687024e-01  2.43246436e-01 -1.89445019e-01 -1.00778326e-01\n",
      "   1.07042216e-01 -1.24372415e-01]\n",
      " [-1.66304529e-01  2.27702618e-01  1.03454947e-01  3.77445281e-01\n",
      "  -1.39045894e-01  2.07322732e-01 -6.75291345e-02  4.79865782e-02\n",
      "   3.46478283e-01  1.17409937e-01]\n",
      " [ 4.06199872e-01  3.49460721e-01  2.44637355e-01  2.48847499e-01\n",
      "   4.78596061e-01  2.15233400e-01 -6.15224898e-01  1.12012804e-01\n",
      "   6.17501549e-02 -4.65285063e-01]\n",
      " [ 3.34114991e-02  4.51019287e-01 -3.53240311e-01 -2.11630970e-01\n",
      "  -1.54820323e-01 -6.26488253e-02 -8.36589113e-02 -7.38469809e-02\n",
      "   3.11094910e-01  1.83216110e-01]\n",
      " [ 1.17729269e-01  2.31808484e-01 -7.75934532e-02  2.38361761e-01\n",
      "  -4.84732747e-01  1.08366184e-01 -1.42730400e-01 -2.09738344e-01\n",
      "   3.87201577e-01  4.63078231e-01]\n",
      " [ 8.02836940e-02  1.29528269e-01 -1.31656677e-02  4.83411461e-01\n",
      "  -1.02488682e-01 -3.96515802e-02  8.13919865e-03 -1.78335369e-01\n",
      "   5.28594971e-01 -5.06561756e-01]\n",
      " [ 6.97559565e-02  1.62109986e-01  2.89449006e-01  2.36351043e-02\n",
      "  -2.20395207e-01  1.00556277e-01 -1.82724625e-01  2.24839896e-01\n",
      "   3.75871025e-02  5.97209215e-01]\n",
      " [-8.72354358e-02  3.24999690e-02 -1.40968591e-01 -2.79432982e-01\n",
      "  -4.57580000e-01  1.81796074e-01 -1.08092166e-01  1.91744015e-01\n",
      "   7.57934079e-02  3.99347872e-01]\n",
      " [ 8.95942599e-02 -4.12449062e-01 -5.73571861e-01  1.75338015e-01\n",
      "   5.13237193e-02  2.88603067e-01 -9.66583472e-03 -9.90523547e-02\n",
      "   2.01350898e-02 -2.24946514e-01]\n",
      " [ 3.46745461e-01  2.71048993e-01 -9.16679427e-02 -7.77366161e-02\n",
      "   4.89920676e-01 -4.81635541e-01 -3.17499459e-01  1.99631482e-01\n",
      "   2.49167159e-01 -1.57573476e-01]\n",
      " [-2.30746806e-01  4.66090620e-01  4.54338253e-01  1.15700610e-01\n",
      "  -9.24459398e-02 -3.96088123e-01  4.27640826e-01  5.73597923e-02\n",
      "  -3.38077366e-01 -4.07205671e-01]\n",
      " [ 3.77220392e-01  4.85850960e-01 -1.06350690e-01  2.83066966e-02\n",
      "   1.84345081e-01  8.95045027e-02 -3.64821970e-01 -4.65173692e-01\n",
      "  -3.24850440e-01  1.91724494e-01]\n",
      " [ 1.12438530e-01 -3.71120334e-01 -7.72717074e-02  5.04763909e-02\n",
      "   1.85718343e-01  9.57610086e-03 -8.91020149e-02  8.43717158e-03\n",
      "  -2.84862280e-01 -2.92068005e-01]\n",
      " [-2.34553739e-01 -8.34727511e-02  2.61431992e-01  1.10937811e-01\n",
      "   1.35743394e-01  1.09667197e-01 -2.40983710e-01  1.35432109e-01\n",
      "  -3.67631346e-01  2.61494190e-01]\n",
      " [-2.17249095e-01  3.94547760e-01 -1.96576677e-02  5.84245920e-01\n",
      "  -3.83032441e-01 -1.82644352e-01 -2.23389491e-01  1.96186975e-01\n",
      "   5.04969656e-01  5.06922007e-01]\n",
      " [-5.28254509e-01 -6.47731647e-02 -8.88943672e-02  3.63148332e-01\n",
      "   1.37630776e-01 -8.34298581e-02  4.63728979e-02 -5.26239574e-02\n",
      "   2.93244779e-01  4.37557340e-01]\n",
      " [-8.45043510e-02 -5.66575766e-01 -1.25844344e-01 -4.33580101e-01\n",
      "   4.50702727e-01 -4.36995141e-02 -4.68379319e-01  2.65026033e-01\n",
      "   1.11401625e-01  2.87989408e-01]\n",
      " [-1.45526037e-01  3.80581707e-01  2.30159819e-01  2.29766756e-01\n",
      "  -5.62798500e-01 -3.27541918e-01  5.41647911e-01  1.38492495e-01\n",
      "   5.79344034e-02 -3.46691966e-01]\n",
      " [ 8.84416774e-02 -5.02830684e-01 -4.80091184e-01  1.29740626e-01\n",
      "  -5.12592793e-01  4.52330530e-01 -7.29410127e-02 -6.56349361e-02\n",
      "   2.68445611e-01  3.78060251e-01]\n",
      " [ 3.10159236e-01  3.72439437e-03  1.95693165e-01 -1.10333323e-01\n",
      "  -1.79411083e-01  5.36282420e-01  4.06817608e-02 -3.30081224e-01\n",
      "  -6.11320697e-02  2.43110299e-01]\n",
      " [ 6.31699383e-01 -5.95075965e-01  3.38996686e-02 -2.86349684e-01\n",
      "   3.90133411e-01 -5.38905524e-02  1.67297889e-02 -4.77125466e-01\n",
      "  -2.29528844e-01  8.86970311e-02]\n",
      " [-1.14536852e-01 -3.88067782e-01 -1.83217540e-01 -4.07678932e-01\n",
      "   9.88018438e-02  3.09355021e-01  4.69066203e-01 -1.46911442e-01\n",
      "  -1.40690237e-01 -2.91088879e-01]\n",
      " [ 3.56209844e-01 -1.63025558e-02  4.89954472e-01 -5.74329257e-01\n",
      "  -2.02979133e-01  4.71178532e-01 -1.29683331e-01  9.22208056e-02\n",
      "  -1.81245089e-01  3.09783351e-02]\n",
      " [ 1.67314947e-01  2.32018098e-01  4.12859142e-01  2.12580293e-01\n",
      "  -1.80488937e-02 -2.63569266e-01 -1.50420174e-01  2.62736768e-01\n",
      "  -4.47872989e-02  7.92705640e-02]\n",
      " [ 4.08414751e-01  4.20303613e-01 -4.53268826e-01 -5.07035136e-01\n",
      "  -4.64152284e-02  3.04141074e-01 -4.97599095e-01  5.77130355e-02\n",
      "   3.46742779e-01 -2.61391848e-01]\n",
      " [-1.29503995e-01  3.79304081e-01 -6.86735660e-02 -5.41104019e-01\n",
      "  -4.91090156e-02 -3.48790944e-01 -2.95805395e-01  8.52178186e-02\n",
      "  -1.33517712e-01 -7.27027562e-03]\n",
      " [ 1.02716826e-01  6.80366233e-02 -4.86424804e-01 -1.81124747e-01\n",
      "  -3.86693597e-01 -4.50449556e-01 -2.97391951e-01  1.47938699e-01\n",
      "  -5.17401397e-01  8.15296099e-02]\n",
      " [-5.39604016e-02  3.00839096e-01 -1.85012221e-01  3.81570131e-01\n",
      "  -6.21404052e-01  1.27725914e-01  2.66442865e-01 -3.82936954e-01\n",
      "   2.29131177e-01  2.59222180e-01]\n",
      " [ 8.62242803e-02 -5.88915162e-02 -2.91529186e-02  2.19931975e-02\n",
      "   1.69219196e-01  1.95221096e-01 -3.58254582e-01  1.52518094e-01\n",
      "  -5.88115938e-02  2.78177381e-01]\n",
      " [ 4.64907028e-02 -2.25962177e-01 -3.37895364e-01 -1.46054432e-01\n",
      "  -5.81172466e-01  1.34139121e-01  4.05016214e-01 -4.62647155e-02\n",
      "  -5.48960306e-02 -3.15349847e-01]\n",
      " [ 1.98331531e-02  3.56559932e-01  3.58095616e-01 -3.89024049e-01\n",
      "   3.23762357e-01  1.02973975e-01 -4.31862980e-01  1.98491976e-01\n",
      "   2.03011911e-02 -8.38057250e-02]\n",
      " [ 1.22427888e-01  1.81814313e-01  4.14778471e-01 -2.66290814e-01\n",
      "   3.41357410e-01 -5.60003638e-01  4.35484767e-01 -3.64804953e-01\n",
      "   4.21479225e-01  1.25182763e-01]\n",
      " [-3.92881453e-01  3.69802624e-01  4.39309001e-01  2.53669530e-01\n",
      "  -6.71630800e-02 -2.37583295e-01 -5.01899600e-01  7.04075769e-02\n",
      "  -2.72109240e-01  5.25108771e-03]\n",
      " [-3.17871541e-01  3.04395258e-01 -1.48960665e-01  1.34421825e-01\n",
      "  -1.49788372e-02 -2.33029246e-01  4.66970354e-01  6.02961361e-01\n",
      "  -1.70079693e-01  2.17916727e-01]\n",
      " [ 3.32114726e-01  6.67006969e-02 -9.62992385e-02 -6.15109690e-02\n",
      "   4.81428474e-01 -4.47902940e-02  1.71698347e-01  1.54229686e-01\n",
      "  -3.93822677e-02  5.50741851e-01]\n",
      " [ 8.02836940e-02  1.29528269e-01 -1.31656677e-02  4.83411461e-01\n",
      "  -1.02488682e-01 -3.96515802e-02  8.13919865e-03 -1.78335369e-01\n",
      "   5.28594971e-01 -5.06561756e-01]\n",
      " [-3.25197518e-01 -4.00785357e-01  1.49951220e-01  3.13788801e-01\n",
      "   1.76164478e-01  3.29165936e-01 -1.52645614e-02 -2.66170263e-01\n",
      "   3.16257089e-01  1.78693980e-01]\n",
      " [-4.10625517e-01 -4.73436117e-02 -4.05227207e-02  8.24426562e-02\n",
      "   7.57036954e-02 -4.49555218e-01 -1.81903131e-02  7.56093115e-02\n",
      "  -1.58822700e-01  4.46603186e-02]], shape=(64, 10), dtype=float32)\n",
      "\n",
      "Example of one transformed row:\n",
      "[ 0.22117373 -0.01721598  0.19548862  0.39414123 -0.0167326  -0.21437436\n",
      "  0.1350686  -0.24686432 -0.29230666  0.11035113]\n"
     ]
    }
   ],
   "source": [
    "demo(cat_embedded, categorical_tf_ds_batch)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}