[418e14]: / 1_simple_data.ipynb

Download this file

33088 lines (33087 with data), 1.3 MB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "from IPython.display import display, HTML"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Build Data Definitions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import icu_data_defs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Load the data dictionary from the Excel workbook of definitions.\n",
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>units</th>\n",
       "      <th>variable_type</th>\n",
       "      <th>clinical_source</th>\n",
       "      <th>list_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>def_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>heart rate</td>\n",
       "      <td>beats/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>blood pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>blood pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>blood pressure mean</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>respiratory rate</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           label      units variable_type clinical_source  \\\n",
       "def_id                                                                      \n",
       "0                     heart rate  beats/min            qn     observation   \n",
       "1        blood pressure systolic       mmHg            qn     observation   \n",
       "2       blood pressure diastolic       mmHg            qn     observation   \n",
       "3            blood pressure mean       mmHg            qn     observation   \n",
       "4               respiratory rate   insp/min            qn     observation   \n",
       "\n",
       "        list_id  \n",
       "def_id           \n",
       "0           NaN  \n",
       "1           NaN  \n",
       "2           NaN  \n",
       "3           NaN  \n",
       "4           NaN  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first rows of the definitions table to check what was loaded.\n",
    "data_dict.tables.definitions.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Add two coagulation lab definitions and keep the returned values;\n",
    "# they are used as the definition ids when the panel is built below.\n",
    "# INR is added without units.\n",
    "# NOTE(review): re-running this cell may append duplicate definitions -\n",
    "# confirm how add_definition handles a label that already exists.\n",
    "inr_id = data_dict.add_definition(label='INR')\n",
    "pt_id = data_dict.add_definition(label='Prothrombin Time',units='seconds')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>units</th>\n",
       "      <th>variable_type</th>\n",
       "      <th>clinical_source</th>\n",
       "      <th>list_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>def_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>lactate</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>lactate</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>hemoglobin</td>\n",
       "      <td>g/dL</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>INR</td>\n",
       "      <td>NaN</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Prothrombin Time</td>\n",
       "      <td>seconds</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   label    units variable_type clinical_source  list_id\n",
       "def_id                                                                  \n",
       "23               lactate   mmol/L            qn     observation      NaN\n",
       "24               lactate    mg/dL            qn     observation      NaN\n",
       "25            hemoglobin     g/dL            qn     observation      NaN\n",
       "26                   INR      NaN            qn     observation      NaN\n",
       "27      Prothrombin Time  seconds            qn     observation      NaN"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check that the new definitions (INR, Prothrombin Time) show up at the end of the table.\n",
    "data_dict.tables.definitions.tail()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>units</th>\n",
       "      <th>variable_type</th>\n",
       "      <th>clinical_source</th>\n",
       "      <th>list_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>def_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>INR</td>\n",
       "      <td>NaN</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>Prothrombin Time</td>\n",
       "      <td>seconds</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   label    units variable_type clinical_source  list_id\n",
       "def_id                                                                  \n",
       "26                   INR      NaN            qn     observation      NaN\n",
       "27      Prothrombin Time  seconds            qn     observation      NaN"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from itertools import product\n",
    "\n",
    "# Pair the definitions table name with each new id:\n",
    "# [(definitions_table, inr_id), (definitions_table, pt_id)].\n",
    "panel_map = list(product([data_dict.table_names.definitions],[inr_id,pt_id]))\n",
    "# Register the pairs as a panel named 'coagulation labs', then display its definitions.\n",
    "panel_id = data_dict.add_panel('coagulation labs',panel_map)\n",
    "# NOTE(review): 'defintions' is the spelling of the method as called throughout this\n",
    "# notebook; presumably it is defined that way in icu_data_defs - rename there first.\n",
    "data_dict.get_panel_defintions(panel_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>panel_name</th>\n",
       "      <th>list_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>panel_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>blood pressure</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>vital signs</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>urine output</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>glasgow coma scale</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>normal saline</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>lactated ringers</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>common fluids</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>norepinephrine</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>vasopressin</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>common pressors</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>lactate</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>oxygen delivery labs</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>simple dataset</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>coagulation labs</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    panel_name  list_id\n",
       "panel_id                               \n",
       "0               blood pressure        3\n",
       "1                  vital signs        4\n",
       "2                 urine output        5\n",
       "3           glasgow coma scale        6\n",
       "4                normal saline        7\n",
       "5             lactated ringers        8\n",
       "6                common fluids        9\n",
       "7               norepinephrine       10\n",
       "8                  vasopressin       11\n",
       "9              common pressors       12\n",
       "10                     lactate       13\n",
       "11        oxygen delivery labs       14\n",
       "12              simple dataset       15\n",
       "13            coagulation labs       16"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List every panel; the new 'coagulation labs' panel should be the last row.\n",
    "data_dict.tables.panels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>units</th>\n",
       "      <th>variable_type</th>\n",
       "      <th>clinical_source</th>\n",
       "      <th>list_id</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>def_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>heart rate</td>\n",
       "      <td>beats/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>blood pressure systolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>blood pressure diastolic</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>blood pressure mean</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>respiratory rate</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>temperature body</td>\n",
       "      <td>degF</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>oxygen saturation pulse oximetry</td>\n",
       "      <td>percent</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>weight body</td>\n",
       "      <td>kg</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>output urine</td>\n",
       "      <td>mL</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>output urine</td>\n",
       "      <td>mL/hr</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>output urine</td>\n",
       "      <td>mL/kg/hr</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>glasgow coma scale motor</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ord</td>\n",
       "      <td>observation</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>glasgow coma scale eye opening</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ord</td>\n",
       "      <td>observation</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>glasgow coma scale verbal</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ord</td>\n",
       "      <td>observation</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>normal saline</td>\n",
       "      <td>mL</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>normal saline</td>\n",
       "      <td>mL/hr</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>lactated ringers</td>\n",
       "      <td>mL</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>lactated ringers</td>\n",
       "      <td>mL/hr</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>norepinephrine</td>\n",
       "      <td>mcg</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>norepinephrine</td>\n",
       "      <td>mcg/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>norepinephrine</td>\n",
       "      <td>mcg/kg/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>vasopressin</td>\n",
       "      <td>units</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>vasopressin</td>\n",
       "      <td>units/min</td>\n",
       "      <td>qn</td>\n",
       "      <td>intervention</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>hemoglobin</td>\n",
       "      <td>g/dL</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>lactate</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>lactate</td>\n",
       "      <td>mg/dL</td>\n",
       "      <td>qn</td>\n",
       "      <td>observation</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   label       units variable_type  \\\n",
       "def_id                                                               \n",
       "0                             heart rate   beats/min            qn   \n",
       "1                blood pressure systolic        mmHg            qn   \n",
       "2               blood pressure diastolic        mmHg            qn   \n",
       "3                    blood pressure mean        mmHg            qn   \n",
       "4                       respiratory rate    insp/min            qn   \n",
       "5                       temperature body        degF            qn   \n",
       "6       oxygen saturation pulse oximetry     percent            qn   \n",
       "7                            weight body          kg            qn   \n",
       "8                           output urine          mL            qn   \n",
       "9                           output urine       mL/hr            qn   \n",
       "10                          output urine    mL/kg/hr            qn   \n",
       "11              glasgow coma scale motor         NaN           ord   \n",
       "12        glasgow coma scale eye opening         NaN           ord   \n",
       "13             glasgow coma scale verbal         NaN           ord   \n",
       "14                         normal saline          mL            qn   \n",
       "15                         normal saline       mL/hr            qn   \n",
       "16                      lactated ringers          mL            qn   \n",
       "17                      lactated ringers       mL/hr            qn   \n",
       "18                        norepinephrine         mcg            qn   \n",
       "19                        norepinephrine     mcg/min            qn   \n",
       "20                        norepinephrine  mcg/kg/min            qn   \n",
       "21                           vasopressin       units            qn   \n",
       "22                           vasopressin   units/min            qn   \n",
       "25                            hemoglobin        g/dL            qn   \n",
       "23                               lactate      mmol/L            qn   \n",
       "24                               lactate       mg/dL            qn   \n",
       "\n",
       "       clinical_source  list_id  \n",
       "def_id                           \n",
       "0          observation      NaN  \n",
       "1          observation      NaN  \n",
       "2          observation      NaN  \n",
       "3          observation      NaN  \n",
       "4          observation      NaN  \n",
       "5          observation      NaN  \n",
       "6          observation      NaN  \n",
       "7          observation      NaN  \n",
       "8          observation      NaN  \n",
       "9          observation      NaN  \n",
       "10         observation      NaN  \n",
       "11         observation      0.0  \n",
       "12         observation      2.0  \n",
       "13         observation      1.0  \n",
       "14        intervention      NaN  \n",
       "15        intervention      NaN  \n",
       "16        intervention      NaN  \n",
       "17        intervention      NaN  \n",
       "18        intervention      NaN  \n",
       "19        intervention      NaN  \n",
       "20        intervention      NaN  \n",
       "21        intervention      NaN  \n",
       "22        intervention      NaN  \n",
       "25         observation      NaN  \n",
       "23         observation      NaN  \n",
       "24         observation      NaN  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 12 is the panel_id of 'simple dataset' in the panels table displayed above.\n",
    "data_dict.get_panel_defintions(12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Persist the changes made above (new definitions and panel).\n",
    "# NOTE(review): presumably writes back to config/data_definitions.xlsx - confirm in icu_data_defs.\n",
    "data_dict.save()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MIMIC Exploration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import mimic\n",
    "from icu_data_defs import data_dictionary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# `reload` is a builtin only on Python 2; on Python 3 it lives in importlib.\n",
    "# Resolve it once here so this cell runs on either kernel.\n",
    "try:\n",
    "    reload\n",
    "except NameError:\n",
    "    from importlib import reload\n",
    "\n",
    "# Re-import mimic so edits to the module are picked up without restarting the kernel.\n",
    "reload(mimic)\n",
    "\n",
    "# Connect through the mimic helper, re-create the data dictionary from the\n",
    "# definitions workbook, and build the explorer on top of the connection.\n",
    "conn = mimic.connect()\n",
    "data_dict = data_dictionary('config/data_definitions.xlsx')\n",
    "explorer = mimic.explorer(conn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Start from an empty dict.\n",
    "# NOTE(review): presumably filled in by the exploration cells below with the\n",
    "# items to keep for each variable - confirm against the later cells.\n",
    "keep_dict = {}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vital Signs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Heart Rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>227018</th>\n",
       "      <td>HR_ApacheIV</td>\n",
       "      <td>HR_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223765</th>\n",
       "      <td>Orthostatic HR sitting</td>\n",
       "      <td>Orthostatic HR sitting</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1332</th>\n",
       "      <td>pulse</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227581</th>\n",
       "      <td>BiPap bpm (S/T -Back up)</td>\n",
       "      <td>BiPap bpm (S/T -Back up)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220047</th>\n",
       "      <td>Heart Rate Alarm - Low</td>\n",
       "      <td>HR Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224363</th>\n",
       "      <td>VAD Beat Rate L</td>\n",
       "      <td>VAD Beat Rate L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226764</th>\n",
       "      <td>HrApacheIIValue</td>\n",
       "      <td>HrApacheIIValue</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1341</th>\n",
       "      <td>PULSE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220045</th>\n",
       "      <td>Heart Rate</td>\n",
       "      <td>HR</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223775</th>\n",
       "      <td>VAD Beat Rate R</td>\n",
       "      <td>VAD Beat Rate R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220046</th>\n",
       "      <td>Heart rate Alarm - High</td>\n",
       "      <td>HR Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224751</th>\n",
       "      <td>Temporary Pacemaker Rate</td>\n",
       "      <td>Temp Pacemaker Rate</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Cardiovascular (Pacer Data)</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223764</th>\n",
       "      <td>Orthostatic HR lying</td>\n",
       "      <td>Orthostatic HR lying</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1725</th>\n",
       "      <td>Pulse</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224422</th>\n",
       "      <td>Spont RR</td>\n",
       "      <td>Spont RR</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224647</th>\n",
       "      <td>Orthostatic HR standing</td>\n",
       "      <td>Orthostatic HR standing</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>bpm</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>211</th>\n",
       "      <td>Heart Rate</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220181</th>\n",
       "      <td>Non Invasive Blood Pressure mean</td>\n",
       "      <td>NBPm</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220052</th>\n",
       "      <td>Arterial Blood Pressure mean</td>\n",
       "      <td>ABPm</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3494</th>\n",
       "      <td>Lowest Heart Rate</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>92.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   label              abbreviation  \\\n",
       "itemid                                                               \n",
       "227018                       HR_ApacheIV               HR_ApacheIV   \n",
       "223765            Orthostatic HR sitting    Orthostatic HR sitting   \n",
       "1332                               pulse                             \n",
       "227581          BiPap bpm (S/T -Back up)  BiPap bpm (S/T -Back up)   \n",
       "220047            Heart Rate Alarm - Low            HR Alarm - Low   \n",
       "224363                   VAD Beat Rate L           VAD Beat Rate L   \n",
       "226764                   HrApacheIIValue           HrApacheIIValue   \n",
       "1341                               PULSE                             \n",
       "220045                        Heart Rate                        HR   \n",
       "223775                   VAD Beat Rate R           VAD Beat Rate R   \n",
       "220046           Heart rate Alarm - High           HR Alarm - High   \n",
       "224751          Temporary Pacemaker Rate       Temp Pacemaker Rate   \n",
       "223764              Orthostatic HR lying      Orthostatic HR lying   \n",
       "1725                               Pulse                             \n",
       "224422                          Spont RR                  Spont RR   \n",
       "224647           Orthostatic HR standing   Orthostatic HR standing   \n",
       "211                           Heart Rate                             \n",
       "220181  Non Invasive Blood Pressure mean                      NBPm   \n",
       "220052      Arterial Blood Pressure mean                      ABPm   \n",
       "3494                   Lowest Heart Rate                             \n",
       "\n",
       "            linksto                     category unitname       score  \n",
       "itemid                                                                 \n",
       "227018  chartevents       Scores - APACHE IV (2)      bpm  110.000000  \n",
       "223765  chartevents          Routine Vital Signs      bpm  110.000000  \n",
       "1332    chartevents                                        110.000000  \n",
       "227581  chartevents                  Respiratory      bpm  110.000000  \n",
       "220047  chartevents                       Alarms      bpm  110.000000  \n",
       "224363  chartevents                 Hemodynamics      bpm  110.000000  \n",
       "226764  chartevents           Scores - APACHE II      bpm  110.000000  \n",
       "1341    chartevents                                        110.000000  \n",
       "220045  chartevents          Routine Vital Signs      bpm  110.000000  \n",
       "223775  chartevents                 Hemodynamics      bpm  110.000000  \n",
       "220046  chartevents                       Alarms      bpm  110.000000  \n",
       "224751  chartevents  Cardiovascular (Pacer Data)      bpm  110.000000  \n",
       "223764  chartevents          Routine Vital Signs      bpm  110.000000  \n",
       "1725    chartevents                                        110.000000  \n",
       "224422  chartevents                  Respiratory      bpm  110.000000  \n",
       "224647  chartevents          Routine Vital Signs      bpm  110.000000  \n",
       "211     chartevents                                        110.000000  \n",
       "220181  chartevents          Routine Vital Signs     mmHg  100.666667  \n",
       "220052  chartevents          Routine Vital Signs     mmHg  100.666667  \n",
       "3494    chartevents                                         92.666667  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search with heart-rate related terms, then show the first 20 result rows.\n",
    "out_df = explorer.search(['heart rate', 'beats', 'bpm', 'pulse'])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# itemids kept for heart rate; labels as listed in the search output above.\n",
    "label = data_dict.labels.HEART_RATE\n",
    "keep_dict[label] = [\n",
    "    211,     # Heart Rate\n",
    "    220045,  # Heart Rate (HR)\n",
    "    1341,    # PULSE\n",
    "    1725,    # Pulse\n",
    "    1332,    # pulse\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Respiratory Rate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>3603</th>\n",
       "      <td>Resp Rate</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224688</th>\n",
       "      <td>Respiratory Rate (Set)</td>\n",
       "      <td>Respiratory Rate (Set)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226774</th>\n",
       "      <td>RRApacheIIValue</td>\n",
       "      <td>RRApacheIIValue</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>618</th>\n",
       "      <td>Respiratory Rate</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224690</th>\n",
       "      <td>Respiratory Rate (Total)</td>\n",
       "      <td>Respiratory Rate (Total)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223875</th>\n",
       "      <td>Fspn High</td>\n",
       "      <td>Fspn High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224689</th>\n",
       "      <td>Respiratory Rate (spontaneous)</td>\n",
       "      <td>Respiratory Rate (spontaneous)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220210</th>\n",
       "      <td>Respiratory Rate</td>\n",
       "      <td>RR</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224161</th>\n",
       "      <td>Resp Alarm - High</td>\n",
       "      <td>Resp Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227050</th>\n",
       "      <td>RR_ApacheIV</td>\n",
       "      <td>RR_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224162</th>\n",
       "      <td>Resp Alarm - Low</td>\n",
       "      <td>Resp Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>insp/min</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>619</th>\n",
       "      <td>Respiratory Rate Set</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>102.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225949</th>\n",
       "      <td>NIV Mask</td>\n",
       "      <td>NIV Mask</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223840</th>\n",
       "      <td>ETT Re-taped</td>\n",
       "      <td>ETT Re-taped</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223838</th>\n",
       "      <td>ETT Location</td>\n",
       "      <td>ETT Location</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223837</th>\n",
       "      <td>ETT Size (ID)</td>\n",
       "      <td>ETT Size (ID)</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224373</th>\n",
       "      <td>Sputum Amount</td>\n",
       "      <td>Sputum Amount</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223836</th>\n",
       "      <td>Airway Type</td>\n",
       "      <td>Airway Type</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226815</th>\n",
       "      <td>Airway problems</td>\n",
       "      <td>Airway problems</td>\n",
       "      <td></td>\n",
       "      <td>Respiratory</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223835</th>\n",
       "      <td>Inspired O2 Fraction</td>\n",
       "      <td>FiO2</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>None</td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 label                    abbreviation  \\\n",
       "itemid                                                                   \n",
       "3603                         Resp Rate                                   \n",
       "224688          Respiratory Rate (Set)          Respiratory Rate (Set)   \n",
       "226774                 RRApacheIIValue                 RRApacheIIValue   \n",
       "618                   Respiratory Rate                                   \n",
       "224690        Respiratory Rate (Total)        Respiratory Rate (Total)   \n",
       "223875                       Fspn High                       Fspn High   \n",
       "224689  Respiratory Rate (spontaneous)  Respiratory Rate (spontaneous)   \n",
       "220210                Respiratory Rate                              RR   \n",
       "224161               Resp Alarm - High               Resp Alarm - High   \n",
       "227050                     RR_ApacheIV                     RR_ApacheIV   \n",
       "224162                Resp Alarm - Low                Resp Alarm - Low   \n",
       "619               Respiratory Rate Set                                   \n",
       "225949                        NIV Mask                        NIV Mask   \n",
       "223840                    ETT Re-taped                    ETT Re-taped   \n",
       "223838                    ETT Location                    ETT Location   \n",
       "223837                   ETT Size (ID)                   ETT Size (ID)   \n",
       "224373                   Sputum Amount                   Sputum Amount   \n",
       "223836                     Airway Type                     Airway Type   \n",
       "226815                 Airway problems                 Airway problems   \n",
       "223835            Inspired O2 Fraction                            FiO2   \n",
       "\n",
       "            linksto                category  unitname       score  \n",
       "itemid                                                             \n",
       "3603    chartevents                                    110.000000  \n",
       "224688  chartevents             Respiratory  insp/min  110.000000  \n",
       "226774  chartevents      Scores - APACHE II  insp/min  110.000000  \n",
       "618     chartevents                                    110.000000  \n",
       "224690  chartevents             Respiratory  insp/min  110.000000  \n",
       "223875  chartevents             Respiratory  insp/min  110.000000  \n",
       "224689  chartevents             Respiratory  insp/min  110.000000  \n",
       "220210  chartevents             Respiratory  insp/min  110.000000  \n",
       "224161  chartevents                  Alarms  insp/min  110.000000  \n",
       "227050  chartevents  Scores - APACHE IV (2)  insp/min  110.000000  \n",
       "224162  chartevents                  Alarms  insp/min  110.000000  \n",
       "619     chartevents                                    102.666667  \n",
       "225949                          Respiratory             97.333333  \n",
       "223840                          Respiratory             97.333333  \n",
       "223838                          Respiratory             97.333333  \n",
       "223837                          Respiratory             97.333333  \n",
       "224373                          Respiratory             97.333333  \n",
       "223836                          Respiratory             97.333333  \n",
       "226815                          Respiratory             97.333333  \n",
       "223835  chartevents             Respiratory      None   97.333333  "
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search with respiratory-rate related terms, then show the first 20 result rows.\n",
    "out_df = explorer.search(['respiratory rate', 'resp rate', 'insp/min', 'breath/min'])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# itemids kept for respiratory rate; labels as listed in the search output above.\n",
    "label = data_dict.labels.RESPIRATORY_RATE\n",
    "keep_dict[label] = [\n",
    "    220210,  # Respiratory Rate (RR)\n",
    "    3603,    # Resp Rate\n",
    "    618,     # Respiratory Rate\n",
    "    # NOTE(review): the next three ids are not among the 20 rows displayed by\n",
    "    # the search above -- confirm their labels before relying on them.\n",
    "    8113,\n",
    "    615,\n",
    "    219,\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Blood Pressure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>224315</th>\n",
       "      <td>ABI Brachial BP L</td>\n",
       "      <td>ABI Brachial BP L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227537</th>\n",
       "      <td>ART Blood Pressure Alarm - High</td>\n",
       "      <td>ART BP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220181</th>\n",
       "      <td>Non Invasive Blood Pressure mean</td>\n",
       "      <td>NBPm</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220180</th>\n",
       "      <td>Non Invasive Blood Pressure diastolic</td>\n",
       "      <td>NBPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220179</th>\n",
       "      <td>Non Invasive Blood Pressure systolic</td>\n",
       "      <td>NBPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220074</th>\n",
       "      <td>Central Venous Pressure</td>\n",
       "      <td>CVP</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220073</th>\n",
       "      <td>Central Venous Pressure  Alarm - Low</td>\n",
       "      <td>CVP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220072</th>\n",
       "      <td>Central Venous Pressure Alarm - High</td>\n",
       "      <td>CVP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220069</th>\n",
       "      <td>Left Artrial Pressure</td>\n",
       "      <td>LAP</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220066</th>\n",
       "      <td>Pulmonary Artery Pressure Alarm - Low</td>\n",
       "      <td>PAP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220063</th>\n",
       "      <td>Pulmonary Artery Pressure Alarm - High</td>\n",
       "      <td>PAP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220061</th>\n",
       "      <td>Pulmonary Artery Pressure mean</td>\n",
       "      <td>PAPm</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220060</th>\n",
       "      <td>Pulmonary Artery Pressure diastolic</td>\n",
       "      <td>PAPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220059</th>\n",
       "      <td>Pulmonary Artery Pressure systolic</td>\n",
       "      <td>PAPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220058</th>\n",
       "      <td>Arterial Blood Pressure Alarm - High</td>\n",
       "      <td>ABP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227538</th>\n",
       "      <td>ART Blood Pressure Alarm - Low</td>\n",
       "      <td>ART BP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226096</th>\n",
       "      <td>Orthostatic BPd standing</td>\n",
       "      <td>Orthostatic BPd standing</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224152</th>\n",
       "      <td>Return Pressure</td>\n",
       "      <td>Return Pressure</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Dialysis</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227066</th>\n",
       "      <td>Cerebral Perfusion Pressure</td>\n",
       "      <td>CPP</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226062</th>\n",
       "      <td>Venous CO2 Pressure</td>\n",
       "      <td>PCO2 (Venous)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         label              abbreviation  \\\n",
       "itemid                                                                     \n",
       "224315                       ABI Brachial BP L         ABI Brachial BP L   \n",
       "227537         ART Blood Pressure Alarm - High       ART BP Alarm - High   \n",
       "220181        Non Invasive Blood Pressure mean                      NBPm   \n",
       "220180   Non Invasive Blood Pressure diastolic                      NBPd   \n",
       "220179    Non Invasive Blood Pressure systolic                      NBPs   \n",
       "220074                 Central Venous Pressure                       CVP   \n",
       "220073    Central Venous Pressure  Alarm - Low           CVP Alarm - Low   \n",
       "220072    Central Venous Pressure Alarm - High          CVP Alarm - High   \n",
       "220069                   Left Artrial Pressure                       LAP   \n",
       "220066   Pulmonary Artery Pressure Alarm - Low           PAP Alarm - Low   \n",
       "220063  Pulmonary Artery Pressure Alarm - High          PAP Alarm - High   \n",
       "220061          Pulmonary Artery Pressure mean                      PAPm   \n",
       "220060     Pulmonary Artery Pressure diastolic                      PAPd   \n",
       "220059      Pulmonary Artery Pressure systolic                      PAPs   \n",
       "220058    Arterial Blood Pressure Alarm - High          ABP Alarm - High   \n",
       "227538          ART Blood Pressure Alarm - Low        ART BP Alarm - Low   \n",
       "226096                Orthostatic BPd standing  Orthostatic BPd standing   \n",
       "224152                         Return Pressure           Return Pressure   \n",
       "227066             Cerebral Perfusion Pressure                       CPP   \n",
       "226062                     Venous CO2 Pressure             PCO2 (Venous)   \n",
       "\n",
       "            linksto             category unitname  score  \n",
       "itemid                                                    \n",
       "224315  chartevents                 IABP     mmHg  110.0  \n",
       "227537  chartevents               Alarms     mmHg  110.0  \n",
       "220181  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "220180  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "220179  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "220074  chartevents         Hemodynamics     mmHg  110.0  \n",
       "220073  chartevents               Alarms     mmHg  110.0  \n",
       "220072  chartevents               Alarms     mmHg  110.0  \n",
       "220069  chartevents         Hemodynamics     mmHg  110.0  \n",
       "220066  chartevents               Alarms     mmHg  110.0  \n",
       "220063  chartevents               Alarms     mmHg  110.0  \n",
       "220061  chartevents         Hemodynamics     mmHg  110.0  \n",
       "220060  chartevents         Hemodynamics     mmHg  110.0  \n",
       "220059  chartevents         Hemodynamics     mmHg  110.0  \n",
       "220058  chartevents               Alarms     mmHg  110.0  \n",
       "227538  chartevents               Alarms     mmHg  110.0  \n",
       "226096  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "224152  chartevents             Dialysis     mmHg  110.0  \n",
       "227066  chartevents         Hemodynamics     mmHg  110.0  \n",
       "226062  chartevents                 Labs     mmHg  110.0  "
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search with blood-pressure related terms; this cell shows result rows 0-19.\n",
    "out_df = explorer.search(['blood pressure', 'systolic', 'diastolic', 'mmHg'])\n",
    "out_df.iloc[:20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>226063</th>\n",
       "      <td>Venous O2 Pressure</td>\n",
       "      <td>PO2 (Venous)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225312</th>\n",
       "      <td>ART BP mean</td>\n",
       "      <td>ART BP mean</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225310</th>\n",
       "      <td>ART BP Diastolic</td>\n",
       "      <td>ART BP Diastolic</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225309</th>\n",
       "      <td>ART BP Systolic</td>\n",
       "      <td>ART BP Systolic</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223751</th>\n",
       "      <td>Non-Invasive Blood Pressure Alarm - High</td>\n",
       "      <td>NBP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227516</th>\n",
       "      <td>PO2 (Mixed Venous)</td>\n",
       "      <td>PO2 (Mixed Venous)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224166</th>\n",
       "      <td>Doppler BP</td>\n",
       "      <td>Doppler BP</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224167</th>\n",
       "      <td>Manual Blood Pressure Systolic Left</td>\n",
       "      <td>Manual BPs L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226737</th>\n",
       "      <td>AaDO2ApacheIIValue</td>\n",
       "      <td>AaDO2ApacheIIValue</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228148</th>\n",
       "      <td>ABI Ankle BP R (Impella)</td>\n",
       "      <td>ABI Ankle BP R (Impella)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220224</th>\n",
       "      <td>Arterial O2 pressure</td>\n",
       "      <td>PO2 (Arterial)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220235</th>\n",
       "      <td>Arterial CO2 Pressure</td>\n",
       "      <td>PCO2 (Arterial)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223752</th>\n",
       "      <td>Non-Invasive Blood Pressure Alarm - Low</td>\n",
       "      <td>NBP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223763</th>\n",
       "      <td>Bladder Pressure</td>\n",
       "      <td>Bladder Pressure</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224150</th>\n",
       "      <td>Filter Pressure</td>\n",
       "      <td>Filter Pressure</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Dialysis</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224149</th>\n",
       "      <td>Access Pressure</td>\n",
       "      <td>Access Pressure</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Dialysis</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227991</th>\n",
       "      <td>Intra Cranial Pressure #2 Alarm - Low</td>\n",
       "      <td>IC2 Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226094</th>\n",
       "      <td>Orthostatic BPd sitting</td>\n",
       "      <td>Orthostatic BPd sitting</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226092</th>\n",
       "      <td>Orthostatic BPd lying</td>\n",
       "      <td>Orthostatic BPd lying</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227990</th>\n",
       "      <td>Intra Cranial Pressure #2 Alarm - High</td>\n",
       "      <td>IC2 Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           label              abbreviation  \\\n",
       "itemid                                                                       \n",
       "226063                        Venous O2 Pressure              PO2 (Venous)   \n",
       "225312                               ART BP mean               ART BP mean   \n",
       "225310                          ART BP Diastolic          ART BP Diastolic   \n",
       "225309                           ART BP Systolic           ART BP Systolic   \n",
       "223751  Non-Invasive Blood Pressure Alarm - High          NBP Alarm - High   \n",
       "227516                        PO2 (Mixed Venous)        PO2 (Mixed Venous)   \n",
       "224166                                Doppler BP                Doppler BP   \n",
       "224167       Manual Blood Pressure Systolic Left              Manual BPs L   \n",
       "226737                        AaDO2ApacheIIValue        AaDO2ApacheIIValue   \n",
       "228148                  ABI Ankle BP R (Impella)  ABI Ankle BP R (Impella)   \n",
       "220224                      Arterial O2 pressure            PO2 (Arterial)   \n",
       "220235                     Arterial CO2 Pressure           PCO2 (Arterial)   \n",
       "223752   Non-Invasive Blood Pressure Alarm - Low           NBP Alarm - Low   \n",
       "223763                          Bladder Pressure          Bladder Pressure   \n",
       "224150                           Filter Pressure           Filter Pressure   \n",
       "224149                           Access Pressure           Access Pressure   \n",
       "227991     Intra Cranial Pressure #2 Alarm - Low           IC2 Alarm - Low   \n",
       "226094                   Orthostatic BPd sitting   Orthostatic BPd sitting   \n",
       "226092                     Orthostatic BPd lying     Orthostatic BPd lying   \n",
       "227990    Intra Cranial Pressure #2 Alarm - High          IC2 Alarm - High   \n",
       "\n",
       "            linksto             category unitname  score  \n",
       "itemid                                                    \n",
       "226063  chartevents                 Labs     mmHg  110.0  \n",
       "225312  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "225310  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "225309  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "223751  chartevents               Alarms     mmHg  110.0  \n",
       "227516  chartevents                 Labs     mmHg  110.0  \n",
       "224166  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "224167  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "226737  chartevents   Scores - APACHE II     mmHg  110.0  \n",
       "228148  chartevents              Impella     mmHg  110.0  \n",
       "220224  chartevents                 Labs     mmHg  110.0  \n",
       "220235  chartevents                 Labs     mmHg  110.0  \n",
       "223752  chartevents               Alarms     mmHg  110.0  \n",
       "223763  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "224150  chartevents             Dialysis     mmHg  110.0  \n",
       "224149  chartevents             Dialysis     mmHg  110.0  \n",
       "227991  chartevents               Alarms     mmHg  110.0  \n",
       "226094  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "226092  chartevents  Routine Vital Signs     mmHg  110.0  \n",
       "227990  chartevents               Alarms     mmHg  110.0  "
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Start both blood-pressure keep-lists from a single itemid each; these are\n",
    "# fresh lists, so re-running this cell resets any later additions.\n",
    "keep_dict.update({\n",
    "    data_dict.labels.BLOOD_PRESSURE_SYSTOLIC: [220179],\n",
    "    data_dict.labels.BLOOD_PRESSURE_DIASTOLIC: [220180],\n",
    "})\n",
    "# Display the next page of candidates (positions 20-39 of out_df).\n",
    "out_df.iloc[20:40]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>227989</th>\n",
       "      <td>Intra Cranial Pressure #2</td>\n",
       "      <td>IC2</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220056</th>\n",
       "      <td>Arterial Blood Pressure Alarm - Low</td>\n",
       "      <td>ABP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220052</th>\n",
       "      <td>Arterial Blood Pressure mean</td>\n",
       "      <td>ABPm</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220051</th>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>ABPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220050</th>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>ABPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228145</th>\n",
       "      <td>ABI Ankle BP L</td>\n",
       "      <td>ABI Ankle BP L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228146</th>\n",
       "      <td>ABI Brachial BP R</td>\n",
       "      <td>ABI Brachial BP R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227039</th>\n",
       "      <td>PO2_ApacheIV</td>\n",
       "      <td>PO2_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220765</th>\n",
       "      <td>Intra Cranial Pressure</td>\n",
       "      <td>ICP</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227023</th>\n",
       "      <td>MAP_ApacheIV</td>\n",
       "      <td>MAP_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224309</th>\n",
       "      <td>Assisted Systole</td>\n",
       "      <td>SYS - Assisted</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224310</th>\n",
       "      <td>Augmented Diastole</td>\n",
       "      <td>AUG</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224311</th>\n",
       "      <td>BAEDP</td>\n",
       "      <td>DIA - Assisted</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224314</th>\n",
       "      <td>ABI Brachial BP R (Impella)</td>\n",
       "      <td>ABI Brachial BP R (Impella)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224317</th>\n",
       "      <td>ABI Ankle BP R</td>\n",
       "      <td>ABI Ankle BP R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224318</th>\n",
       "      <td>ABI Ankle BP L (Impella)</td>\n",
       "      <td>ABI Ankle BP L (Impella)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224322</th>\n",
       "      <td>IABP Mean</td>\n",
       "      <td>MAP - Assisted</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223771</th>\n",
       "      <td>PCWP</td>\n",
       "      <td>PCWP</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223768</th>\n",
       "      <td>Intra Cranial Pressure Alarm - Low</td>\n",
       "      <td>ICP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223767</th>\n",
       "      <td>Intra Cranial Pressure Alarm - High</td>\n",
       "      <td>ICP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                      label                 abbreviation  \\\n",
       "itemid                                                                     \n",
       "227989            Intra Cranial Pressure #2                          IC2   \n",
       "220056  Arterial Blood Pressure Alarm - Low              ABP Alarm - Low   \n",
       "220052         Arterial Blood Pressure mean                         ABPm   \n",
       "220051    Arterial Blood Pressure diastolic                         ABPd   \n",
       "220050     Arterial Blood Pressure systolic                         ABPs   \n",
       "228145                       ABI Ankle BP L              ABI Ankle BP L    \n",
       "228146                    ABI Brachial BP R           ABI Brachial BP R    \n",
       "227039                         PO2_ApacheIV                 PO2_ApacheIV   \n",
       "220765               Intra Cranial Pressure                          ICP   \n",
       "227023                         MAP_ApacheIV                 MAP_ApacheIV   \n",
       "224309                     Assisted Systole               SYS - Assisted   \n",
       "224310                   Augmented Diastole                          AUG   \n",
       "224311                                BAEDP               DIA - Assisted   \n",
       "224314          ABI Brachial BP R (Impella)  ABI Brachial BP R (Impella)   \n",
       "224317                       ABI Ankle BP R               ABI Ankle BP R   \n",
       "224318             ABI Ankle BP L (Impella)     ABI Ankle BP L (Impella)   \n",
       "224322                            IABP Mean               MAP - Assisted   \n",
       "223771                                 PCWP                         PCWP   \n",
       "223768   Intra Cranial Pressure Alarm - Low              ICP Alarm - Low   \n",
       "223767  Intra Cranial Pressure Alarm - High             ICP Alarm - High   \n",
       "\n",
       "            linksto                category unitname  score  \n",
       "itemid                                                       \n",
       "227989  chartevents            Hemodynamics     mmHg  110.0  \n",
       "220056  chartevents                  Alarms     mmHg  110.0  \n",
       "220052  chartevents     Routine Vital Signs     mmHg  110.0  \n",
       "220051  chartevents     Routine Vital Signs     mmHg  110.0  \n",
       "220050  chartevents     Routine Vital Signs     mmHg  110.0  \n",
       "228145  chartevents                    IABP     mmHg  110.0  \n",
       "228146  chartevents                    IABP     mmHg  110.0  \n",
       "227039  chartevents  Scores - APACHE IV (2)     mmHg  110.0  \n",
       "220765  chartevents            Hemodynamics     mmHg  110.0  \n",
       "227023  chartevents  Scores - APACHE IV (2)     mmHg  110.0  \n",
       "224309  chartevents                    IABP     mmHg  110.0  \n",
       "224310  chartevents                    IABP     mmHg  110.0  \n",
       "224311  chartevents                    IABP     mmHg  110.0  \n",
       "224314  chartevents                 Impella     mmHg  110.0  \n",
       "224317  chartevents                    IABP     mmHg  110.0  \n",
       "224318  chartevents                 Impella     mmHg  110.0  \n",
       "224322  chartevents                    IABP     mmHg  110.0  \n",
       "223771  chartevents            Hemodynamics     mmHg  110.0  \n",
       "223768  chartevents                  Alarms     mmHg  110.0  \n",
       "223767  chartevents                  Alarms     mmHg  110.0  "
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extend the blood-pressure keep-lists with more itemids. Ids already present\n",
    "# are skipped, so re-running this cell does not append duplicates.\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC] += [\n",
    "    itemid for itemid in [225309, 224167]\n",
    "    if itemid not in keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC]\n",
    "]\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC] += [\n",
    "    itemid for itemid in [225310, 224643]\n",
    "    if itemid not in keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC]\n",
    "]\n",
    "# Display the next page of candidates (positions 40-59 of out_df).\n",
    "out_df.iloc[40:60]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>223766</th>\n",
       "      <td>Orthostatic BPs standing</td>\n",
       "      <td>Orthostatic BPs standing</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228149</th>\n",
       "      <td>ABI Brachial BP L (Impella)</td>\n",
       "      <td>ABI Brachial BP L (Impella)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224151</th>\n",
       "      <td>Effluent Pressure</td>\n",
       "      <td>Effluent Pressure</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Dialysis</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224643</th>\n",
       "      <td>Manual Blood Pressure Diastolic Left</td>\n",
       "      <td>Manual BPd L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224654</th>\n",
       "      <td>PAEDP</td>\n",
       "      <td>DIA - Unassisted</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226855</th>\n",
       "      <td>PCWP (mean) (PA Line)</td>\n",
       "      <td>PCWP (mean) (PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226857</th>\n",
       "      <td>PA mean pressure (PA Line)</td>\n",
       "      <td>PA mean pressure (PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224646</th>\n",
       "      <td>Orthostatic BPs sitting</td>\n",
       "      <td>Orthostatic BPs sitting</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227716</th>\n",
       "      <td>Cerebral Perfusion Pressure Alarm - High</td>\n",
       "      <td>CPP Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227717</th>\n",
       "      <td>Cerebral Perfusion Pressure Alarm - Low</td>\n",
       "      <td>CPP Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224645</th>\n",
       "      <td>Orthostatic BPs lying</td>\n",
       "      <td>Orthostatic BPs lying</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227242</th>\n",
       "      <td>Manual Blood Pressure Diastolic Right</td>\n",
       "      <td>Manual BPd R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226766</th>\n",
       "      <td>MapApacheIIValue</td>\n",
       "      <td>MapApacheIIValue</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227243</th>\n",
       "      <td>Manual Blood Pressure Systolic Right</td>\n",
       "      <td>Manual BPs R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226853</th>\n",
       "      <td>PA diastolic pressure(PA Line)</td>\n",
       "      <td>PA diastolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226849</th>\n",
       "      <td>RA (mean) pressure (PA Line)</td>\n",
       "      <td>RA (mean) pressure (PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226850</th>\n",
       "      <td>RV systolic pressure(PA Line)</td>\n",
       "      <td>RV systolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226851</th>\n",
       "      <td>RV diastolic pressure(PA Line)</td>\n",
       "      <td>RV diastolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224659</th>\n",
       "      <td>Vacuum Assist</td>\n",
       "      <td>Vacuum Assist</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226854</th>\n",
       "      <td>PCWP (v wave) (PA Line)</td>\n",
       "      <td>PCWP (v wave) (PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           label  \\\n",
       "itemid                                             \n",
       "223766                  Orthostatic BPs standing   \n",
       "228149               ABI Brachial BP L (Impella)   \n",
       "224151                         Effluent Pressure   \n",
       "224643      Manual Blood Pressure Diastolic Left   \n",
       "224654                                     PAEDP   \n",
       "226855                     PCWP (mean) (PA Line)   \n",
       "226857                PA mean pressure (PA Line)   \n",
       "224646                   Orthostatic BPs sitting   \n",
       "227716  Cerebral Perfusion Pressure Alarm - High   \n",
       "227717   Cerebral Perfusion Pressure Alarm - Low   \n",
       "224645                     Orthostatic BPs lying   \n",
       "227242     Manual Blood Pressure Diastolic Right   \n",
       "226766                          MapApacheIIValue   \n",
       "227243      Manual Blood Pressure Systolic Right   \n",
       "226853            PA diastolic pressure(PA Line)   \n",
       "226849              RA (mean) pressure (PA Line)   \n",
       "226850             RV systolic pressure(PA Line)   \n",
       "226851            RV diastolic pressure(PA Line)   \n",
       "224659                             Vacuum Assist   \n",
       "226854                   PCWP (v wave) (PA Line)   \n",
       "\n",
       "                          abbreviation      linksto             category  \\\n",
       "itemid                                                                     \n",
       "223766        Orthostatic BPs standing  chartevents  Routine Vital Signs   \n",
       "228149     ABI Brachial BP L (Impella)  chartevents              Impella   \n",
       "224151               Effluent Pressure  chartevents             Dialysis   \n",
       "224643                    Manual BPd L  chartevents  Routine Vital Signs   \n",
       "224654                DIA - Unassisted  chartevents                 IABP   \n",
       "226855           PCWP (mean) (PA Line)  chartevents    PA Line Insertion   \n",
       "226857      PA mean pressure (PA Line)  chartevents    PA Line Insertion   \n",
       "224646         Orthostatic BPs sitting  chartevents  Routine Vital Signs   \n",
       "227716                CPP Alarm - High  chartevents               Alarms   \n",
       "227717                 CPP Alarm - Low  chartevents               Alarms   \n",
       "224645           Orthostatic BPs lying  chartevents  Routine Vital Signs   \n",
       "227242                    Manual BPd R  chartevents  Routine Vital Signs   \n",
       "226766                MapApacheIIValue  chartevents   Scores - APACHE II   \n",
       "227243                    Manual BPs R  chartevents  Routine Vital Signs   \n",
       "226853  PA diastolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "226849    RA (mean) pressure (PA Line)  chartevents    PA Line Insertion   \n",
       "226850   RV systolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "226851  RV diastolic pressure(PA Line)  chartevents    PA Line Insertion   \n",
       "224659                   Vacuum Assist  chartevents         Hemodynamics   \n",
       "226854         PCWP (v wave) (PA Line)  chartevents    PA Line Insertion   \n",
       "\n",
       "       unitname  score  \n",
       "itemid                  \n",
       "223766     mmHg  110.0  \n",
       "228149     mmHg  110.0  \n",
       "224151     mmHg  110.0  \n",
       "224643     mmHg  110.0  \n",
       "224654     mmHg  110.0  \n",
       "226855     mmHg  110.0  \n",
       "226857     mmHg  110.0  \n",
       "224646     mmHg  110.0  \n",
       "227716     mmHg  110.0  \n",
       "227717     mmHg  110.0  \n",
       "224645     mmHg  110.0  \n",
       "227242     mmHg  110.0  \n",
       "226766     mmHg  110.0  \n",
       "227243     mmHg  110.0  \n",
       "226853     mmHg  110.0  \n",
       "226849     mmHg  110.0  \n",
       "226850     mmHg  110.0  \n",
       "226851     mmHg  110.0  \n",
       "224659     mmHg  110.0  \n",
       "226854     mmHg  110.0  "
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Extend the blood-pressure keep-lists with more itemids. Ids already present\n",
    "# are skipped, so re-running this cell does not append duplicates.\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC] += [\n",
    "    itemid for itemid in [220050]\n",
    "    if itemid not in keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC]\n",
    "]\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC] += [\n",
    "    itemid for itemid in [220051]\n",
    "    if itemid not in keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC]\n",
    "]\n",
    "# Display the next page of candidates (positions 60-79 of out_df).\n",
    "out_df.iloc[60:80]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>228158</th>\n",
       "      <td>Purge Pressure</td>\n",
       "      <td>Purge Pressure</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226852</th>\n",
       "      <td>PA systolic pressure(PA Line)</td>\n",
       "      <td>PA systolic pressure(PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224652</th>\n",
       "      <td>Unassisted Systole</td>\n",
       "      <td>SYS - Unassisted</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>IABP</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7643</th>\n",
       "      <td>RVSYSTOLIC</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>102.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8441</th>\n",
       "      <td>NBP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8364</th>\n",
       "      <td>ABP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8448</th>\n",
       "      <td>PAP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>455</th>\n",
       "      <td>NBP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>ABP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>PAP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8508</th>\n",
       "      <td>BP UAC [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>89.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8505</th>\n",
       "      <td>BP PAL [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>89.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3319</th>\n",
       "      <td>BP PAL [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3325</th>\n",
       "      <td>BP UAC [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8502</th>\n",
       "      <td>BP Cuff [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>87.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3313</th>\n",
       "      <td>BP Cuff [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>86.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>153</th>\n",
       "      <td>Diastolic Unloading</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>86.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227539</th>\n",
       "      <td>ART Blood Pressure Alarm Source</td>\n",
       "      <td>ART BP Alarm Source</td>\n",
       "      <td></td>\n",
       "      <td>Alarms</td>\n",
       "      <td></td>\n",
       "      <td>84.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8440</th>\n",
       "      <td>Manual BP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>666</th>\n",
       "      <td>Systolic Unloading</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  label                   abbreviation  \\\n",
       "itemid                                                                   \n",
       "228158                   Purge Pressure                 Purge Pressure   \n",
       "226852    PA systolic pressure(PA Line)  PA systolic pressure(PA Line)   \n",
       "224652               Unassisted Systole               SYS - Unassisted   \n",
       "7643                         RVSYSTOLIC                                  \n",
       "8441                    NBP [Diastolic]                                  \n",
       "8364                    ABP [Diastolic]                                  \n",
       "8448                    PAP [Diastolic]                                  \n",
       "455                      NBP [Systolic]                                  \n",
       "6                        ABP [Systolic]                                  \n",
       "492                      PAP [Systolic]                                  \n",
       "8508                 BP UAC [Diastolic]                                  \n",
       "8505                 BP PAL [Diastolic]                                  \n",
       "3319                  BP PAL [Systolic]                                  \n",
       "3325                  BP UAC [Systolic]                                  \n",
       "8502                BP Cuff [Diastolic]                                  \n",
       "3313                 BP Cuff [Systolic]                                  \n",
       "153                 Diastolic Unloading                                  \n",
       "227539  ART Blood Pressure Alarm Source            ART BP Alarm Source   \n",
       "8440              Manual BP [Diastolic]                                  \n",
       "666                  Systolic Unloading                                  \n",
       "\n",
       "            linksto           category unitname       score  \n",
       "itemid                                                       \n",
       "228158  chartevents            Impella     mmHg  110.000000  \n",
       "226852  chartevents  PA Line Insertion     mmHg  110.000000  \n",
       "224652  chartevents               IABP     mmHg  110.000000  \n",
       "7643    chartevents                              102.666667  \n",
       "8441    chartevents                               95.666667  \n",
       "8364    chartevents                               95.666667  \n",
       "8448    chartevents                               95.666667  \n",
       "455     chartevents                               94.333333  \n",
       "6       chartevents                               94.333333  \n",
       "492     chartevents                               94.333333  \n",
       "8508    chartevents                               89.666667  \n",
       "8505    chartevents                               89.666667  \n",
       "3319    chartevents                               88.000000  \n",
       "3325    chartevents                               88.000000  \n",
       "8502    chartevents                               87.666667  \n",
       "3313    chartevents                               86.333333  \n",
       "153     chartevents                               86.000000  \n",
       "227539                          Alarms            84.666667  \n",
       "8440    chartevents                               84.666667  \n",
       "666     chartevents                               84.666667  "
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add one more itemid to the systolic and diastolic keep-lists, then page to rows 80:100 of out_df.\n",
    "# NOTE(review): assuming the keep_dict values are lists, every execution appends again,\n",
    "# so re-running this cell would store the same ids twice.\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC].extend([227243])\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC].extend([227242])\n",
    "out_df.iloc[80:100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6417</th>\n",
       "      <td>low pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7133</th>\n",
       "      <td>abd pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6944</th>\n",
       "      <td>LOW PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2027</th>\n",
       "      <td>Low pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6107</th>\n",
       "      <td>Low Pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442</th>\n",
       "      <td>Manual BP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>83.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8503</th>\n",
       "      <td>BP Left Arm [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8504</th>\n",
       "      <td>BP Left Leg [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8368</th>\n",
       "      <td>Arterial BP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8506</th>\n",
       "      <td>BP Right Arm [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8507</th>\n",
       "      <td>BP Right Leg [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>Arterial BP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3315</th>\n",
       "      <td>BP Left Arm [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3317</th>\n",
       "      <td>BP Left Leg [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3321</th>\n",
       "      <td>BP Right Arm [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3323</th>\n",
       "      <td>BP Right Leg [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8555</th>\n",
       "      <td>Arterial BP #2 [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>Access mmHg</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44560</th>\n",
       "      <td>blood</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70015</th>\n",
       "      <td>BLOOD</td>\n",
       "      <td></td>\n",
       "      <td>microbiologyevents</td>\n",
       "      <td>SPECIMEN</td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             label abbreviation             linksto  category  \\\n",
       "itemid                                                                          \n",
       "6417                  low pressure                      chartevents             \n",
       "7133                  abd pressure                      chartevents             \n",
       "6944                  LOW PRESSURE                      chartevents             \n",
       "2027                  Low pressure                      chartevents             \n",
       "6107                  Low Pressure                      chartevents             \n",
       "442           Manual BP [Systolic]                      chartevents             \n",
       "8503       BP Left Arm [Diastolic]                      chartevents             \n",
       "8504       BP Left Leg [Diastolic]                      chartevents             \n",
       "8368       Arterial BP [Diastolic]                      chartevents             \n",
       "8506      BP Right Arm [Diastolic]                      chartevents             \n",
       "8507      BP Right Leg [Diastolic]                      chartevents             \n",
       "51          Arterial BP [Systolic]                      chartevents             \n",
       "3315        BP Left Arm [Systolic]                      chartevents             \n",
       "3317        BP Left Leg [Systolic]                      chartevents             \n",
       "3321       BP Right Arm [Systolic]                      chartevents             \n",
       "3323       BP Right Leg [Systolic]                      chartevents             \n",
       "8555    Arterial BP #2 [Diastolic]                      chartevents             \n",
       "29                     Access mmHg                      chartevents             \n",
       "44560                        blood                   inputevents_cv             \n",
       "70015                        BLOOD               microbiologyevents  SPECIMEN   \n",
       "\n",
       "       unitname      score  \n",
       "itemid                      \n",
       "6417             84.333333  \n",
       "7133             84.333333  \n",
       "6944             84.333333  \n",
       "2027             84.333333  \n",
       "6107             84.333333  \n",
       "442              83.000000  \n",
       "8503             82.000000  \n",
       "8504             82.000000  \n",
       "8368             82.000000  \n",
       "8506             81.000000  \n",
       "8507             81.000000  \n",
       "51               80.000000  \n",
       "3315             80.000000  \n",
       "3317             80.000000  \n",
       "3321             79.000000  \n",
       "3323             79.000000  \n",
       "8555             79.000000  \n",
       "29               78.666667  \n",
       "44560            78.666667  \n",
       "70015            78.666667  "
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep the NBP and ABP itemids listed in rows 80:100 (455 / 6 systolic, 8441 / 8364 diastolic),\n",
    "# then page to rows 100:120 of out_df.\n",
    "# NOTE(review): assuming the keep_dict values are lists, every execution appends again,\n",
    "# so re-running this cell would store the same ids twice.\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC].extend([455, 6])\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC].extend([8441, 8364])\n",
    "out_df.iloc[100:120]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>51466</th>\n",
       "      <td>BLOOD</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6127</th>\n",
       "      <td>LO Press</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46780</th>\n",
       "      <td>Bladdar pressure</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1404</th>\n",
       "      <td>Bladder pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>993</th>\n",
       "      <td>bladder pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6701</th>\n",
       "      <td>Arterial BP #2 [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1411</th>\n",
       "      <td>Bladder Pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>996</th>\n",
       "      <td>BLADDER PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44862</th>\n",
       "      <td>BLADDER PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>77.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>770</th>\n",
       "      <td>AST</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Enzymes</td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220587</th>\n",
       "      <td>AST</td>\n",
       "      <td>AST</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>None</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2347</th>\n",
       "      <td>BLADDER PRESSURE.</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6631</th>\n",
       "      <td>pressure low</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>75.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3108</th>\n",
       "      <td>PA PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>75.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7504</th>\n",
       "      <td>Driv pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2779</th>\n",
       "      <td>bladder pressures</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43763</th>\n",
       "      <td>Bladder presure</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>74.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1509</th>\n",
       "      <td>bld pres</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>73.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6540</th>\n",
       "      <td>IntraABd pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>73.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8444</th>\n",
       "      <td>Orthostat BP sitting [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>73.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   label abbreviation         linksto  \\\n",
       "itemid                                                                  \n",
       "51466                              BLOOD          NaN       labevents   \n",
       "6127                            LO Press                  chartevents   \n",
       "46780                   Bladdar pressure               inputevents_cv   \n",
       "1404                    Bladder pressure                  chartevents   \n",
       "993                     bladder pressure                  chartevents   \n",
       "6701           Arterial BP #2 [Systolic]                  chartevents   \n",
       "1411                    Bladder Pressure                  chartevents   \n",
       "996                     BLADDER PRESSURE                  chartevents   \n",
       "44862                   BLADDER PRESSURE               inputevents_cv   \n",
       "770                                  AST                  chartevents   \n",
       "220587                               AST          AST     chartevents   \n",
       "2347                   BLADDER PRESSURE.                  chartevents   \n",
       "6631                        pressure low                  chartevents   \n",
       "3108                         PA PRESSURE                  chartevents   \n",
       "7504                       Driv pressure                  chartevents   \n",
       "2779                   bladder pressures                  chartevents   \n",
       "43763                    Bladder presure               inputevents_cv   \n",
       "1509                            bld pres                  chartevents   \n",
       "6540                   IntraABd pressure                  chartevents   \n",
       "8444    Orthostat BP sitting [Diastolic]                  chartevents   \n",
       "\n",
       "                category unitname      score  \n",
       "itemid                                        \n",
       "51466         HEMATOLOGY      NaN  78.666667  \n",
       "6127                               78.000000  \n",
       "46780                              77.000000  \n",
       "1404                               77.000000  \n",
       "993                                77.000000  \n",
       "6701                               77.000000  \n",
       "1411                               77.000000  \n",
       "996                                77.000000  \n",
       "44862   Free Form Intake           77.000000  \n",
       "770              Enzymes           76.666667  \n",
       "220587              Labs     None  76.666667  \n",
       "2347                               76.000000  \n",
       "6631                               75.666667  \n",
       "3108                               75.333333  \n",
       "7504                               75.000000  \n",
       "2779                               75.000000  \n",
       "43763                              74.333333  \n",
       "1509                               73.666667  \n",
       "6540                               73.666667  \n",
       "8444                               73.333333  "
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep the BP Cuff itemids (3313 systolic, 8502 diastolic) and Manual BP [Diastolic] (8440),\n",
    "# then page to rows 120:140 of out_df.\n",
    "# NOTE(review): assuming the keep_dict values are lists, every execution appends again,\n",
    "# so re-running this cell would store the same ids twice.\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC].extend([3313])\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC].extend([8502, 8440])\n",
    "out_df.iloc[120:140]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8446</th>\n",
       "      <td>Orthostatic BP lying [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>73.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8445</th>\n",
       "      <td>OrthostatBP standing [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>73.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6302</th>\n",
       "      <td>low press</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>72.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2562</th>\n",
       "      <td>INTRA ABD PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>72.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45682</th>\n",
       "      <td>bladder pressure in</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>72.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7124</th>\n",
       "      <td>Low pressure Alarm</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>72.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43744</th>\n",
       "      <td>NS bladder pressure</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>72.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228151</th>\n",
       "      <td>Aortic Pressure Signal - Diastolic</td>\n",
       "      <td>Aortic Pressure Signal - Diastolic</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>None</td>\n",
       "      <td>72.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42735</th>\n",
       "      <td>Intra-abd. Pressure</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>71.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>484</th>\n",
       "      <td>Orthostatic BP lying [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>482</th>\n",
       "      <td>OrthostatBP standing [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>480</th>\n",
       "      <td>Orthostat BP sitting [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44344</th>\n",
       "      <td>BLADDER PRESSURE FLD</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>71.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44187</th>\n",
       "      <td>Blood emesis</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>71.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3238</th>\n",
       "      <td>CSF PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>71.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2965</th>\n",
       "      <td>INTRAABDOM. PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>70.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228152</th>\n",
       "      <td>Aortic Pressure Signal - Systolic</td>\n",
       "      <td>Aortic Pressure Signal - Systolic</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>None</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50934</th>\n",
       "      <td>H</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>CHEMISTRY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7574</th>\n",
       "      <td>lumbar pressure</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>69.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2704</th>\n",
       "      <td>ABDOMINAL PRESSURE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>69.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     label  \\\n",
       "itemid                                       \n",
       "8446      Orthostatic BP lying [Diastolic]   \n",
       "8445      OrthostatBP standing [Diastolic]   \n",
       "6302                             low press   \n",
       "2562                    INTRA ABD PRESSURE   \n",
       "45682                  bladder pressure in   \n",
       "7124                    Low pressure Alarm   \n",
       "43744                  NS bladder pressure   \n",
       "228151  Aortic Pressure Signal - Diastolic   \n",
       "42735                  Intra-abd. Pressure   \n",
       "484        Orthostatic BP lying [Systolic]   \n",
       "482        OrthostatBP standing [Systolic]   \n",
       "480        Orthostat BP sitting [Systolic]   \n",
       "44344                 BLADDER PRESSURE FLD   \n",
       "44187                         Blood emesis   \n",
       "3238                          CSF PRESSURE   \n",
       "2965                  INTRAABDOM. PRESSURE   \n",
       "228152   Aortic Pressure Signal - Systolic   \n",
       "50934                                    H   \n",
       "7574                       lumbar pressure   \n",
       "2704                    ABDOMINAL PRESSURE   \n",
       "\n",
       "                              abbreviation         linksto          category  \\\n",
       "itemid                                                                         \n",
       "8446                                           chartevents                     \n",
       "8445                                           chartevents                     \n",
       "6302                                           chartevents                     \n",
       "2562                                           chartevents                     \n",
       "45682                                       inputevents_cv  Free Form Intake   \n",
       "7124                                           chartevents                     \n",
       "43744                                       inputevents_cv  Free Form Intake   \n",
       "228151  Aortic Pressure Signal - Diastolic     chartevents           Impella   \n",
       "42735                                       inputevents_cv  Free Form Intake   \n",
       "484                                            chartevents                     \n",
       "482                                            chartevents                     \n",
       "480                                            chartevents                     \n",
       "44344                                       inputevents_cv  Free Form Intake   \n",
       "44187                                         outputevents  Free Form Intake   \n",
       "3238                                           chartevents                     \n",
       "2965                                           chartevents                     \n",
       "228152   Aortic Pressure Signal - Systolic     chartevents           Impella   \n",
       "50934                                  NaN       labevents         CHEMISTRY   \n",
       "7574                                           chartevents                     \n",
       "2704                                           chartevents                     \n",
       "\n",
       "       unitname      score  \n",
       "itemid                      \n",
       "8446             73.333333  \n",
       "8445             73.333333  \n",
       "6302             72.666667  \n",
       "2562             72.333333  \n",
       "45682            72.333333  \n",
       "7124             72.333333  \n",
       "43744            72.333333  \n",
       "228151     None  72.000000  \n",
       "42735            71.666667  \n",
       "484              71.333333  \n",
       "482              71.333333  \n",
       "480              71.333333  \n",
       "44344            71.000000  \n",
       "44187            71.000000  \n",
       "3238             71.000000  \n",
       "2965             70.333333  \n",
       "228152     None  70.000000  \n",
       "50934       NaN  70.000000  \n",
       "7574             69.666667  \n",
       "2704             69.666667  "
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC] += [442,3315,51,3317]\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC] += [8368,8503,8504,8507,8506]\n",
    "out_df.iloc[140:160]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Systolic: add the right arm / right leg itemids (3321, 3323).\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC].extend([3321, 3323])\n",
    "# Diastolic: add 8555 (Arterial BP #2 [Diastolic]).\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC].extend([8555])\n",
    "# Mean pressure: the keep-list is assigned outright rather than extended.\n",
    "keep_dict[data_dict.labels.BLOOD_PRESSURE_MEAN] = [220181, 225312, 220052]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Add itemid 6701 to the systolic keep-list.\n",
    "# NOTE(review): presumably the systolic counterpart of 8555\n",
    "# (Arterial BP #2 [Diastolic]) -- verify against out_df.\n",
    "# TODO(review): the saved systolic table in the next cell does not list 6701,\n",
    "# so that output predates this addition; re-run both cells to refresh it.\n",
    "# The membership guard keeps this cell idempotent: running it again never\n",
    "# appends 6701 a second time (which would duplicate the row in out_df.loc[...]).\n",
    "if 6701 not in keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC]:\n",
    "    keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC] += [6701]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>220179</th>\n",
       "      <td>Non Invasive Blood Pressure systolic</td>\n",
       "      <td>NBPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225309</th>\n",
       "      <td>ART BP Systolic</td>\n",
       "      <td>ART BP Systolic</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224167</th>\n",
       "      <td>Manual Blood Pressure Systolic Left</td>\n",
       "      <td>Manual BPs L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220050</th>\n",
       "      <td>Arterial Blood Pressure systolic</td>\n",
       "      <td>ABPs</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227243</th>\n",
       "      <td>Manual Blood Pressure Systolic Right</td>\n",
       "      <td>Manual BPs R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>455</th>\n",
       "      <td>NBP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>ABP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3313</th>\n",
       "      <td>BP Cuff [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>86.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>442</th>\n",
       "      <td>Manual BP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>83.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3315</th>\n",
       "      <td>BP Left Arm [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>Arterial BP [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3317</th>\n",
       "      <td>BP Left Leg [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3321</th>\n",
       "      <td>BP Right Arm [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3323</th>\n",
       "      <td>BP Right Leg [Systolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       label     abbreviation      linksto  \\\n",
       "itemid                                                                       \n",
       "220179  Non Invasive Blood Pressure systolic             NBPs  chartevents   \n",
       "225309                       ART BP Systolic  ART BP Systolic  chartevents   \n",
       "224167   Manual Blood Pressure Systolic Left     Manual BPs L  chartevents   \n",
       "220050      Arterial Blood Pressure systolic             ABPs  chartevents   \n",
       "227243  Manual Blood Pressure Systolic Right     Manual BPs R  chartevents   \n",
       "455                           NBP [Systolic]                   chartevents   \n",
       "6                             ABP [Systolic]                   chartevents   \n",
       "3313                      BP Cuff [Systolic]                   chartevents   \n",
       "442                     Manual BP [Systolic]                   chartevents   \n",
       "3315                  BP Left Arm [Systolic]                   chartevents   \n",
       "51                    Arterial BP [Systolic]                   chartevents   \n",
       "3317                  BP Left Leg [Systolic]                   chartevents   \n",
       "3321                 BP Right Arm [Systolic]                   chartevents   \n",
       "3323                 BP Right Leg [Systolic]                   chartevents   \n",
       "\n",
       "                   category unitname       score  \n",
       "itemid                                            \n",
       "220179  Routine Vital Signs     mmHg  110.000000  \n",
       "225309  Routine Vital Signs     mmHg  110.000000  \n",
       "224167  Routine Vital Signs     mmHg  110.000000  \n",
       "220050  Routine Vital Signs     mmHg  110.000000  \n",
       "227243  Routine Vital Signs     mmHg  110.000000  \n",
       "455                                    94.333333  \n",
       "6                                      94.333333  \n",
       "3313                                   86.333333  \n",
       "442                                    83.000000  \n",
       "3315                                   80.000000  \n",
       "51                                     80.000000  \n",
       "3317                                   80.000000  \n",
       "3321                                   79.000000  \n",
       "3323                                   79.000000  "
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df.loc[keep_dict[data_dict.labels.BLOOD_PRESSURE_SYSTOLIC]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>220180</th>\n",
       "      <td>Non Invasive Blood Pressure diastolic</td>\n",
       "      <td>NBPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225310</th>\n",
       "      <td>ART BP Diastolic</td>\n",
       "      <td>ART BP Diastolic</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224643</th>\n",
       "      <td>Manual Blood Pressure Diastolic Left</td>\n",
       "      <td>Manual BPd L</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220051</th>\n",
       "      <td>Arterial Blood Pressure diastolic</td>\n",
       "      <td>ABPd</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227242</th>\n",
       "      <td>Manual Blood Pressure Diastolic Right</td>\n",
       "      <td>Manual BPd R</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8441</th>\n",
       "      <td>NBP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8364</th>\n",
       "      <td>ABP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8502</th>\n",
       "      <td>BP Cuff [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>87.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8440</th>\n",
       "      <td>Manual BP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8368</th>\n",
       "      <td>Arterial BP [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8503</th>\n",
       "      <td>BP Left Arm [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8504</th>\n",
       "      <td>BP Left Leg [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8507</th>\n",
       "      <td>BP Right Leg [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8506</th>\n",
       "      <td>BP Right Arm [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8555</th>\n",
       "      <td>Arterial BP #2 [Diastolic]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        label      abbreviation      linksto  \\\n",
       "itemid                                                                         \n",
       "220180  Non Invasive Blood Pressure diastolic              NBPd  chartevents   \n",
       "225310                       ART BP Diastolic  ART BP Diastolic  chartevents   \n",
       "224643   Manual Blood Pressure Diastolic Left      Manual BPd L  chartevents   \n",
       "220051      Arterial Blood Pressure diastolic              ABPd  chartevents   \n",
       "227242  Manual Blood Pressure Diastolic Right      Manual BPd R  chartevents   \n",
       "8441                          NBP [Diastolic]                    chartevents   \n",
       "8364                          ABP [Diastolic]                    chartevents   \n",
       "8502                      BP Cuff [Diastolic]                    chartevents   \n",
       "8440                    Manual BP [Diastolic]                    chartevents   \n",
       "8368                  Arterial BP [Diastolic]                    chartevents   \n",
       "8503                  BP Left Arm [Diastolic]                    chartevents   \n",
       "8504                  BP Left Leg [Diastolic]                    chartevents   \n",
       "8507                 BP Right Leg [Diastolic]                    chartevents   \n",
       "8506                 BP Right Arm [Diastolic]                    chartevents   \n",
       "8555               Arterial BP #2 [Diastolic]                    chartevents   \n",
       "\n",
       "                   category unitname       score  \n",
       "itemid                                            \n",
       "220180  Routine Vital Signs     mmHg  110.000000  \n",
       "225310  Routine Vital Signs     mmHg  110.000000  \n",
       "224643  Routine Vital Signs     mmHg  110.000000  \n",
       "220051  Routine Vital Signs     mmHg  110.000000  \n",
       "227242  Routine Vital Signs     mmHg  110.000000  \n",
       "8441                                   95.666667  \n",
       "8364                                   95.666667  \n",
       "8502                                   87.666667  \n",
       "8440                                   84.666667  \n",
       "8368                                   82.000000  \n",
       "8503                                   82.000000  \n",
       "8504                                   82.000000  \n",
       "8507                                   81.000000  \n",
       "8506                                   81.000000  \n",
       "8555                                   79.000000  "
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df.loc[keep_dict[data_dict.labels.BLOOD_PRESSURE_DIASTOLIC]]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### O2 Sat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>50817</th>\n",
       "      <td>OXYGEN SATURATION</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220277</th>\n",
       "      <td>O2 saturation pulseoxymetry</td>\n",
       "      <td>SpO2</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>%</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>646</th>\n",
       "      <td>SpO2</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228232</th>\n",
       "      <td>PAR-Oxygen saturation</td>\n",
       "      <td>PAR-Oxygen saturation</td>\n",
       "      <td></td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td></td>\n",
       "      <td>102.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3785</th>\n",
       "      <td>PO2</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>ABG's</td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3837</th>\n",
       "      <td>pO2</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>ABG'S</td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50821</th>\n",
       "      <td>PO2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6719</th>\n",
       "      <td>SpO2-L</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1341</th>\n",
       "      <td>PULSE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1725</th>\n",
       "      <td>Pulse</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1332</th>\n",
       "      <td>pulse</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50816</th>\n",
       "      <td>OXYGEN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2955</th>\n",
       "      <td>JVO2 SAT</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2574</th>\n",
       "      <td>MVO2 SAT</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227873</th>\n",
       "      <td>Recovery O2 Sat - Aerobic Capacity</td>\n",
       "      <td>Recovery O2 Sat - Aerobic Capacity</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>OT Notes</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223772</th>\n",
       "      <td>SvO2</td>\n",
       "      <td>SvO2</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223769</th>\n",
       "      <td>O2 Saturation Pulseoxymetry Alarm - High</td>\n",
       "      <td>SpO2 Alarm - High</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227867</th>\n",
       "      <td>Activity O2 Sat - Aerobic Capacity</td>\n",
       "      <td>Activity O2 Sat - Aerobic Capacity</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>OT Notes</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223770</th>\n",
       "      <td>O2 Saturation Pulseoxymetry Alarm - Low</td>\n",
       "      <td>SpO2 Alarm - Low</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226861</th>\n",
       "      <td>ART %O2 saturation (PA Line)</td>\n",
       "      <td>ART %O2 saturation (PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225674</th>\n",
       "      <td>Mixed Venous O2% Sat</td>\n",
       "      <td>Mixed Venous O2% Sat</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226862</th>\n",
       "      <td>PA %O2 Saturation (PA Line)</td>\n",
       "      <td>PA %O2 Saturation (PA Line)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PA Line Insertion</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227686</th>\n",
       "      <td>Central Venous O2% Sat</td>\n",
       "      <td>Central Venous O2% Sat</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227546</th>\n",
       "      <td>SVV (Arterial)</td>\n",
       "      <td>SVV (Arterial)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227549</th>\n",
       "      <td>ScvO2 (Presep)</td>\n",
       "      <td>ScvO2 (Presep)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226993</th>\n",
       "      <td>ApacheIV_LOS</td>\n",
       "      <td>ApacheIV_LOS</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227008</th>\n",
       "      <td>Ejection Fraction</td>\n",
       "      <td>Ejection Fraction</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227010</th>\n",
       "      <td>FiO2_ApacheIV</td>\n",
       "      <td>FiO2_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226272</th>\n",
       "      <td>EF (CCO)</td>\n",
       "      <td>EF (CCO)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hemodynamics</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228184</th>\n",
       "      <td>SVV (PiCCO)</td>\n",
       "      <td>SVV (PiCCO)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PiCCO</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227861</th>\n",
       "      <td>Rest O2 Sat - Aerobic Capacity</td>\n",
       "      <td>Rest O2 Sat - Aerobic Capacity</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>OT Notes</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226253</th>\n",
       "      <td>SpO2 Desat Limit</td>\n",
       "      <td>SpO2 Desat Limit</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Alarms</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224704</th>\n",
       "      <td>ATC %</td>\n",
       "      <td>ATC %</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Respiratory</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228378</th>\n",
       "      <td>TFCd (NICOM)</td>\n",
       "      <td>TFCd (NICOM)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>NICOM</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226745</th>\n",
       "      <td>APACHE II Predecited Death Rate</td>\n",
       "      <td>APACHE II PDR</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227919</th>\n",
       "      <td>Rest O2 Sat - Aerobic Activity Response</td>\n",
       "      <td>Rest O2 Sat - Aerobic Activity Response</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>OT Notes</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226744</th>\n",
       "      <td>APACHE II PDR - Adjusted</td>\n",
       "      <td>APACHE II PDR - Adjusted</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228377</th>\n",
       "      <td>SVI Change</td>\n",
       "      <td>SVI Change</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>NICOM</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226754</th>\n",
       "      <td>FiO2ApacheIIValue</td>\n",
       "      <td>FiO2ApacheIIValue</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228375</th>\n",
       "      <td>Stroke Volume Index (SVI NICOM)</td>\n",
       "      <td>Stroke Volume Index (SVI NICOM)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>NICOM</td>\n",
       "      <td>%</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                           label  \\\n",
       "itemid                                             \n",
       "50817                          OXYGEN SATURATION   \n",
       "220277               O2 saturation pulseoxymetry   \n",
       "646                                         SpO2   \n",
       "228232                     PAR-Oxygen saturation   \n",
       "3785                                         PO2   \n",
       "3837                                         pO2   \n",
       "50821                                        PO2   \n",
       "6719                                      SpO2-L   \n",
       "1341                                       PULSE   \n",
       "1725                                       Pulse   \n",
       "1332                                       pulse   \n",
       "50816                                     OXYGEN   \n",
       "2955                                    JVO2 SAT   \n",
       "2574                                    MVO2 SAT   \n",
       "227873        Recovery O2 Sat - Aerobic Capacity   \n",
       "223772                                      SvO2   \n",
       "223769  O2 Saturation Pulseoxymetry Alarm - High   \n",
       "227867        Activity O2 Sat - Aerobic Capacity   \n",
       "223770   O2 Saturation Pulseoxymetry Alarm - Low   \n",
       "226861              ART %O2 saturation (PA Line)   \n",
       "225674                      Mixed Venous O2% Sat   \n",
       "226862               PA %O2 Saturation (PA Line)   \n",
       "227686                    Central Venous O2% Sat   \n",
       "227546                            SVV (Arterial)   \n",
       "227549                            ScvO2 (Presep)   \n",
       "226993                              ApacheIV_LOS   \n",
       "227008                         Ejection Fraction   \n",
       "227010                             FiO2_ApacheIV   \n",
       "226272                                  EF (CCO)   \n",
       "228184                               SVV (PiCCO)   \n",
       "227861            Rest O2 Sat - Aerobic Capacity   \n",
       "226253                          SpO2 Desat Limit   \n",
       "224704                                     ATC %   \n",
       "228378                              TFCd (NICOM)   \n",
       "226745           APACHE II Predecited Death Rate   \n",
       "227919   Rest O2 Sat - Aerobic Activity Response   \n",
       "226744                  APACHE II PDR - Adjusted   \n",
       "228377                                SVI Change   \n",
       "226754                         FiO2ApacheIIValue   \n",
       "228375           Stroke Volume Index (SVI NICOM)   \n",
       "\n",
       "                                   abbreviation      linksto  \\\n",
       "itemid                                                         \n",
       "50817                                       NaN    labevents   \n",
       "220277                                     SpO2  chartevents   \n",
       "646                                              chartevents   \n",
       "228232                    PAR-Oxygen saturation                \n",
       "3785                                             chartevents   \n",
       "3837                                             chartevents   \n",
       "50821                                       NaN    labevents   \n",
       "6719                                             chartevents   \n",
       "1341                                             chartevents   \n",
       "1725                                             chartevents   \n",
       "1332                                             chartevents   \n",
       "50816                                       NaN    labevents   \n",
       "2955                                             chartevents   \n",
       "2574                                             chartevents   \n",
       "227873       Recovery O2 Sat - Aerobic Capacity  chartevents   \n",
       "223772                                     SvO2  chartevents   \n",
       "223769                        SpO2 Alarm - High  chartevents   \n",
       "227867       Activity O2 Sat - Aerobic Capacity  chartevents   \n",
       "223770                         SpO2 Alarm - Low  chartevents   \n",
       "226861             ART %O2 saturation (PA Line)  chartevents   \n",
       "225674                     Mixed Venous O2% Sat  chartevents   \n",
       "226862              PA %O2 Saturation (PA Line)  chartevents   \n",
       "227686                   Central Venous O2% Sat  chartevents   \n",
       "227546                           SVV (Arterial)  chartevents   \n",
       "227549                           ScvO2 (Presep)  chartevents   \n",
       "226993                             ApacheIV_LOS  chartevents   \n",
       "227008                        Ejection Fraction  chartevents   \n",
       "227010                            FiO2_ApacheIV  chartevents   \n",
       "226272                                 EF (CCO)  chartevents   \n",
       "228184                              SVV (PiCCO)  chartevents   \n",
       "227861           Rest O2 Sat - Aerobic Capacity  chartevents   \n",
       "226253                         SpO2 Desat Limit  chartevents   \n",
       "224704                                    ATC %  chartevents   \n",
       "228378                             TFCd (NICOM)  chartevents   \n",
       "226745                            APACHE II PDR  chartevents   \n",
       "227919  Rest O2 Sat - Aerobic Activity Response  chartevents   \n",
       "226744                 APACHE II PDR - Adjusted  chartevents   \n",
       "228377                               SVI Change  chartevents   \n",
       "226754                        FiO2ApacheIIValue  chartevents   \n",
       "228375          Stroke Volume Index (SVI NICOM)  chartevents   \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "50817                BLOOD GAS      NaN  110.000000  \n",
       "220277             Respiratory        %  110.000000  \n",
       "646                                      110.000000  \n",
       "228232     Routine Vital Signs           102.666667  \n",
       "3785                     ABG's           100.666667  \n",
       "3837                     ABG'S           100.666667  \n",
       "50821                BLOOD GAS      NaN  100.666667  \n",
       "6719                                      96.666667  \n",
       "1341                                      78.666667  \n",
       "1725                                      78.666667  \n",
       "1332                                      78.666667  \n",
       "50816                BLOOD GAS      NaN   78.000000  \n",
       "2955                                      78.000000  \n",
       "2574                                      78.000000  \n",
       "227873                OT Notes        %   76.666667  \n",
       "223772            Hemodynamics        %   76.666667  \n",
       "223769                  Alarms        %   76.666667  \n",
       "227867                OT Notes        %   76.666667  \n",
       "223770                  Alarms        %   76.666667  \n",
       "226861       PA Line Insertion        %   76.666667  \n",
       "225674                    Labs        %   76.666667  \n",
       "226862       PA Line Insertion        %   76.666667  \n",
       "227686                    Labs        %   76.666667  \n",
       "227546            Hemodynamics        %   76.666667  \n",
       "227549            Hemodynamics        %   76.666667  \n",
       "226993  Scores - APACHE IV (2)        %   76.666667  \n",
       "227008  Scores - APACHE IV (2)        %   76.666667  \n",
       "227010  Scores - APACHE IV (2)        %   76.666667  \n",
       "226272            Hemodynamics        %   76.666667  \n",
       "228184                   PiCCO        %   76.666667  \n",
       "227861                OT Notes        %   76.666667  \n",
       "226253                  Alarms        %   76.666667  \n",
       "224704             Respiratory        %   76.666667  \n",
       "228378                   NICOM        %   76.666667  \n",
       "226745      Scores - APACHE II        %   76.666667  \n",
       "227919                OT Notes        %   76.666667  \n",
       "226744      Scores - APACHE II        %   76.666667  \n",
       "228377                   NICOM        %   76.666667  \n",
       "226754      Scores - APACHE II        %   76.666667  \n",
       "228375                   NICOM        %   76.666667  "
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank candidate items for pulse-oximetry oxygen saturation.\n",
    "# NOTE(review): the bare '%' term appears to match on unitname -- every row from\n",
    "# the 76.67 score band down is a '%'-unit item unrelated to SpO2; confirm it is wanted.\n",
    "out_df = explorer.search(['oxygen saturation', 'O2sat', 'pulse oximetry', '%', 'spo2'])\n",
    "out_df.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# itemids kept for pulse-oximetry SpO2, picked from the search results above.\n",
    "# NOTE(review): 228232 has a blank linksto and unitname in that table --\n",
    "# confirm it actually carries numeric SpO2 readings.\n",
    "keep_dict[data_dict.labels.OXYGEN_SATURATION_PULSE_OXIMETRY] = [\n",
    "    646,     # SpO2\n",
    "    220277,  # O2 saturation pulseoxymetry\n",
    "    228232,  # PAR-Oxygen saturation\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Body Temp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>50825</th>\n",
       "      <td>TEMPERATURE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226170</th>\n",
       "      <td>Head of Bead Measurement</td>\n",
       "      <td>HOB Measurement</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Treatments</td>\n",
       "      <td>Degree</td>\n",
       "      <td>104.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223762</th>\n",
       "      <td>Temperature Celsius</td>\n",
       "      <td>Temperature C</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>?C</td>\n",
       "      <td>104.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223761</th>\n",
       "      <td>Temperature Fahrenheit</td>\n",
       "      <td>Temperature F</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td>?F</td>\n",
       "      <td>104.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>678</th>\n",
       "      <td>Temperature F</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>104.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>676</th>\n",
       "      <td>Temperature C</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>104.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>591</th>\n",
       "      <td>RLE [Temperature]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>597</th>\n",
       "      <td>RUE [Temperature]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224027</th>\n",
       "      <td>Skin Temperature</td>\n",
       "      <td>Skin Temp</td>\n",
       "      <td></td>\n",
       "      <td>Skin - Assessment</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224642</th>\n",
       "      <td>Temperature Site</td>\n",
       "      <td>Temp Site</td>\n",
       "      <td></td>\n",
       "      <td>Routine Vital Signs</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>645</th>\n",
       "      <td>Skin [Temperature]</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>679</th>\n",
       "      <td>Temperature F (calc)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>92.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>677</th>\n",
       "      <td>Temperature C (calc)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>92.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227054</th>\n",
       "      <td>TemperatureF_ApacheIV</td>\n",
       "      <td>TemperatureF_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>?F</td>\n",
       "      <td>89.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224769</th>\n",
       "      <td>LUE Temp</td>\n",
       "      <td>LUE Temp</td>\n",
       "      <td></td>\n",
       "      <td>Cardiovascular</td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224771</th>\n",
       "      <td>RLE Temp</td>\n",
       "      <td>RLE Temp</td>\n",
       "      <td></td>\n",
       "      <td>Cardiovascular</td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224773</th>\n",
       "      <td>LLE Temp</td>\n",
       "      <td>LLE Temp</td>\n",
       "      <td></td>\n",
       "      <td>Cardiovascular</td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2798</th>\n",
       "      <td>arm 90 degrees</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224674</th>\n",
       "      <td>Changes in Temperature</td>\n",
       "      <td>Changes in Temperature</td>\n",
       "      <td></td>\n",
       "      <td>Toxicology</td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224767</th>\n",
       "      <td>RUE Temp</td>\n",
       "      <td>RUE Temp</td>\n",
       "      <td></td>\n",
       "      <td>Cardiovascular</td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           label            abbreviation      linksto  \\\n",
       "itemid                                                                  \n",
       "50825                TEMPERATURE                     NaN    labevents   \n",
       "226170  Head of Bead Measurement         HOB Measurement  chartevents   \n",
       "223762       Temperature Celsius           Temperature C  chartevents   \n",
       "223761    Temperature Fahrenheit           Temperature F  chartevents   \n",
       "678                Temperature F                          chartevents   \n",
       "676                Temperature C                          chartevents   \n",
       "591            RLE [Temperature]                          chartevents   \n",
       "597            RUE [Temperature]                          chartevents   \n",
       "224027          Skin Temperature               Skin Temp                \n",
       "224642          Temperature Site               Temp Site                \n",
       "645           Skin [Temperature]                          chartevents   \n",
       "679         Temperature F (calc)                          chartevents   \n",
       "677         Temperature C (calc)                          chartevents   \n",
       "227054     TemperatureF_ApacheIV   TemperatureF_ApacheIV  chartevents   \n",
       "224769                  LUE Temp                LUE Temp                \n",
       "224771                  RLE Temp                RLE Temp                \n",
       "224773                  LLE Temp                LLE Temp                \n",
       "2798              arm 90 degrees                          chartevents   \n",
       "224674    Changes in Temperature  Changes in Temperature                \n",
       "224767                  RUE Temp                RUE Temp                \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "50825                BLOOD GAS      NaN  110.000000  \n",
       "226170              Treatments   Degree  104.666667  \n",
       "223762     Routine Vital Signs       ?C  104.666667  \n",
       "223761     Routine Vital Signs       ?F  104.666667  \n",
       "678                                      104.666667  \n",
       "676                                      104.666667  \n",
       "591                                       98.000000  \n",
       "597                                       98.000000  \n",
       "224027       Skin - Assessment            97.333333  \n",
       "224642     Routine Vital Signs            97.333333  \n",
       "645                                       95.666667  \n",
       "679                                       92.333333  \n",
       "677                                       92.333333  \n",
       "227054  Scores - APACHE IV (2)       ?F   89.333333  \n",
       "224769          Cardiovascular            88.000000  \n",
       "224771          Cardiovascular            88.000000  \n",
       "224773          Cardiovascular            88.000000  \n",
       "2798                                      88.000000  \n",
       "224674              Toxicology            88.000000  \n",
       "224767          Cardiovascular            88.000000  "
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank candidate items for body temperature.\n",
    "# The unit names are spelled 'celsius' / 'fahrenheit' to match the item labels\n",
    "# (e.g. 'Temperature Celsius', 'Temperature Fahrenheit'); earlier revisions used\n",
    "# misspelled terms. Re-run this cell to refresh the stored output.\n",
    "out_df = explorer.search([\n",
    "        'temperature',\n",
    "        'temp',\n",
    "        'celsius',\n",
    "        'fahrenheit',\n",
    "        'degrees',\n",
    "        'deg'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# itemids kept for body temperature, picked from the search results above.\n",
    "# NOTE(review): the list mixes Fahrenheit and Celsius items; presumably units are\n",
    "# normalized downstream -- confirm.\n",
    "keep_dict[data_dict.labels.TEMPERATURE_BODY] = [\n",
    "    223761,  # Temperature Fahrenheit\n",
    "    678,     # Temperature F\n",
    "    223762,  # Temperature Celsius\n",
    "    676,     # Temperature C\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Weight"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>763</th>\n",
       "      <td>Daily Weight</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226512</th>\n",
       "      <td>Admission Weight (Kg)</td>\n",
       "      <td>Admission Weight (Kg)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>General</td>\n",
       "      <td>kg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226846</th>\n",
       "      <td>Feeding Weight</td>\n",
       "      <td>Feeding Weight</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>General</td>\n",
       "      <td>kg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224639</th>\n",
       "      <td>Daily Weight</td>\n",
       "      <td>Daily Weight</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>General</td>\n",
       "      <td>kg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3693</th>\n",
       "      <td>Weight Kg</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>968</th>\n",
       "      <td>EKG</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225402</th>\n",
       "      <td>EKG</td>\n",
       "      <td>EKG</td>\n",
       "      <td>procedureevents_mv</td>\n",
       "      <td>4-Procedures</td>\n",
       "      <td>None</td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226184</th>\n",
       "      <td>Estimated Protein Needs/Kg</td>\n",
       "      <td>Estimated Protein Needs/Kg</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>General</td>\n",
       "      <td>g/kg</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>733</th>\n",
       "      <td>Weight Change</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>85.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226707</th>\n",
       "      <td>Height</td>\n",
       "      <td>Height</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>General</td>\n",
       "      <td>Inch</td>\n",
       "      <td>83.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4183</th>\n",
       "      <td>Birthweight (kg)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228179</th>\n",
       "      <td>ELWI (PiCCO)</td>\n",
       "      <td>ELWI (PiCCO)</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>PiCCO</td>\n",
       "      <td>ml/kg</td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>580</th>\n",
       "      <td>Previous Weight</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43622</th>\n",
       "      <td>cc/kg</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>581</th>\n",
       "      <td>Previous WeightF</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7000</th>\n",
       "      <td>ideal body weight</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45271</th>\n",
       "      <td>Chucks Pad Weight</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3723</th>\n",
       "      <td>Birth Weight    (kg)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3581</th>\n",
       "      <td>Present Weight  (lb)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3692</th>\n",
       "      <td>Weight Change  (gms)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                             label                abbreviation  \\\n",
       "itemid                                                           \n",
       "763                   Daily Weight                               \n",
       "226512       Admission Weight (Kg)       Admission Weight (Kg)   \n",
       "226846              Feeding Weight              Feeding Weight   \n",
       "224639                Daily Weight                Daily Weight   \n",
       "3693                     Weight Kg                               \n",
       "968                            EKG                               \n",
       "225402                         EKG                         EKG   \n",
       "226184  Estimated Protein Needs/Kg  Estimated Protein Needs/Kg   \n",
       "733                  Weight Change                               \n",
       "226707                      Height                      Height   \n",
       "4183              Birthweight (kg)                               \n",
       "228179                ELWI (PiCCO)                ELWI (PiCCO)   \n",
       "580                Previous Weight                               \n",
       "43622                        cc/kg                               \n",
       "581               Previous WeightF                               \n",
       "7000             ideal body weight                               \n",
       "45271            Chucks Pad Weight                               \n",
       "3723          Birth Weight    (kg)                               \n",
       "3581          Present Weight  (lb)                               \n",
       "3692          Weight Change  (gms)                               \n",
       "\n",
       "                   linksto      category unitname       score  \n",
       "itemid                                                         \n",
       "763            chartevents                         110.000000  \n",
       "226512         chartevents       General       kg  110.000000  \n",
       "226846         chartevents       General       kg  110.000000  \n",
       "224639         chartevents       General       kg  110.000000  \n",
       "3693           chartevents                          96.666667  \n",
       "968            chartevents                          96.666667  \n",
       "225402  procedureevents_mv  4-Procedures     None   96.666667  \n",
       "226184         chartevents       General     g/kg   88.000000  \n",
       "733            chartevents                          85.333333  \n",
       "226707         chartevents       General     Inch   83.000000  \n",
       "4183           chartevents                          81.666667  \n",
       "228179         chartevents         PiCCO    ml/kg   81.333333  \n",
       "580            chartevents                          81.333333  \n",
       "43622       inputevents_cv                          81.333333  \n",
       "581            chartevents                          80.000000  \n",
       "7000           chartevents                          78.000000  \n",
       "45271       inputevents_cv                          78.000000  \n",
       "3723           chartevents                          77.666667  \n",
       "3581           chartevents                          76.000000  \n",
       "3692           chartevents                          76.000000  "
      ]
     },
     "execution_count": 97,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up candidate body-weight items; preview the first 20 rows returned\n",
    "out_df = explorer.search(['weight', 'daily weight', 'kg'])\n",
    "out_df.head(n=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# itemids picked from the search output above\n",
    "keep_dict[data_dict.labels.WEIGHT_BODY] = [\n",
    "    763,     # Daily Weight\n",
    "    224639,  # Daily Weight (unitname kg)\n",
    "    3693,    # Weight Kg\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Urine Output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>226560</th>\n",
       "      <td>Void</td>\n",
       "      <td>Void</td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Output</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43332</th>\n",
       "      <td>void</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7672</th>\n",
       "      <td>Foley</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3686</th>\n",
       "      <td>Void</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45967</th>\n",
       "      <td>foley</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43053</th>\n",
       "      <td>URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226559</th>\n",
       "      <td>Foley</td>\n",
       "      <td>Foley</td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Output</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44103</th>\n",
       "      <td>ER urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44834</th>\n",
       "      <td>er urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227519</th>\n",
       "      <td>Urine output_ApacheIV</td>\n",
       "      <td>Urine output</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>None</td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44706</th>\n",
       "      <td>urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42892</th>\n",
       "      <td>EW URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45415</th>\n",
       "      <td>ED Urine OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43987</th>\n",
       "      <td>urine out or</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42666</th>\n",
       "      <td>E.R. URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44237</th>\n",
       "      <td>E.R. urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42592</th>\n",
       "      <td>VICU URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40069</th>\n",
       "      <td>Urine Out Void</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46423</th>\n",
       "      <td>ed foley</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43931</th>\n",
       "      <td>Floor urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        label  abbreviation         linksto  \\\n",
       "itemid                                                        \n",
       "226560                   Void          Void    outputevents   \n",
       "43332                    void                inputevents_cv   \n",
       "7672                    Foley                   chartevents   \n",
       "3686                     Void                   chartevents   \n",
       "45967                   foley                inputevents_cv   \n",
       "43053               URINE OUT                  outputevents   \n",
       "226559                  Foley         Foley    outputevents   \n",
       "44103            ER urine out                  outputevents   \n",
       "44834            er urine out                  outputevents   \n",
       "227519  Urine output_ApacheIV  Urine output     chartevents   \n",
       "44706            urine output                  outputevents   \n",
       "42892            EW URINE OUT                  outputevents   \n",
       "45415            ED Urine OUT                  outputevents   \n",
       "43987            urine out or                  outputevents   \n",
       "42666          E.R. URINE OUT                  outputevents   \n",
       "44237          E.R. urine out                  outputevents   \n",
       "42592          VICU URINE OUT                  outputevents   \n",
       "40069          Urine Out Void                  outputevents   \n",
       "46423                ed foley                inputevents_cv   \n",
       "43931         Floor urine out                  outputevents   \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "226560                  Output       mL  110.000000  \n",
       "43332                                    110.000000  \n",
       "7672                                     110.000000  \n",
       "3686                                     110.000000  \n",
       "45967                                    110.000000  \n",
       "43053                                    110.000000  \n",
       "226559                  Output       mL  110.000000  \n",
       "44103                                    100.666667  \n",
       "44834                                    100.666667  \n",
       "227519  Scores - APACHE IV (2)     None  100.666667  \n",
       "44706                                    100.666667  \n",
       "42892                                    100.666667  \n",
       "45415                                    100.666667  \n",
       "43987                                    100.666667  \n",
       "42666                                     96.666667  \n",
       "44237                                     96.666667  \n",
       "42592                                     95.333333  \n",
       "40069                                     95.333333  \n",
       "46423                                     94.666667  \n",
       "43931                                     93.333333  "
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up candidate urine-output items; preview the first 20 rows returned\n",
    "out_df = explorer.search(['urine out', 'void', 'foley'])\n",
    "out_df.head(n=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>226560</th>\n",
       "      <td>Void</td>\n",
       "      <td>Void</td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Output</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43332</th>\n",
       "      <td>void</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7672</th>\n",
       "      <td>Foley</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3686</th>\n",
       "      <td>Void</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45967</th>\n",
       "      <td>foley</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43053</th>\n",
       "      <td>URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226559</th>\n",
       "      <td>Foley</td>\n",
       "      <td>Foley</td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Output</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44103</th>\n",
       "      <td>ER urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44834</th>\n",
       "      <td>er urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227519</th>\n",
       "      <td>Urine output_ApacheIV</td>\n",
       "      <td>Urine output</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>None</td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44706</th>\n",
       "      <td>urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42892</th>\n",
       "      <td>EW URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45415</th>\n",
       "      <td>ED Urine OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43987</th>\n",
       "      <td>urine out or</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42666</th>\n",
       "      <td>E.R. URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44237</th>\n",
       "      <td>E.R. urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42592</th>\n",
       "      <td>VICU URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40069</th>\n",
       "      <td>Urine Out Void</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46423</th>\n",
       "      <td>ed foley</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>94.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43931</th>\n",
       "      <td>Floor urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43348</th>\n",
       "      <td>urine output/kg</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40405</th>\n",
       "      <td>Urine Out Other</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42810</th>\n",
       "      <td>angio urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46180</th>\n",
       "      <td>urine out-angio</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40055</th>\n",
       "      <td>Urine Out Foley</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44253</th>\n",
       "      <td>Urine out angio</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44325</th>\n",
       "      <td>ED URINE OUTPUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41857</th>\n",
       "      <td>urine out in er</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44824</th>\n",
       "      <td>EW urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45991</th>\n",
       "      <td>ew-urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42042</th>\n",
       "      <td>ANGIO URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46177</th>\n",
       "      <td>URINE OUT-ANGIO</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44684</th>\n",
       "      <td>floor urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46658</th>\n",
       "      <td>ED Urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46578</th>\n",
       "      <td>URINE OUTPUT-ER</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42765</th>\n",
       "      <td>FARR 6 URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>91.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70081</th>\n",
       "      <td>URINE</td>\n",
       "      <td></td>\n",
       "      <td>microbiologyevents</td>\n",
       "      <td>SPECIMEN</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3819</th>\n",
       "      <td>Urine Leukocytes</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Urine</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70079</th>\n",
       "      <td>URINE</td>\n",
       "      <td></td>\n",
       "      <td>microbiologyevents</td>\n",
       "      <td>SPECIMEN</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43462</th>\n",
       "      <td>urine</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        label  abbreviation             linksto  \\\n",
       "itemid                                                            \n",
       "226560                   Void          Void        outputevents   \n",
       "43332                    void                    inputevents_cv   \n",
       "7672                    Foley                       chartevents   \n",
       "3686                     Void                       chartevents   \n",
       "45967                   foley                    inputevents_cv   \n",
       "43053               URINE OUT                      outputevents   \n",
       "226559                  Foley         Foley        outputevents   \n",
       "44103            ER urine out                      outputevents   \n",
       "44834            er urine out                      outputevents   \n",
       "227519  Urine output_ApacheIV  Urine output         chartevents   \n",
       "44706            urine output                      outputevents   \n",
       "42892            EW URINE OUT                      outputevents   \n",
       "45415            ED Urine OUT                      outputevents   \n",
       "43987            urine out or                      outputevents   \n",
       "42666          E.R. URINE OUT                      outputevents   \n",
       "44237          E.R. urine out                      outputevents   \n",
       "42592          VICU URINE OUT                      outputevents   \n",
       "40069          Urine Out Void                      outputevents   \n",
       "46423                ed foley                    inputevents_cv   \n",
       "43931         Floor urine out                      outputevents   \n",
       "43348         urine output/kg                      outputevents   \n",
       "40405         Urine Out Other                      outputevents   \n",
       "42810         angio urine out                      outputevents   \n",
       "46180         urine out-angio                      outputevents   \n",
       "40055         Urine Out Foley                      outputevents   \n",
       "44253         Urine out angio                      outputevents   \n",
       "44325         ED URINE OUTPUT                      outputevents   \n",
       "41857         urine out in er                      outputevents   \n",
       "44824         EW urine output                      outputevents   \n",
       "45991         ew-urine output                      outputevents   \n",
       "42042         ANGIO URINE OUT                      outputevents   \n",
       "46177         URINE OUT-ANGIO                      outputevents   \n",
       "44684         floor urine out                      outputevents   \n",
       "46658         ED Urine output                      outputevents   \n",
       "46578         URINE OUTPUT-ER                      outputevents   \n",
       "42765        FARR 6 URINE OUT                      outputevents   \n",
       "70081                   URINE                microbiologyevents   \n",
       "3819         Urine Leukocytes                       chartevents   \n",
       "70079                   URINE                microbiologyevents   \n",
       "43462                   urine                      outputevents   \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "226560                  Output       mL  110.000000  \n",
       "43332                                    110.000000  \n",
       "7672                                     110.000000  \n",
       "3686                                     110.000000  \n",
       "45967                                    110.000000  \n",
       "43053                                    110.000000  \n",
       "226559                  Output       mL  110.000000  \n",
       "44103                                    100.666667  \n",
       "44834                                    100.666667  \n",
       "227519  Scores - APACHE IV (2)     None  100.666667  \n",
       "44706                                    100.666667  \n",
       "42892                                    100.666667  \n",
       "45415                                    100.666667  \n",
       "43987                                    100.666667  \n",
       "42666                                     96.666667  \n",
       "44237                                     96.666667  \n",
       "42592                                     95.333333  \n",
       "40069                                     95.333333  \n",
       "46423                                     94.666667  \n",
       "43931                                     93.333333  \n",
       "43348                                     93.333333  \n",
       "40405                                     93.333333  \n",
       "42810                                     93.333333  \n",
       "46180                                     93.333333  \n",
       "40055                                     93.333333  \n",
       "44253                                     93.333333  \n",
       "44325                                     93.333333  \n",
       "41857                                     93.333333  \n",
       "44824                                     93.333333  \n",
       "45991                                     93.333333  \n",
       "42042                                     93.333333  \n",
       "46177                                     93.333333  \n",
       "44684                                     93.333333  \n",
       "46658                                     93.333333  \n",
       "46578                                     93.333333  \n",
       "42765                                     91.333333  \n",
       "70081                 SPECIMEN            90.666667  \n",
       "3819                     Urine            90.666667  \n",
       "70079                 SPECIMEN            90.666667  \n",
       "43462                                     90.666667  "
      ]
     },
     "execution_count": 85,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first 40 rows of the current search results\n",
    "out_df.head(n=40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Keep only the search hits whose linksto table is outputevents or chartevents.\n",
    "event_tables = ['outputevents','chartevents']\n",
    "out_df = out_df.loc[out_df['linksto'].isin(event_tables)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>226560</th>\n",
       "      <td>Void</td>\n",
       "      <td>Void</td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Output</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7672</th>\n",
       "      <td>Foley</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3686</th>\n",
       "      <td>Void</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43053</th>\n",
       "      <td>URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226559</th>\n",
       "      <td>Foley</td>\n",
       "      <td>Foley</td>\n",
       "      <td>outputevents</td>\n",
       "      <td>Output</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44103</th>\n",
       "      <td>ER urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44834</th>\n",
       "      <td>er urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227519</th>\n",
       "      <td>Urine output_ApacheIV</td>\n",
       "      <td>Urine output</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>None</td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44706</th>\n",
       "      <td>urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42892</th>\n",
       "      <td>EW URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45415</th>\n",
       "      <td>ED Urine OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43987</th>\n",
       "      <td>urine out or</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42666</th>\n",
       "      <td>E.R. URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44237</th>\n",
       "      <td>E.R. urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42592</th>\n",
       "      <td>VICU URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40069</th>\n",
       "      <td>Urine Out Void</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>95.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43931</th>\n",
       "      <td>Floor urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43348</th>\n",
       "      <td>urine output/kg</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40405</th>\n",
       "      <td>Urine Out Other</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42810</th>\n",
       "      <td>angio urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46180</th>\n",
       "      <td>urine out-angio</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40055</th>\n",
       "      <td>Urine Out Foley</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44253</th>\n",
       "      <td>Urine out angio</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44325</th>\n",
       "      <td>ED URINE OUTPUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41857</th>\n",
       "      <td>urine out in er</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44824</th>\n",
       "      <td>EW urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45991</th>\n",
       "      <td>ew-urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42042</th>\n",
       "      <td>ANGIO URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46177</th>\n",
       "      <td>URINE OUT-ANGIO</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44684</th>\n",
       "      <td>floor urine out</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46658</th>\n",
       "      <td>ED Urine output</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46578</th>\n",
       "      <td>URINE OUTPUT-ER</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>93.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42765</th>\n",
       "      <td>FARR 6 URINE OUT</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>91.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3819</th>\n",
       "      <td>Urine Leukocytes</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Urine</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43462</th>\n",
       "      <td>urine</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3816</th>\n",
       "      <td>Urine Glucose</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Urine</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3817</th>\n",
       "      <td>Urine Heme</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Urine</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3818</th>\n",
       "      <td>Urine Ketones</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Urine</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3822</th>\n",
       "      <td>Urine Protein</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Urine</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6298</th>\n",
       "      <td>foley d/c</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        label  abbreviation       linksto  \\\n",
       "itemid                                                      \n",
       "226560                   Void          Void  outputevents   \n",
       "7672                    Foley                 chartevents   \n",
       "3686                     Void                 chartevents   \n",
       "43053               URINE OUT                outputevents   \n",
       "226559                  Foley         Foley  outputevents   \n",
       "44103            ER urine out                outputevents   \n",
       "44834            er urine out                outputevents   \n",
       "227519  Urine output_ApacheIV  Urine output   chartevents   \n",
       "44706            urine output                outputevents   \n",
       "42892            EW URINE OUT                outputevents   \n",
       "45415            ED Urine OUT                outputevents   \n",
       "43987            urine out or                outputevents   \n",
       "42666          E.R. URINE OUT                outputevents   \n",
       "44237          E.R. urine out                outputevents   \n",
       "42592          VICU URINE OUT                outputevents   \n",
       "40069          Urine Out Void                outputevents   \n",
       "43931         Floor urine out                outputevents   \n",
       "43348         urine output/kg                outputevents   \n",
       "40405         Urine Out Other                outputevents   \n",
       "42810         angio urine out                outputevents   \n",
       "46180         urine out-angio                outputevents   \n",
       "40055         Urine Out Foley                outputevents   \n",
       "44253         Urine out angio                outputevents   \n",
       "44325         ED URINE OUTPUT                outputevents   \n",
       "41857         urine out in er                outputevents   \n",
       "44824         EW urine output                outputevents   \n",
       "45991         ew-urine output                outputevents   \n",
       "42042         ANGIO URINE OUT                outputevents   \n",
       "46177         URINE OUT-ANGIO                outputevents   \n",
       "44684         floor urine out                outputevents   \n",
       "46658         ED Urine output                outputevents   \n",
       "46578         URINE OUTPUT-ER                outputevents   \n",
       "42765        FARR 6 URINE OUT                outputevents   \n",
       "3819         Urine Leukocytes                 chartevents   \n",
       "43462                   urine                outputevents   \n",
       "3816            Urine Glucose                 chartevents   \n",
       "3817               Urine Heme                 chartevents   \n",
       "3818            Urine Ketones                 chartevents   \n",
       "3822            Urine Protein                 chartevents   \n",
       "6298                foley d/c                 chartevents   \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "226560                  Output       mL  110.000000  \n",
       "7672                                     110.000000  \n",
       "3686                                     110.000000  \n",
       "43053                                    110.000000  \n",
       "226559                  Output       mL  110.000000  \n",
       "44103                                    100.666667  \n",
       "44834                                    100.666667  \n",
       "227519  Scores - APACHE IV (2)     None  100.666667  \n",
       "44706                                    100.666667  \n",
       "42892                                    100.666667  \n",
       "45415                                    100.666667  \n",
       "43987                                    100.666667  \n",
       "42666                                     96.666667  \n",
       "44237                                     96.666667  \n",
       "42592                                     95.333333  \n",
       "40069                                     95.333333  \n",
       "43931                                     93.333333  \n",
       "43348                                     93.333333  \n",
       "40405                                     93.333333  \n",
       "42810                                     93.333333  \n",
       "46180                                     93.333333  \n",
       "40055                                     93.333333  \n",
       "44253                                     93.333333  \n",
       "44325                                     93.333333  \n",
       "41857                                     93.333333  \n",
       "44824                                     93.333333  \n",
       "45991                                     93.333333  \n",
       "42042                                     93.333333  \n",
       "46177                                     93.333333  \n",
       "44684                                     93.333333  \n",
       "46658                                     93.333333  \n",
       "46578                                     93.333333  \n",
       "42765                                     91.333333  \n",
       "3819                     Urine            90.666667  \n",
       "43462                                     90.666667  \n",
       "3816                     Urine            90.666667  \n",
       "3817                     Urine            90.666667  \n",
       "3818                     Urine            90.666667  \n",
       "3822                     Urine            90.666667  \n",
       "6298                                      90.666667  "
      ]
     },
     "execution_count": 87,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Remove itemid 227519 ('Urine output_ApacheIV', category 'Scores - APACHE IV (2)')\n",
    "# from the candidate list.\n",
    "# Reassigning (rather than inplace=True) with errors='ignore' keeps this cell re-runnable:\n",
    "# without it a second execution raises KeyError because the row is already gone.\n",
    "out_df = out_df.drop(227519, axis=0, errors='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Label-based slice: every row from the top of out_df through itemid 46578 (inclusive).\n",
    "urine_hits = out_df.loc[:46578]\n",
    "# Store the distinct itemids of those rows as the urine-output item list.\n",
    "keep_dict[data_dict.labels.OUTPUT_URINE] = urine_hits.index.unique().tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://github.com/MIT-LCP/mimic-code/blob/travis/concepts/cookbook/uo.sql"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Extra urine-output itemids to keep in addition to the search hits selected above.\n",
    "# NOTE(review): presumably copied from the uo.sql query linked in the previous cell - confirm.\n",
    "to_add = [43175,40094,40715,40473,40085,40057,40056,40428,40086,40096,40651]\n",
    "to_add += [227510,226561,226584,226563,226564,226565,226567,226557,226558]\n",
    "# Append only the ids that are not already kept, preserving the existing order, so that\n",
    "# re-running this cell never introduces duplicates.\n",
    "keep_dict[data_dict.labels.OUTPUT_URINE] = keep_dict[data_dict.labels.OUTPUT_URINE] + [\n",
    "    itemid for itemid in to_add\n",
    "    if itemid not in keep_dict[data_dict.labels.OUTPUT_URINE]\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Glasgow Coma Scale (GCS)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Eye Opening"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>184</th>\n",
       "      <td>Eye Opening</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220739</th>\n",
       "      <td>GCS - Eye Opening</td>\n",
       "      <td>Eye Opening</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>GCS Total</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70033</th>\n",
       "      <td>EYE</td>\n",
       "      <td></td>\n",
       "      <td>microbiologyevents</td>\n",
       "      <td>SPECIMEN</td>\n",
       "      <td></td>\n",
       "      <td>72.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3742</th>\n",
       "      <td>Basos</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3791</th>\n",
       "      <td>Polys</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3779</th>\n",
       "      <td>Monos</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2834</th>\n",
       "      <td>ICS</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227011</th>\n",
       "      <td>GCSEye_ApacheIV</td>\n",
       "      <td>GCSEye_ApacheIV</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td></td>\n",
       "      <td>65.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226755</th>\n",
       "      <td>GcsApacheIIScore</td>\n",
       "      <td>GcsApacheIIScore</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>None</td>\n",
       "      <td>64.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41556</th>\n",
       "      <td>ng</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>64.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227012</th>\n",
       "      <td>GCSMotor_ApacheIV</td>\n",
       "      <td>GCSMotor_ApacheIV</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td></td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227013</th>\n",
       "      <td>GcsScore_ApacheIV</td>\n",
       "      <td>GcsScore_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>None</td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5700</th>\n",
       "      <td>Gent eye oint</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>63.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227014</th>\n",
       "      <td>GCSVerbal_ApacheIV</td>\n",
       "      <td>GCSVerbal_ApacheIV</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td></td>\n",
       "      <td>62.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50931</th>\n",
       "      <td>GLUCOSE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>CHEMISTRY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>62.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1529</th>\n",
       "      <td>Glucose</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Chemistry</td>\n",
       "      <td></td>\n",
       "      <td>62.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50809</th>\n",
       "      <td>GLUCOSE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>62.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51478</th>\n",
       "      <td>GLUCOSE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>62.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223901</th>\n",
       "      <td>GCS - Motor Response</td>\n",
       "      <td>Motor Response</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>61.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       label        abbreviation             linksto  \\\n",
       "itemid                                                                 \n",
       "184              Eye Opening                             chartevents   \n",
       "220739     GCS - Eye Opening         Eye Opening                       \n",
       "198                GCS Total                             chartevents   \n",
       "70033                    EYE                      microbiologyevents   \n",
       "3742                   Basos                             chartevents   \n",
       "3791                   Polys                             chartevents   \n",
       "3779                   Monos                             chartevents   \n",
       "2834                     ICS                             chartevents   \n",
       "227011       GCSEye_ApacheIV     GCSEye_ApacheIV                       \n",
       "226755      GcsApacheIIScore    GcsApacheIIScore         chartevents   \n",
       "41556                     ng                          inputevents_cv   \n",
       "227012     GCSMotor_ApacheIV   GCSMotor_ApacheIV                       \n",
       "227013     GcsScore_ApacheIV   GcsScore_ApacheIV         chartevents   \n",
       "5700           Gent eye oint                             chartevents   \n",
       "227014    GCSVerbal_ApacheIV  GCSVerbal_ApacheIV                       \n",
       "50931                GLUCOSE                 NaN           labevents   \n",
       "1529                 Glucose                             chartevents   \n",
       "50809                GLUCOSE                 NaN           labevents   \n",
       "51478                GLUCOSE                 NaN           labevents   \n",
       "223901  GCS - Motor Response      Motor Response                       \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "184                                      110.000000  \n",
       "220739            Neurological           110.000000  \n",
       "198                                       76.666667  \n",
       "70033                 SPECIMEN            72.000000  \n",
       "3742                       CSF            71.333333  \n",
       "3791                       CSF            71.333333  \n",
       "3779                       CSF            71.333333  \n",
       "2834                                      67.000000  \n",
       "227011  Scores - APACHE IV (2)            65.333333  \n",
       "226755      Scores - APACHE II     None   64.666667  \n",
       "41556                                     64.000000  \n",
       "227012  Scores - APACHE IV (2)            63.333333  \n",
       "227013  Scores - APACHE IV (2)     None   63.333333  \n",
       "5700                                      63.000000  \n",
       "227014  Scores - APACHE IV (2)            62.666667  \n",
       "50931                CHEMISTRY      NaN   62.333333  \n",
       "1529                 Chemistry            62.333333  \n",
       "50809                BLOOD GAS      NaN   62.333333  \n",
       "51478               HEMATOLOGY      NaN   62.333333  \n",
       "223901            Neurological            61.666667  "
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df = explorer.search([\n",
    "        'glasgow come scale',\n",
    "        'GCS',\n",
    "        'eye opening'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# The two items with the top score (110) in the search above.\n",
    "keep_dict[data_dict.labels.GLASGOW_COMA_SCALE_EYE_OPENING] = [\n",
    "    184,     # 'Eye Opening' (chartevents)\n",
    "    220739,  # 'GCS - Eye Opening' (category Neurological)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Motor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>228405</th>\n",
       "      <td>Motor L Leg</td>\n",
       "      <td>Motor L Leg</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>85.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228404</th>\n",
       "      <td>Motor L Arm</td>\n",
       "      <td>Motor L Arm</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>85.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228407</th>\n",
       "      <td>Motor R Leg</td>\n",
       "      <td>Motor R Leg</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>85.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228406</th>\n",
       "      <td>Motor R Arm</td>\n",
       "      <td>Motor R Arm</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>85.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227120</th>\n",
       "      <td>Motor Deficit</td>\n",
       "      <td>Motor Deficit</td>\n",
       "      <td></td>\n",
       "      <td>Pain/Sedation</td>\n",
       "      <td></td>\n",
       "      <td>80.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>453</th>\n",
       "      <td>Motor Deficits</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>454</th>\n",
       "      <td>Motor Response</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>223901</th>\n",
       "      <td>GCS - Motor Response</td>\n",
       "      <td>Motor Response</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>GCS Total</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227012</th>\n",
       "      <td>GCSMotor_ApacheIV</td>\n",
       "      <td>GCSMotor_ApacheIV</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td></td>\n",
       "      <td>73.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3779</th>\n",
       "      <td>Monos</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3742</th>\n",
       "      <td>Basos</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3791</th>\n",
       "      <td>Polys</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226757</th>\n",
       "      <td>GCSMotorApacheIIValue</td>\n",
       "      <td>GCSMotorApacheIIValue</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td></td>\n",
       "      <td>68.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2834</th>\n",
       "      <td>ICS</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225472</th>\n",
       "      <td>Pneumothorax</td>\n",
       "      <td>Pneumothorax</td>\n",
       "      <td>procedureevents_mv</td>\n",
       "      <td>3-Significant Events</td>\n",
       "      <td>None</td>\n",
       "      <td>66.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227011</th>\n",
       "      <td>GCSEye_ApacheIV</td>\n",
       "      <td>GCSEye_ApacheIV</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td></td>\n",
       "      <td>65.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226755</th>\n",
       "      <td>GcsApacheIIScore</td>\n",
       "      <td>GcsApacheIIScore</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td>None</td>\n",
       "      <td>64.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220739</th>\n",
       "      <td>GCS - Eye Opening</td>\n",
       "      <td>Eye Opening</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>64.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227013</th>\n",
       "      <td>GcsScore_ApacheIV</td>\n",
       "      <td>GcsScore_ApacheIV</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td>None</td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                        label           abbreviation             linksto  \\\n",
       "itemid                                                                     \n",
       "228405            Motor L Leg            Motor L Leg                       \n",
       "228404            Motor L Arm            Motor L Arm                       \n",
       "228407            Motor R Leg            Motor R Leg                       \n",
       "228406            Motor R Arm            Motor R Arm                       \n",
       "227120          Motor Deficit          Motor Deficit                       \n",
       "453            Motor Deficits                                chartevents   \n",
       "454            Motor Response                                chartevents   \n",
       "223901   GCS - Motor Response         Motor Response                       \n",
       "198                 GCS Total                                chartevents   \n",
       "227012      GCSMotor_ApacheIV      GCSMotor_ApacheIV                       \n",
       "3779                    Monos                                chartevents   \n",
       "3742                    Basos                                chartevents   \n",
       "3791                    Polys                                chartevents   \n",
       "226757  GCSMotorApacheIIValue  GCSMotorApacheIIValue                       \n",
       "2834                      ICS                                chartevents   \n",
       "225472           Pneumothorax           Pneumothorax  procedureevents_mv   \n",
       "227011        GCSEye_ApacheIV        GCSEye_ApacheIV                       \n",
       "226755       GcsApacheIIScore       GcsApacheIIScore         chartevents   \n",
       "220739      GCS - Eye Opening            Eye Opening                       \n",
       "227013      GcsScore_ApacheIV      GcsScore_ApacheIV         chartevents   \n",
       "\n",
       "                      category unitname      score  \n",
       "itemid                                              \n",
       "228405            Neurological           85.333333  \n",
       "228404            Neurological           85.333333  \n",
       "228407            Neurological           85.333333  \n",
       "228406            Neurological           85.333333  \n",
       "227120           Pain/Sedation           80.666667  \n",
       "453                                      78.666667  \n",
       "454                                      78.666667  \n",
       "223901            Neurological           78.666667  \n",
       "198                                      76.666667  \n",
       "227012  Scores - APACHE IV (2)           73.333333  \n",
       "3779                       CSF           71.333333  \n",
       "3742                       CSF           71.333333  \n",
       "3791                       CSF           71.333333  \n",
       "226757      Scores - APACHE II           68.666667  \n",
       "2834                                     67.000000  \n",
       "225472    3-Significant Events     None  66.000000  \n",
       "227011  Scores - APACHE IV (2)           65.333333  \n",
       "226755      Scores - APACHE II     None  64.666667  \n",
       "220739            Neurological           64.333333  \n",
       "227013  Scores - APACHE IV (2)     None  63.333333  "
      ]
     },
     "execution_count": 101,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up candidate item definitions for the Glasgow Coma Scale motor component\n",
    "# and show the first 20 rows of the search result.\n",
    "out_df = explorer.search([\n",
    "        'glasgow coma scale',\n",
    "        'GCS',\n",
    "        'motor',\n",
    "        'motor response'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Item ids kept for the GCS motor component; labels are as listed in the\n",
    "# search output of the preceding cell.\n",
    "keep_dict[data_dict.labels.GLASGOW_COMA_SCALE_MOTOR] = [\n",
    "    454,     # 'Motor Response' (linksto: chartevents)\n",
    "    223901,  # 'GCS - Motor Response' (category: Neurological)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Verbal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>223900</th>\n",
       "      <td>GCS - Verbal Response</td>\n",
       "      <td>Verbal Response</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>723</th>\n",
       "      <td>Verbal Response</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224756</th>\n",
       "      <td>Response</td>\n",
       "      <td>Response</td>\n",
       "      <td></td>\n",
       "      <td>Neurological</td>\n",
       "      <td></td>\n",
       "      <td>90.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41610</th>\n",
       "      <td>ER</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227014</th>\n",
       "      <td>GCSVerbal_ApacheIV</td>\n",
       "      <td>GCSVerbal_ApacheIV</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE IV (2)</td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44473</th>\n",
       "      <td>er</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>198</th>\n",
       "      <td>GCS Total</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226758</th>\n",
       "      <td>GCSVerbalApacheIIValue</td>\n",
       "      <td>GCSVerbalApacheIIValue</td>\n",
       "      <td></td>\n",
       "      <td>Scores - APACHE II</td>\n",
       "      <td></td>\n",
       "      <td>72.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3779</th>\n",
       "      <td>Monos</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3742</th>\n",
       "      <td>Basos</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3791</th>\n",
       "      <td>Polys</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>CSF</td>\n",
       "      <td></td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224413</th>\n",
       "      <td>TOF Response</td>\n",
       "      <td>TOF Response</td>\n",
       "      <td></td>\n",
       "      <td>Pain/Sedation</td>\n",
       "      <td></td>\n",
       "      <td>69.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40450</th>\n",
       "      <td>EBL</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41693</th>\n",
       "      <td>Verapamil</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1968</th>\n",
       "      <td>Verapamil</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42047</th>\n",
       "      <td>ebl</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222318</th>\n",
       "      <td>Verapamil</td>\n",
       "      <td>Verapamil</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46484</th>\n",
       "      <td>verapamil</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2834</th>\n",
       "      <td>ICS</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224409</th>\n",
       "      <td>Pain Level Response</td>\n",
       "      <td>Pain Level Response</td>\n",
       "      <td></td>\n",
       "      <td>Pain/Sedation</td>\n",
       "      <td></td>\n",
       "      <td>66.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                         label            abbreviation         linksto  \\\n",
       "itemid                                                                   \n",
       "223900   GCS - Verbal Response         Verbal Response                   \n",
       "723            Verbal Response                             chartevents   \n",
       "224756                Response                Response                   \n",
       "41610                       ER                          inputevents_cv   \n",
       "227014      GCSVerbal_ApacheIV      GCSVerbal_ApacheIV                   \n",
       "44473                       er                          inputevents_cv   \n",
       "198                  GCS Total                             chartevents   \n",
       "226758  GCSVerbalApacheIIValue  GCSVerbalApacheIIValue                   \n",
       "3779                     Monos                             chartevents   \n",
       "3742                     Basos                             chartevents   \n",
       "3791                     Polys                             chartevents   \n",
       "224413            TOF Response            TOF Response                   \n",
       "40450                      EBL                          inputevents_cv   \n",
       "41693                Verapamil                          inputevents_cv   \n",
       "1968                 Verapamil                             chartevents   \n",
       "42047                      ebl                          inputevents_cv   \n",
       "222318               Verapamil               Verapamil  inputevents_mv   \n",
       "46484                verapamil                          inputevents_cv   \n",
       "2834                       ICS                             chartevents   \n",
       "224409     Pain Level Response     Pain Level Response                   \n",
       "\n",
       "                      category unitname       score  \n",
       "itemid                                               \n",
       "223900            Neurological           110.000000  \n",
       "723                                      110.000000  \n",
       "224756            Neurological            90.000000  \n",
       "41610         Free Form Intake            76.666667  \n",
       "227014  Scores - APACHE IV (2)            76.666667  \n",
       "44473         Free Form Intake            76.666667  \n",
       "198                                       76.666667  \n",
       "226758      Scores - APACHE II            72.000000  \n",
       "3779                       CSF            71.333333  \n",
       "3742                       CSF            71.333333  \n",
       "3791                       CSF            71.333333  \n",
       "224413           Pain/Sedation            69.666667  \n",
       "40450                                     67.000000  \n",
       "41693         Free Form Intake            67.000000  \n",
       "1968                                      67.000000  \n",
       "42047                                     67.000000  \n",
       "222318             Medications       mg   67.000000  \n",
       "46484         Free Form Intake            67.000000  \n",
       "2834                                      67.000000  \n",
       "224409           Pain/Sedation            66.000000  "
      ]
     },
     "execution_count": 103,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look up candidate item definitions for the Glasgow Coma Scale verbal component\n",
    "# and show the first 20 rows of the search result.\n",
    "out_df = explorer.search([\n",
    "        'glasgow coma scale',\n",
    "        'GCS',\n",
    "        'verbal',\n",
    "        'verbal response'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Item ids kept for the GCS verbal component; labels are as listed in the\n",
    "# search output of the preceding cell.\n",
    "keep_dict[data_dict.labels.GLASGOW_COMA_SCALE_VERBAL] = [\n",
    "    723,     # 'Verbal Response' (linksto: chartevents)\n",
    "    223900,  # 'GCS - Verbal Response' (category: Neurological)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Labs\n",
    "\n",
    "### Lactate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1531</th>\n",
       "      <td>Lactic Acid</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Chemistry</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225668</th>\n",
       "      <td>Lactic Acid</td>\n",
       "      <td>Lactic Acid</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>None</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50813</th>\n",
       "      <td>LACTATE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220228</th>\n",
       "      <td>Hemoglobin</td>\n",
       "      <td>Hemoglobin</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>g/dl</td>\n",
       "      <td>102.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2773</th>\n",
       "      <td>sjlactate</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>102.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225835</th>\n",
       "      <td>Na Phos</td>\n",
       "      <td>Na Phos</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mmol</td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225834</th>\n",
       "      <td>K Phos</td>\n",
       "      <td>K Phos</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mmol</td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227526</th>\n",
       "      <td>Citrate</td>\n",
       "      <td>Citrate</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mmol</td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225925</th>\n",
       "      <td>Potassium Phosphate</td>\n",
       "      <td>Potassium Phosphate</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Nutrition - Supplements</td>\n",
       "      <td>mmol</td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>818</th>\n",
       "      <td>Lactic Acid(0.5-2.0)</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Chemistry</td>\n",
       "      <td></td>\n",
       "      <td>91.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220955</th>\n",
       "      <td>Ringers Lactate</td>\n",
       "      <td>Ringers Lactate</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>86.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2638</th>\n",
       "      <td>CEREBRAL LACTATE</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1634</th>\n",
       "      <td>lactated ringers</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30021</th>\n",
       "      <td>Lactated Ringers</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1520</th>\n",
       "      <td>ACT</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Coags</td>\n",
       "      <td></td>\n",
       "      <td>83.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220507</th>\n",
       "      <td>Activated Clotting Time</td>\n",
       "      <td>ACT</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>None</td>\n",
       "      <td>83.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1671</th>\n",
       "      <td>act</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>83.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221319</th>\n",
       "      <td>Alteplase (TPA)</td>\n",
       "      <td>Alteplase (TPA)</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221347</th>\n",
       "      <td>Amiodarone</td>\n",
       "      <td>Amiodarone</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221342</th>\n",
       "      <td>Aminophylline</td>\n",
       "      <td>Aminophylline</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          label         abbreviation         linksto  \\\n",
       "itemid                                                                 \n",
       "1531                Lactic Acid                          chartevents   \n",
       "225668              Lactic Acid          Lactic Acid     chartevents   \n",
       "50813                   LACTATE                  NaN       labevents   \n",
       "220228               Hemoglobin           Hemoglobin     chartevents   \n",
       "2773                  sjlactate                          chartevents   \n",
       "225835                  Na Phos              Na Phos  inputevents_mv   \n",
       "225834                   K Phos               K Phos  inputevents_mv   \n",
       "227526                  Citrate              Citrate  inputevents_mv   \n",
       "225925      Potassium Phosphate  Potassium Phosphate  inputevents_mv   \n",
       "818        Lactic Acid(0.5-2.0)                          chartevents   \n",
       "220955          Ringers Lactate      Ringers Lactate  inputevents_mv   \n",
       "2638           CEREBRAL LACTATE                          chartevents   \n",
       "1634           lactated ringers                          chartevents   \n",
       "30021          Lactated Ringers                       inputevents_cv   \n",
       "1520                        ACT                          chartevents   \n",
       "220507  Activated Clotting Time                  ACT     chartevents   \n",
       "1671                        act                          chartevents   \n",
       "221319          Alteplase (TPA)      Alteplase (TPA)  inputevents_mv   \n",
       "221347               Amiodarone           Amiodarone  inputevents_mv   \n",
       "221342            Aminophylline        Aminophylline  inputevents_mv   \n",
       "\n",
       "                           category unitname       score  \n",
       "itemid                                                    \n",
       "1531                      Chemistry           110.000000  \n",
       "225668                         Labs     None  110.000000  \n",
       "50813                     BLOOD GAS      NaN  110.000000  \n",
       "220228                         Labs     g/dl  102.666667  \n",
       "2773                                          102.000000  \n",
       "225835                  Medications     mmol   96.666667  \n",
       "225834                  Medications     mmol   96.666667  \n",
       "227526                  Medications     mmol   96.666667  \n",
       "225925      Nutrition - Supplements     mmol   96.666667  \n",
       "818                       Chemistry            91.333333  \n",
       "220955  Fluids - Other (Not In Use)       mL   86.000000  \n",
       "2638                                           84.000000  \n",
       "1634                                           84.000000  \n",
       "30021                                          84.000000  \n",
       "1520                          Coags            83.333333  \n",
       "220507                         Labs     None   83.333333  \n",
       "1671                                           83.333333  \n",
       "221319                  Medications       mg   81.333333  \n",
       "221347                  Medications       mg   81.333333  \n",
       "221342                  Medications       mg   81.333333  "
      ]
     },
     "execution_count": 105,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search the item definitions for lactate by name and by the unit strings\n",
    "# 'mmol/L' / 'mg/dL', then show the first 20 rows of the result.\n",
    "out_df = explorer.search(['lactate', 'lactic acid', 'mmol/L', 'mg/dL'])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Item ids kept for lactate; labels are as listed in the search output of\n",
    "# the preceding cell.\n",
    "keep_dict[data_dict.labels.LACTATE] = [\n",
    "    1531,    # 'Lactic Acid' (chartevents, category: Chemistry)\n",
    "    50813,   # 'LACTATE' (labevents, category: BLOOD GAS)\n",
    "    225668,  # 'Lactic Acid' (chartevents, category: Labs)\n",
    "    818,     # 'Lactic Acid(0.5-2.0)' (chartevents, category: Chemistry)\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hemoglobin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>51222</th>\n",
       "      <td>HEMOGLOBIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220228</th>\n",
       "      <td>Hemoglobin</td>\n",
       "      <td>Hemoglobin</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>g/dl</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>814</th>\n",
       "      <td>Hemoglobin</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Hematology</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1165</th>\n",
       "      <td>Hgb</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50811</th>\n",
       "      <td>HEMOGLOBIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51225</th>\n",
       "      <td>HEMOGLOBIN F</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>104.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51224</th>\n",
       "      <td>HEMOGLOBIN C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>104.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50814</th>\n",
       "      <td>METHEMOGLOBIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51223</th>\n",
       "      <td>HEMOGLOBIN A2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7965</th>\n",
       "      <td>methhemoglobin</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>98.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50852</th>\n",
       "      <td>% HEMOGLOBIN A1C</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>CHEMISTRY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51212</th>\n",
       "      <td>FETAL HEMOGLOBIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50805</th>\n",
       "      <td>CARBOXYHEMOGLOBIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50855</th>\n",
       "      <td>ABSOLUTE HEMOGLOBIN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>CHEMISTRY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>89.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51226</th>\n",
       "      <td>HEMOGLOBLIN A</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51227</th>\n",
       "      <td>HEMOGLOBLIN S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42232</th>\n",
       "      <td>THYMOGLOBLIN</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45486</th>\n",
       "      <td>Hemo</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42117</th>\n",
       "      <td>THYMOGLOBULIN</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>78.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50934</th>\n",
       "      <td>H</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>CHEMISTRY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>76.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      label abbreviation         linksto          category  \\\n",
       "itemid                                                                       \n",
       "51222            HEMOGLOBIN          NaN       labevents        HEMATOLOGY   \n",
       "220228           Hemoglobin   Hemoglobin     chartevents              Labs   \n",
       "814              Hemoglobin                  chartevents        Hematology   \n",
       "1165                    Hgb                  chartevents                     \n",
       "50811            HEMOGLOBIN          NaN       labevents         BLOOD GAS   \n",
       "51225          HEMOGLOBIN F          NaN       labevents        HEMATOLOGY   \n",
       "51224          HEMOGLOBIN C          NaN       labevents        HEMATOLOGY   \n",
       "50814         METHEMOGLOBIN          NaN       labevents         BLOOD GAS   \n",
       "51223         HEMOGLOBIN A2          NaN       labevents        HEMATOLOGY   \n",
       "7965         methhemoglobin                  chartevents                     \n",
       "50852      % HEMOGLOBIN A1C          NaN       labevents         CHEMISTRY   \n",
       "51212      FETAL HEMOGLOBIN          NaN       labevents        HEMATOLOGY   \n",
       "50805     CARBOXYHEMOGLOBIN          NaN       labevents         BLOOD GAS   \n",
       "50855   ABSOLUTE HEMOGLOBIN          NaN       labevents         CHEMISTRY   \n",
       "51226         HEMOGLOBLIN A          NaN       labevents        HEMATOLOGY   \n",
       "51227         HEMOGLOBLIN S          NaN       labevents        HEMATOLOGY   \n",
       "42232          THYMOGLOBLIN               inputevents_cv  Free Form Intake   \n",
       "45486                  Hemo               inputevents_cv                     \n",
       "42117         THYMOGLOBULIN               inputevents_cv  Free Form Intake   \n",
       "50934                     H          NaN       labevents         CHEMISTRY   \n",
       "\n",
       "       unitname       score  \n",
       "itemid                       \n",
       "51222       NaN  110.000000  \n",
       "220228     g/dl  110.000000  \n",
       "814              110.000000  \n",
       "1165             110.000000  \n",
       "50811       NaN  110.000000  \n",
       "51225       NaN  104.000000  \n",
       "51224       NaN  104.000000  \n",
       "50814       NaN  101.333333  \n",
       "51223       NaN  101.333333  \n",
       "7965              98.666667  \n",
       "50852       NaN   96.666667  \n",
       "51212       NaN   94.666667  \n",
       "50805       NaN   92.666667  \n",
       "50855       NaN   89.333333  \n",
       "51226       NaN   88.000000  \n",
       "51227       NaN   88.000000  \n",
       "42232             81.333333  \n",
       "45486             81.333333  \n",
       "42117             78.666667  \n",
       "50934       NaN   76.666667  "
      ]
     },
     "execution_count": 107,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df = explorer.search([\n",
    "        'hgb',\n",
    "        'hemoglobin',\n",
    "        'g/dL'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keep_dict[data_dict.labels.HEMOGLOBIN] = [51222,220228,814,1165,50811]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fluids\n",
    "\n",
    "### Normal Saline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>41913</th>\n",
       "      <td>NS</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6190</th>\n",
       "      <td>Normal saline</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30143</th>\n",
       "      <td>3% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>104.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30168</th>\n",
       "      <td>Normal Saline_GU</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30160</th>\n",
       "      <td>D5 Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30161</th>\n",
       "      <td>.3% normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30018</th>\n",
       "      <td>.9% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30020</th>\n",
       "      <td>.45% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>101.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30176</th>\n",
       "      <td>.25% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>101.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30352</th>\n",
       "      <td>0.9% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220962</th>\n",
       "      <td>Saline 3%</td>\n",
       "      <td>Saline 3%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>98.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30353</th>\n",
       "      <td>0.45% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>98.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44440</th>\n",
       "      <td>Normal Saline Bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44053</th>\n",
       "      <td>normal saline bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4647</th>\n",
       "      <td>normal saline bolus</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43354</th>\n",
       "      <td>normal saline flushs</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>96.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221213</th>\n",
       "      <td>Saline 30%</td>\n",
       "      <td>Saline 30%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>95.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220959</th>\n",
       "      <td>Saline 0,3%</td>\n",
       "      <td>Saline 0,3%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220954</th>\n",
       "      <td>Saline 0,9%</td>\n",
       "      <td>Saline 0,9%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220960</th>\n",
       "      <td>Saline 0,45%</td>\n",
       "      <td>Saline 0,45%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>89.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       label  abbreviation         linksto  \\\n",
       "itemid                                                       \n",
       "41913                     NS                inputevents_cv   \n",
       "6190           Normal saline                   chartevents   \n",
       "30143       3% Normal Saline                inputevents_cv   \n",
       "30168       Normal Saline_GU                inputevents_cv   \n",
       "30160       D5 Normal Saline                inputevents_cv   \n",
       "30161      .3% normal Saline                inputevents_cv   \n",
       "30018      .9% Normal Saline                inputevents_cv   \n",
       "30020     .45% Normal Saline                inputevents_cv   \n",
       "30176     .25% Normal Saline                inputevents_cv   \n",
       "30352     0.9% Normal Saline                inputevents_cv   \n",
       "220962             Saline 3%     Saline 3%  inputevents_mv   \n",
       "30353    0.45% Normal Saline                inputevents_cv   \n",
       "44440    Normal Saline Bolus                inputevents_cv   \n",
       "44053    normal saline bolus                inputevents_cv   \n",
       "4647     normal saline bolus                   chartevents   \n",
       "43354   normal saline flushs                inputevents_cv   \n",
       "221213            Saline 30%    Saline 30%  inputevents_mv   \n",
       "220959           Saline 0,3%   Saline 0,3%  inputevents_mv   \n",
       "220954           Saline 0,9%   Saline 0,9%  inputevents_mv   \n",
       "220960          Saline 0,45%  Saline 0,45%  inputevents_mv   \n",
       "\n",
       "                           category unitname       score  \n",
       "itemid                                                    \n",
       "41913              Free Form Intake           110.000000  \n",
       "6190                                          110.000000  \n",
       "30143                                         104.333333  \n",
       "30168                                         103.333333  \n",
       "30160                                         103.333333  \n",
       "30161                                         103.333333  \n",
       "30018                                         103.333333  \n",
       "30020                                         101.333333  \n",
       "30176                                         101.333333  \n",
       "30352                                         100.333333  \n",
       "220962  Fluids - Other (Not In Use)       mL   98.666667  \n",
       "30353                                          98.333333  \n",
       "44440              Free Form Intake            97.333333  \n",
       "44053              Free Form Intake            97.333333  \n",
       "4647                                           97.333333  \n",
       "43354              Free Form Intake            96.000000  \n",
       "221213  Fluids - Other (Not In Use)       mL   95.000000  \n",
       "220959  Fluids - Other (Not In Use)       mL   92.000000  \n",
       "220954  Fluids - Other (Not In Use)       mL   92.000000  \n",
       "220960  Fluids - Other (Not In Use)       mL   89.333333  "
      ]
     },
     "execution_count": 109,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df = explorer.search([\n",
    "        'saline',\n",
    "        'NS',\n",
    "        '0.9%',\n",
    "        'normal saline'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keep_dict[data_dict.labels.NORMAL_SALINE] = [41913,6190,20018,30252,44440,44053,4647,220954]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>41913</th>\n",
       "      <td>NS</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6190</th>\n",
       "      <td>Normal saline</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30143</th>\n",
       "      <td>3% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>104.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30168</th>\n",
       "      <td>Normal Saline_GU</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30160</th>\n",
       "      <td>D5 Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30161</th>\n",
       "      <td>.3% normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30018</th>\n",
       "      <td>.9% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>103.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30020</th>\n",
       "      <td>.45% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>101.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30176</th>\n",
       "      <td>.25% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>101.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30352</th>\n",
       "      <td>0.9% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>100.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220962</th>\n",
       "      <td>Saline 3%</td>\n",
       "      <td>Saline 3%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>98.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30353</th>\n",
       "      <td>0.45% Normal Saline</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>98.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44440</th>\n",
       "      <td>Normal Saline Bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44053</th>\n",
       "      <td>normal saline bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4647</th>\n",
       "      <td>normal saline bolus</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43354</th>\n",
       "      <td>normal saline flushs</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>96.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221213</th>\n",
       "      <td>Saline 30%</td>\n",
       "      <td>Saline 30%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>95.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220959</th>\n",
       "      <td>Saline 0,3%</td>\n",
       "      <td>Saline 0,3%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220954</th>\n",
       "      <td>Saline 0,9%</td>\n",
       "      <td>Saline 0,9%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>92.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220960</th>\n",
       "      <td>Saline 0,45%</td>\n",
       "      <td>Saline 0,45%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>89.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220961</th>\n",
       "      <td>Saline 0,65%</td>\n",
       "      <td>Saline 0,65%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>89.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221212</th>\n",
       "      <td>Saline 0,18%</td>\n",
       "      <td>Saline 0,18%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>89.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41392</th>\n",
       "      <td>ns b</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225825</th>\n",
       "      <td>D5NS</td>\n",
       "      <td>D5NS</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5333</th>\n",
       "      <td>saline flush</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30060</th>\n",
       "      <td>D5NS</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>226401</th>\n",
       "      <td>GU Irrigant - Normal Saline</td>\n",
       "      <td>GU Irrigant - Normal Saline</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>87.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220958</th>\n",
       "      <td>Saline 0,255%</td>\n",
       "      <td>Saline 0,255%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>86.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2619</th>\n",
       "      <td>3% NS</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227344</th>\n",
       "      <td>IV/Saline lock</td>\n",
       "      <td>IV/Saline lock</td>\n",
       "      <td></td>\n",
       "      <td>Restraint/Support Systems</td>\n",
       "      <td></td>\n",
       "      <td>83.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30190</th>\n",
       "      <td>NS .9%</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>82.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225158</th>\n",
       "      <td>NaCl 0.9%</td>\n",
       "      <td>NaCl 0.9%</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>82.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45298</th>\n",
       "      <td>ED NS</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2072</th>\n",
       "      <td>Pinsp</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2107</th>\n",
       "      <td>pinsp</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7092</th>\n",
       "      <td>Tinsp</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6384</th>\n",
       "      <td>PINSP</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44498</th>\n",
       "      <td>er ns</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3204</th>\n",
       "      <td>Pinsp.</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2404</th>\n",
       "      <td>.45%ns</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>79.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                              label                 abbreviation  \\\n",
       "itemid                                                             \n",
       "41913                            NS                                \n",
       "6190                  Normal saline                                \n",
       "30143              3% Normal Saline                                \n",
       "30168              Normal Saline_GU                                \n",
       "30160              D5 Normal Saline                                \n",
       "30161             .3% normal Saline                                \n",
       "30018             .9% Normal Saline                                \n",
       "30020            .45% Normal Saline                                \n",
       "30176            .25% Normal Saline                                \n",
       "30352            0.9% Normal Saline                                \n",
       "220962                    Saline 3%                    Saline 3%   \n",
       "30353           0.45% Normal Saline                                \n",
       "44440           Normal Saline Bolus                                \n",
       "44053           normal saline bolus                                \n",
       "4647            normal saline bolus                                \n",
       "43354          normal saline flushs                                \n",
       "221213                   Saline 30%                   Saline 30%   \n",
       "220959                  Saline 0,3%                  Saline 0,3%   \n",
       "220954                  Saline 0,9%                  Saline 0,9%   \n",
       "220960                 Saline 0,45%                 Saline 0,45%   \n",
       "220961                 Saline 0,65%                 Saline 0,65%   \n",
       "221212                 Saline 0,18%                 Saline 0,18%   \n",
       "41392                          ns b                                \n",
       "225825                         D5NS                         D5NS   \n",
       "5333                   saline flush                                \n",
       "30060                          D5NS                                \n",
       "226401  GU Irrigant - Normal Saline  GU Irrigant - Normal Saline   \n",
       "220958                Saline 0,255%                Saline 0,255%   \n",
       "2619                          3% NS                                \n",
       "227344               IV/Saline lock               IV/Saline lock   \n",
       "30190                        NS .9%                                \n",
       "225158                    NaCl 0.9%                    NaCl 0.9%   \n",
       "45298                         ED NS                                \n",
       "2072                          Pinsp                                \n",
       "2107                          pinsp                                \n",
       "7092                          Tinsp                                \n",
       "6384                          PINSP                                \n",
       "44498                         er ns                                \n",
       "3204                         Pinsp.                                \n",
       "2404                         .45%ns                                \n",
       "\n",
       "               linksto                     category unitname       score  \n",
       "itemid                                                                    \n",
       "41913   inputevents_cv             Free Form Intake           110.000000  \n",
       "6190       chartevents                                        110.000000  \n",
       "30143   inputevents_cv                                        104.333333  \n",
       "30168   inputevents_cv                                        103.333333  \n",
       "30160   inputevents_cv                                        103.333333  \n",
       "30161   inputevents_cv                                        103.333333  \n",
       "30018   inputevents_cv                                        103.333333  \n",
       "30020   inputevents_cv                                        101.333333  \n",
       "30176   inputevents_cv                                        101.333333  \n",
       "30352   inputevents_cv                                        100.333333  \n",
       "220962  inputevents_mv  Fluids - Other (Not In Use)       mL   98.666667  \n",
       "30353   inputevents_cv                                         98.333333  \n",
       "44440   inputevents_cv             Free Form Intake            97.333333  \n",
       "44053   inputevents_cv             Free Form Intake            97.333333  \n",
       "4647       chartevents                                         97.333333  \n",
       "43354   inputevents_cv             Free Form Intake            96.000000  \n",
       "221213  inputevents_mv  Fluids - Other (Not In Use)       mL   95.000000  \n",
       "220959  inputevents_mv  Fluids - Other (Not In Use)       mL   92.000000  \n",
       "220954  inputevents_mv  Fluids - Other (Not In Use)       mL   92.000000  \n",
       "220960  inputevents_mv  Fluids - Other (Not In Use)       mL   89.333333  \n",
       "220961  inputevents_mv  Fluids - Other (Not In Use)       mL   89.333333  \n",
       "221212  inputevents_mv  Fluids - Other (Not In Use)       mL   89.333333  \n",
       "41392   inputevents_cv             Free Form Intake            88.000000  \n",
       "225825  inputevents_mv                Fluids/Intake       mL   88.000000  \n",
       "5333       chartevents                                         88.000000  \n",
       "30060   inputevents_cv                                         88.000000  \n",
       "226401  inputevents_mv                Fluids/Intake       mL   87.666667  \n",
       "220958  inputevents_mv  Fluids - Other (Not In Use)       mL   86.666667  \n",
       "2619       chartevents                                         84.666667  \n",
       "227344                    Restraint/Support Systems            83.333333  \n",
       "30190   inputevents_cv                                         82.333333  \n",
       "225158  inputevents_mv                Fluids/Intake       mL   82.333333  \n",
       "45298   inputevents_cv             Free Form Intake            81.333333  \n",
       "2072       chartevents                                         81.333333  \n",
       "2107       chartevents                                         81.333333  \n",
       "7092       chartevents                                         81.333333  \n",
       "6384       chartevents                                         81.333333  \n",
       "44498   inputevents_cv             Free Form Intake            81.333333  \n",
       "3204       chartevents                                         79.000000  \n",
       "2404       chartevents                                         79.000000  "
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Also keep 'NS .9%' (30190) and 'NaCl 0.9%' (225158) from the search results above.\n",
    "# Only ids not already listed are appended, so re-running this cell cannot add duplicates.\n",
    "keep_dict[data_dict.labels.NORMAL_SALINE] += [\n",
    "    new_id for new_id in (30190, 225158)\n",
    "    if new_id not in keep_dict[data_dict.labels.NORMAL_SALINE]\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Lactated Ringers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>225828</th>\n",
       "      <td>LR</td>\n",
       "      <td>LR</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44367</th>\n",
       "      <td>LR</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2971</th>\n",
       "      <td>LR</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1634</th>\n",
       "      <td>lactated ringers</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30021</th>\n",
       "      <td>Lactated Ringers</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45532</th>\n",
       "      <td>IR Lactated ringers</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>104.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225943</th>\n",
       "      <td>Solution</td>\n",
       "      <td>Solution</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225827</th>\n",
       "      <td>D5LR</td>\n",
       "      <td>D5LR</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50813</th>\n",
       "      <td>LACTATE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220953</th>\n",
       "      <td>Ringers</td>\n",
       "      <td>Ringers</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46207</th>\n",
       "      <td>OR LR</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44184</th>\n",
       "      <td>LR Bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44521</th>\n",
       "      <td>LR bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46781</th>\n",
       "      <td>lr bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44815</th>\n",
       "      <td>LR BOLUS</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44915</th>\n",
       "      <td>D5LR 40K</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46538</th>\n",
       "      <td>PD solution in</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>68.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44837</th>\n",
       "      <td>ED URINE</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>68.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8313</th>\n",
       "      <td>Nystatin solution</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30125</th>\n",
       "      <td>Milrinone</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      label abbreviation         linksto  \\\n",
       "itemid                                                     \n",
       "225828                   LR           LR  inputevents_mv   \n",
       "44367                    LR               inputevents_cv   \n",
       "2971                     LR                  chartevents   \n",
       "1634       lactated ringers                  chartevents   \n",
       "30021      Lactated Ringers               inputevents_cv   \n",
       "45532   IR Lactated ringers               inputevents_cv   \n",
       "225943             Solution     Solution  inputevents_mv   \n",
       "225827                 D5LR         D5LR  inputevents_mv   \n",
       "50813               LACTATE          NaN       labevents   \n",
       "220953              Ringers      Ringers  inputevents_mv   \n",
       "46207                 OR LR               inputevents_cv   \n",
       "44184              LR Bolus               inputevents_cv   \n",
       "44521              LR bolus               inputevents_cv   \n",
       "46781              lr bolus               inputevents_cv   \n",
       "44815              LR BOLUS               inputevents_cv   \n",
       "44915              D5LR 40K               inputevents_cv   \n",
       "46538        PD solution in               inputevents_cv   \n",
       "44837              ED URINE                 outputevents   \n",
       "8313      Nystatin solution                  chartevents   \n",
       "30125             Milrinone               inputevents_cv   \n",
       "\n",
       "                           category unitname       score  \n",
       "itemid                                                    \n",
       "225828                Fluids/Intake       mL  110.000000  \n",
       "44367              Free Form Intake           110.000000  \n",
       "2971                                          110.000000  \n",
       "1634                                          110.000000  \n",
       "30021                                         110.000000  \n",
       "45532              Free Form Intake           104.000000  \n",
       "225943                Fluids/Intake       mL   88.000000  \n",
       "225827                Fluids/Intake       mL   88.000000  \n",
       "50813                     BLOOD GAS      NaN   84.000000  \n",
       "220953  Fluids - Other (Not In Use)       mL   84.000000  \n",
       "46207              Free Form Intake            81.333333  \n",
       "44184              Free Form Intake            70.000000  \n",
       "44521              Free Form Intake            70.000000  \n",
       "46781              Free Form Intake            70.000000  \n",
       "44815              Free Form Intake            70.000000  \n",
       "44915              Free Form Intake            70.000000  \n",
       "46538              Free Form Intake            68.333333  \n",
       "44837                                          68.000000  \n",
       "8313                                           67.666667  \n",
       "30125                                          67.333333  "
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Query the explorer with common names for lactated Ringer's solution,\n",
    "# then preview the first 20 rows of the result.\n",
    "out_df = explorer.search(['LR', 'ringers solution', 'lactated ringers'])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>225828</th>\n",
       "      <td>LR</td>\n",
       "      <td>LR</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44367</th>\n",
       "      <td>LR</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2971</th>\n",
       "      <td>LR</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1634</th>\n",
       "      <td>lactated ringers</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30021</th>\n",
       "      <td>Lactated Ringers</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45532</th>\n",
       "      <td>IR Lactated ringers</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>104.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225943</th>\n",
       "      <td>Solution</td>\n",
       "      <td>Solution</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225827</th>\n",
       "      <td>D5LR</td>\n",
       "      <td>D5LR</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids/Intake</td>\n",
       "      <td>mL</td>\n",
       "      <td>88.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50813</th>\n",
       "      <td>LACTATE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220953</th>\n",
       "      <td>Ringers</td>\n",
       "      <td>Ringers</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>84.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46207</th>\n",
       "      <td>OR LR</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>81.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44184</th>\n",
       "      <td>LR Bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44521</th>\n",
       "      <td>LR bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46781</th>\n",
       "      <td>lr bolus</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44815</th>\n",
       "      <td>LR BOLUS</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44915</th>\n",
       "      <td>D5LR 40K</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46538</th>\n",
       "      <td>PD solution in</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>68.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44837</th>\n",
       "      <td>ED URINE</td>\n",
       "      <td></td>\n",
       "      <td>outputevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>68.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8313</th>\n",
       "      <td>Nystatin solution</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30125</th>\n",
       "      <td>Milrinone</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221986</th>\n",
       "      <td>Milrinone</td>\n",
       "      <td>Milrinone</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>67.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30159</th>\n",
       "      <td>D5 Ringers Lact.</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>67.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228159</th>\n",
       "      <td>Purge Solution Flow Rate</td>\n",
       "      <td>Purge Solution Flow Rate</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Impella</td>\n",
       "      <td>ml/hr</td>\n",
       "      <td>66.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225953</th>\n",
       "      <td>Solution (Peritoneal Dialysis)</td>\n",
       "      <td>Solution (PD)</td>\n",
       "      <td></td>\n",
       "      <td>Dialysis</td>\n",
       "      <td></td>\n",
       "      <td>66.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45983</th>\n",
       "      <td>Pitocin/LR</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>65.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42409</th>\n",
       "      <td>D5LR W/40K</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>65.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42978</th>\n",
       "      <td>D5LR 20KCL</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>65.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1189</th>\n",
       "      <td>finger stick</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>65.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5743</th>\n",
       "      <td>NT suction</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>64.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1520</th>\n",
       "      <td>ACT</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Coags</td>\n",
       "      <td></td>\n",
       "      <td>64.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220507</th>\n",
       "      <td>Activated Clotting Time</td>\n",
       "      <td>ACT</td>\n",
       "      <td>chartevents</td>\n",
       "      <td>Labs</td>\n",
       "      <td>None</td>\n",
       "      <td>64.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1671</th>\n",
       "      <td>act</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>64.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8339</th>\n",
       "      <td>Neo Opium Solution</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>64.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42288</th>\n",
       "      <td>LR w/40 kcl</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>64.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225072</th>\n",
       "      <td>Living situation</td>\n",
       "      <td>Living situation</td>\n",
       "      <td></td>\n",
       "      <td>Adm History/FHPA</td>\n",
       "      <td></td>\n",
       "      <td>63.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42345</th>\n",
       "      <td>LR w/ 40 mEq</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2773</th>\n",
       "      <td>sjlactate</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5088</th>\n",
       "      <td>fingers</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42265</th>\n",
       "      <td>LR W/ 20 KCL</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>63.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>220955</th>\n",
       "      <td>Ringers Lactate</td>\n",
       "      <td>Ringers Lactate</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Fluids - Other (Not In Use)</td>\n",
       "      <td>mL</td>\n",
       "      <td>63.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 label              abbreviation  \\\n",
       "itemid                                                             \n",
       "225828                              LR                        LR   \n",
       "44367                               LR                             \n",
       "2971                                LR                             \n",
       "1634                  lactated ringers                             \n",
       "30021                 Lactated Ringers                             \n",
       "45532              IR Lactated ringers                             \n",
       "225943                        Solution                  Solution   \n",
       "225827                            D5LR                      D5LR   \n",
       "50813                          LACTATE                       NaN   \n",
       "220953                         Ringers                   Ringers   \n",
       "46207                            OR LR                             \n",
       "44184                         LR Bolus                             \n",
       "44521                         LR bolus                             \n",
       "46781                         lr bolus                             \n",
       "44815                         LR BOLUS                             \n",
       "44915                         D5LR 40K                             \n",
       "46538                   PD solution in                             \n",
       "44837                         ED URINE                             \n",
       "8313                 Nystatin solution                             \n",
       "30125                        Milrinone                             \n",
       "221986                       Milrinone                 Milrinone   \n",
       "30159                 D5 Ringers Lact.                             \n",
       "228159        Purge Solution Flow Rate  Purge Solution Flow Rate   \n",
       "225953  Solution (Peritoneal Dialysis)             Solution (PD)   \n",
       "45983                       Pitocin/LR                             \n",
       "42409                       D5LR W/40K                             \n",
       "42978                       D5LR 20KCL                             \n",
       "1189                      finger stick                             \n",
       "5743                        NT suction                             \n",
       "1520                               ACT                             \n",
       "220507         Activated Clotting Time                       ACT   \n",
       "1671                               act                             \n",
       "8339                Neo Opium Solution                             \n",
       "42288                      LR w/40 kcl                             \n",
       "225072                Living situation          Living situation   \n",
       "42345                     LR w/ 40 mEq                             \n",
       "2773                         sjlactate                             \n",
       "5088                           fingers                             \n",
       "42265                     LR W/ 20 KCL                             \n",
       "220955                 Ringers Lactate           Ringers Lactate   \n",
       "\n",
       "               linksto                     category unitname       score  \n",
       "itemid                                                                    \n",
       "225828  inputevents_mv                Fluids/Intake       mL  110.000000  \n",
       "44367   inputevents_cv             Free Form Intake           110.000000  \n",
       "2971       chartevents                                        110.000000  \n",
       "1634       chartevents                                        110.000000  \n",
       "30021   inputevents_cv                                        110.000000  \n",
       "45532   inputevents_cv             Free Form Intake           104.000000  \n",
       "225943  inputevents_mv                Fluids/Intake       mL   88.000000  \n",
       "225827  inputevents_mv                Fluids/Intake       mL   88.000000  \n",
       "50813        labevents                    BLOOD GAS      NaN   84.000000  \n",
       "220953  inputevents_mv  Fluids - Other (Not In Use)       mL   84.000000  \n",
       "46207   inputevents_cv             Free Form Intake            81.333333  \n",
       "44184   inputevents_cv             Free Form Intake            70.000000  \n",
       "44521   inputevents_cv             Free Form Intake            70.000000  \n",
       "46781   inputevents_cv             Free Form Intake            70.000000  \n",
       "44815   inputevents_cv             Free Form Intake            70.000000  \n",
       "44915   inputevents_cv             Free Form Intake            70.000000  \n",
       "46538   inputevents_cv             Free Form Intake            68.333333  \n",
       "44837     outputevents                                         68.000000  \n",
       "8313       chartevents                                         67.666667  \n",
       "30125   inputevents_cv                                         67.333333  \n",
       "221986  inputevents_mv                  Medications       mg   67.333333  \n",
       "30159   inputevents_cv                                         67.333333  \n",
       "228159     chartevents                      Impella    ml/hr   66.666667  \n",
       "225953                                     Dialysis            66.000000  \n",
       "45983   inputevents_cv             Free Form Intake            65.333333  \n",
       "42409   inputevents_cv             Free Form Intake            65.333333  \n",
       "42978   inputevents_cv             Free Form Intake            65.333333  \n",
       "1189       chartevents                                         65.000000  \n",
       "5743       chartevents                                         64.666667  \n",
       "1520       chartevents                        Coags            64.666667  \n",
       "220507     chartevents                         Labs     None   64.666667  \n",
       "1671       chartevents                                         64.666667  \n",
       "8339       chartevents                                         64.333333  \n",
       "42288   inputevents_cv             Free Form Intake            64.000000  \n",
       "225072                             Adm History/FHPA            63.666667  \n",
       "42345   inputevents_cv             Free Form Intake            63.333333  \n",
       "2773       chartevents                                         63.333333  \n",
       "5088       chartevents                                         63.333333  \n",
       "42265   inputevents_cv             Free Form Intake            63.333333  \n",
       "220955  inputevents_mv  Fluids - Other (Not In Use)       mL   63.000000  "
      ]
     },
     "execution_count": 114,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keep_dict[data_dict.labels.LACTATED_RINGERS] = [225828,44367,2971,1634,30021,220953,46207,44184,44521,46781,44815]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Pressors"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Norepinephrine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>30047</th>\n",
       "      <td>Levophed</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221906</th>\n",
       "      <td>Norepinephrine</td>\n",
       "      <td>Norepinephrine</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30120</th>\n",
       "      <td>Levophed-k</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>102.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30044</th>\n",
       "      <td>Epinephrine</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>102.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221289</th>\n",
       "      <td>Epinephrine</td>\n",
       "      <td>Epinephrine</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>mg</td>\n",
       "      <td>102.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5752</th>\n",
       "      <td>Epinephrin</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>98.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51201</th>\n",
       "      <td>EPINEPHERINE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>88.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30119</th>\n",
       "      <td>Epinephrine-k</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>84.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30309</th>\n",
       "      <td>Epinephrine Drip</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>77.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30127</th>\n",
       "      <td>Neosynephrine</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>72.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>225922</th>\n",
       "      <td>Nephramine</td>\n",
       "      <td>Nephramine</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Nutrition - Supplements</td>\n",
       "      <td>mL</td>\n",
       "      <td>71.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3112</th>\n",
       "      <td>epinephrine mcg/min</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>71.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50820</th>\n",
       "      <td>PH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51491</th>\n",
       "      <td>PH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>HEMATOLOGY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51094</th>\n",
       "      <td>PH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>CHEMISTRY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50831</th>\n",
       "      <td>PH</td>\n",
       "      <td>NaN</td>\n",
       "      <td>labevents</td>\n",
       "      <td>BLOOD GAS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45183</th>\n",
       "      <td>ED</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7459</th>\n",
       "      <td>Ph</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1673</th>\n",
       "      <td>PH</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>70.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30128</th>\n",
       "      <td>Neosynephrine-k</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>69.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      label    abbreviation         linksto  \\\n",
       "itemid                                                        \n",
       "30047              Levophed                  inputevents_cv   \n",
       "221906       Norepinephrine  Norepinephrine  inputevents_mv   \n",
       "30120            Levophed-k                  inputevents_cv   \n",
       "30044           Epinephrine                  inputevents_cv   \n",
       "221289          Epinephrine     Epinephrine  inputevents_mv   \n",
       "5752             Epinephrin                     chartevents   \n",
       "51201          EPINEPHERINE             NaN       labevents   \n",
       "30119         Epinephrine-k                  inputevents_cv   \n",
       "30309      Epinephrine Drip                  inputevents_cv   \n",
       "30127         Neosynephrine                  inputevents_cv   \n",
       "225922           Nephramine      Nephramine  inputevents_mv   \n",
       "3112    epinephrine mcg/min                     chartevents   \n",
       "50820                    PH             NaN       labevents   \n",
       "51491                    PH             NaN       labevents   \n",
       "51094                    PH             NaN       labevents   \n",
       "50831                    PH             NaN       labevents   \n",
       "45183                    ED                  inputevents_cv   \n",
       "7459                     Ph                     chartevents   \n",
       "1673                     PH                     chartevents   \n",
       "30128       Neosynephrine-k                  inputevents_cv   \n",
       "\n",
       "                       category unitname       score  \n",
       "itemid                                                \n",
       "30047                                     110.000000  \n",
       "221906              Medications       mg  110.000000  \n",
       "30120                                     102.666667  \n",
       "30044                                     102.000000  \n",
       "221289              Medications       mg  102.000000  \n",
       "5752                                       98.666667  \n",
       "51201                HEMATOLOGY      NaN   88.666667  \n",
       "30119                                      84.666667  \n",
       "30309                                      77.333333  \n",
       "30127                                      72.333333  \n",
       "225922  Nutrition - Supplements       mL   71.333333  \n",
       "3112                                       71.000000  \n",
       "50820                 BLOOD GAS      NaN   70.000000  \n",
       "51491                HEMATOLOGY      NaN   70.000000  \n",
       "51094                 CHEMISTRY      NaN   70.000000  \n",
       "50831                 BLOOD GAS      NaN   70.000000  \n",
       "45183          Free Form Intake            70.000000  \n",
       "7459                                       70.000000  \n",
       "1673                                       70.000000  \n",
       "30128                                      69.666667  "
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df = explorer.search([\n",
    "        'levophed',\n",
    "        'norepinephrine'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keep_dict[data_dict.labels.NOREPINEPHRINE] = [30047,221906,30120]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Vasopressin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>abbreviation</th>\n",
       "      <th>linksto</th>\n",
       "      <th>category</th>\n",
       "      <th>unitname</th>\n",
       "      <th>score</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>itemid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>30051</th>\n",
       "      <td>Vasopressin</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2445</th>\n",
       "      <td>Vasopressin</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>222315</th>\n",
       "      <td>Vasopressin</td>\n",
       "      <td>Vasopressin</td>\n",
       "      <td>inputevents_mv</td>\n",
       "      <td>Medications</td>\n",
       "      <td>units</td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1136</th>\n",
       "      <td>vasopressin</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1222</th>\n",
       "      <td>VASOPRESSIN</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>110.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2334</th>\n",
       "      <td>vasopressin u/hr</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2561</th>\n",
       "      <td>VASOPRESSIN U/HR</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>97.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7341</th>\n",
       "      <td>Vasopressin  u/hr</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>96.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46570</th>\n",
       "      <td>vassopressin</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>94.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42802</th>\n",
       "      <td>VASOPRESSIN  CC/HR.</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>94.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6255</th>\n",
       "      <td>VAsopressin 0.04   s</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>92.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2765</th>\n",
       "      <td>VASOPRESSIN   UNIT/R</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>92.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2248</th>\n",
       "      <td>VASOPRESSIN UNIT/MIN</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42273</th>\n",
       "      <td>vasopressin unit/min</td>\n",
       "      <td></td>\n",
       "      <td>inputevents_cv</td>\n",
       "      <td>Free Form Intake</td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1327</th>\n",
       "      <td>vasopressin unit/min</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>90.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6269</th>\n",
       "      <td>Dressing</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>68.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>Angio Dressing #2</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>68.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6691</th>\n",
       "      <td>DRIV PRESS</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>68.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>Angio Dressing #1</td>\n",
       "      <td></td>\n",
       "      <td>chartevents</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>68.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>228448</th>\n",
       "      <td>Angio Dressing # 4</td>\n",
       "      <td>Angio Dressing # 4</td>\n",
       "      <td></td>\n",
       "      <td>Cardiovascular</td>\n",
       "      <td></td>\n",
       "      <td>67.333333</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       label        abbreviation         linksto  \\\n",
       "itemid                                                             \n",
       "30051            Vasopressin                      inputevents_cv   \n",
       "2445             Vasopressin                         chartevents   \n",
       "222315           Vasopressin         Vasopressin  inputevents_mv   \n",
       "1136             vasopressin                         chartevents   \n",
       "1222             VASOPRESSIN                         chartevents   \n",
       "2334        vasopressin u/hr                         chartevents   \n",
       "2561        VASOPRESSIN U/HR                         chartevents   \n",
       "7341       Vasopressin  u/hr                         chartevents   \n",
       "46570           vassopressin                      inputevents_cv   \n",
       "42802    VASOPRESSIN  CC/HR.                      inputevents_cv   \n",
       "6255    VAsopressin 0.04   s                         chartevents   \n",
       "2765    VASOPRESSIN   UNIT/R                         chartevents   \n",
       "2248    VASOPRESSIN UNIT/MIN                         chartevents   \n",
       "42273   vasopressin unit/min                      inputevents_cv   \n",
       "1327    vasopressin unit/min                         chartevents   \n",
       "6269                Dressing                         chartevents   \n",
       "46         Angio Dressing #2                         chartevents   \n",
       "6691              DRIV PRESS                         chartevents   \n",
       "45         Angio Dressing #1                         chartevents   \n",
       "228448    Angio Dressing # 4  Angio Dressing # 4                   \n",
       "\n",
       "                category unitname       score  \n",
       "itemid                                         \n",
       "30051                              110.000000  \n",
       "2445                               110.000000  \n",
       "222315       Medications    units  110.000000  \n",
       "1136                               110.000000  \n",
       "1222                               110.000000  \n",
       "2334                                97.333333  \n",
       "2561                                97.333333  \n",
       "7341                                96.666667  \n",
       "46570   Free Form Intake            94.333333  \n",
       "42802   Free Form Intake            94.000000  \n",
       "6255                                92.333333  \n",
       "2765                                92.333333  \n",
       "2248                                90.666667  \n",
       "42273   Free Form Intake            90.666667  \n",
       "1327                                90.666667  \n",
       "6269                                68.666667  \n",
       "46                                  68.000000  \n",
       "6691                                68.000000  \n",
       "45                                  68.000000  \n",
       "228448    Cardiovascular            67.333333  "
      ]
     },
     "execution_count": 119,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "out_df = explorer.search([\n",
    "        'vasopressin',\n",
    "        'argipressin',\n",
    "        'arginine vasopressin'\n",
    "    ])\n",
    "out_df.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "keep_dict[data_dict.labels.VASOPRESSIN] = out_df.loc[:1327].index.tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Now build our mapping dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from itertools import product"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "tuples = []\n",
    "\n",
    "for label,items in keep_dict.iteritems():\n",
    "    tuples += list(product([label],items))\n",
    "\n",
    "item_map = pd.DataFrame(tuples,columns=['label','itemid'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>glasgow coma scale eye opening</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>glasgow coma scale eye opening</td>\n",
       "      <td>220739</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>glasgow coma scale motor</td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>glasgow coma scale motor</td>\n",
       "      <td>223901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>blood pressure systolic</td>\n",
       "      <td>220179</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            label  itemid\n",
       "0  glasgow coma scale eye opening     184\n",
       "1  glasgow coma scale eye opening  220739\n",
       "2        glasgow coma scale motor     454\n",
       "3        glasgow coma scale motor  223901\n",
       "4         blood pressure systolic  220179"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "item_map.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "item_map.to_csv('config/mimic_item_map.csv',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extract MIMIC III data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import mimic\n",
    "from constants import ALL\n",
    "import icu_data_defs \n",
    "import utils\n",
    "import logger\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(icu_data_defs)\n",
    "reload(mimic)\n",
    "reload(logger)\n",
    "\n",
    "def extract_labels(conn,labels,item_map_fname,hdf5_fname,hadm_ids=ALL):\n",
    "    logger.log('Start extracting {} labels'.format(len(labels)),new_level=True)\n",
    "    extractor = mimic.mimic_extractor(conn,item_map_fname)\n",
    "    for label in labels:\n",
    "        logger.log(label.upper(),new_level=True)\n",
    "        df = extractor.extract_label(label,hadm_ids)\n",
    "        if df is None: continue\n",
    "        utils.save_df(df,hdf5_fname,'extract/{}'.format(label))\n",
    "        display(df.head())\n",
    "        print df.shape\n",
    "        del df\n",
    "    logger.end_log()\n",
    "    return \n",
    "\n",
    "#connect to the mimic database\n",
    "conn = mimic.connect()\n",
    "\n",
    "#these are the default config files we will be using\n",
    "item_map_fname = 'config/mimic_item_map.csv'\n",
    "mimic_data_sef_fname = 'config/data_definitions.xlsx'\n",
    "\n",
    "#get all labels\n",
    "data_dict = icu_data_defs.data_dictionary(mimic_data_sef_fname)\n",
    "simple_data = data_dict.get_panel_defintions(12) #12 is \"simple data\"\n",
    "labels = simple_data.label.unique().tolist()\n",
    "\n",
    "#where we will be storing this extraction\n",
    "hdf5_fname = 'data/mimic_data'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 03:26:58) Start extracting 5 labels\n",
      "(2017-06-03 03:26:58)>> HEART RATE\n",
      "(2017-06-03 03:26:59)>>>> Extracting 5 items from chartevents\n",
      "(2017-06-03 03:26:59)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:26:59)>>>> Combine DF\n",
      "(2017-06-03 03:26:59)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:26:59)>>>> Clean UOM\n",
      "(2017-06-03 03:26:59)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 00:00:00</td>\n",
       "      <td>62</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 00:30:00</td>\n",
       "      <td>67</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 01:00:00</td>\n",
       "      <td>67</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 01:20:00</td>\n",
       "      <td>63</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 01:25:00</td>\n",
       "      <td>63</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value     units  itemid\n",
       "0  143838 2129-07-14 00:00:00    62  beat/min     211\n",
       "1  143838 2129-07-14 00:30:00    67  beat/min     211\n",
       "2  143838 2129-07-14 01:00:00    67  beat/min     211\n",
       "3  143838 2129-07-14 01:20:00    63  beat/min     211\n",
       "4  143838 2129-07-14 01:25:00    63  beat/min     211"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1649, 5)\n",
      "(2017-06-03 03:27:01)<< DONE (3.0s)\n",
      "(2017-06-03 03:27:01)>> BLOOD PRESSURE SYSTOLIC\n",
      "(2017-06-03 03:27:01)>>>> Extracting 14 items from chartevents\n",
      "(2017-06-03 03:27:02)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:27:02)>>>> Combine DF\n",
      "(2017-06-03 03:27:02)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:27:02)>>>> Clean UOM\n",
      "(2017-06-03 03:27:02)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-13 22:40:00</td>\n",
       "      <td>145</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-13 23:00:00</td>\n",
       "      <td>136</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-13 23:30:00</td>\n",
       "      <td>179</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 00:00:00</td>\n",
       "      <td>131</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 00:30:00</td>\n",
       "      <td>146</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value units  itemid\n",
       "0  143838 2129-07-13 22:40:00   145  mmHg      51\n",
       "1  143838 2129-07-13 23:00:00   136  mmHg      51\n",
       "2  143838 2129-07-13 23:30:00   179  mmHg      51\n",
       "3  143838 2129-07-14 00:00:00   131  mmHg      51\n",
       "4  143838 2129-07-14 00:30:00   146  mmHg      51"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(644, 5)\n",
      "(2017-06-03 03:27:02)<< DONE (1.0s)\n",
      "(2017-06-03 03:27:02)>> BLOOD PRESSURE DIASTOLIC\n",
      "(2017-06-03 03:27:02)>>>> Extracting 15 items from chartevents\n",
      "(2017-06-03 03:27:02)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:27:02)>>>> Combine DF\n",
      "(2017-06-03 03:27:02)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:27:02)>>>> Clean UOM\n",
      "(2017-06-03 03:27:02)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 21:00:00</td>\n",
       "      <td>53</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 14:00:00</td>\n",
       "      <td>84</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 10:30:00</td>\n",
       "      <td>54</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 15:00:00</td>\n",
       "      <td>60</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 16:00:00</td>\n",
       "      <td>59</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value units  itemid\n",
       "0  113540 2161-10-12 21:00:00    53  mmHg  220051\n",
       "1  113540 2161-10-12 14:00:00    84  mmHg  220051\n",
       "2  113540 2161-10-12 10:30:00    54  mmHg  220051\n",
       "3  113540 2161-10-12 15:00:00    60  mmHg  220051\n",
       "4  113540 2161-10-12 16:00:00    59  mmHg  220051"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(640, 5)\n",
      "(2017-06-03 03:27:02)<< DONE (0.0s)\n",
      "(2017-06-03 03:27:02)>> BLOOD PRESSURE MEAN\n",
      "(2017-06-03 03:27:03)>>>> Extracting 3 items from chartevents\n",
      "(2017-06-03 03:27:03)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:27:03)>>>> Combine DF\n",
      "(2017-06-03 03:27:03)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:27:03)>>>> Clean UOM\n",
      "(2017-06-03 03:27:03)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 21:00:00</td>\n",
       "      <td>76</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 12:00:00</td>\n",
       "      <td>91</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 14:00:00</td>\n",
       "      <td>82</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 10:30:00</td>\n",
       "      <td>70</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>113540</td>\n",
       "      <td>2161-10-12 15:00:00</td>\n",
       "      <td>80</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value units  itemid\n",
       "0  113540 2161-10-12 21:00:00    76  mmHg  220052\n",
       "1  113540 2161-10-12 12:00:00    91  mmHg  220052\n",
       "2  113540 2161-10-12 14:00:00    82  mmHg  220052\n",
       "3  113540 2161-10-12 10:30:00    70  mmHg  220052\n",
       "4  113540 2161-10-12 15:00:00    80  mmHg  220052"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(58, 5)\n",
      "(2017-06-03 03:27:03)<< DONE (1.0s)\n",
      "(2017-06-03 03:27:03)>> RESPIRATORY RATE\n",
      "(2017-06-03 03:27:03)>>>> Extracting 4 items from chartevents\n",
      "(2017-06-03 03:27:04)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:27:04)>>>> Combine DF\n",
      "(2017-06-03 03:27:04)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:27:04)>>>> Clean UOM\n",
      "(2017-06-03 03:27:04)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-13 23:30:00</td>\n",
       "      <td>17</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 00:00:00</td>\n",
       "      <td>20</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 00:30:00</td>\n",
       "      <td>17</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 01:00:00</td>\n",
       "      <td>13</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>143838</td>\n",
       "      <td>2129-07-14 01:20:00</td>\n",
       "      <td>15</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value       units  itemid\n",
       "0  143838 2129-07-13 23:30:00    17  breath/min     618\n",
       "1  143838 2129-07-14 00:00:00    20  breath/min     618\n",
       "2  143838 2129-07-14 00:30:00    17  breath/min     618\n",
       "3  143838 2129-07-14 01:00:00    13  breath/min     618\n",
       "4  143838 2129-07-14 01:20:00    15  breath/min     618"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1647, 5)\n",
      "(2017-06-03 03:27:04)<< DONE (1.0s)\n",
      "(2017-06-03 03:27:04) DONE (6.0s)\n"
     ]
    }
   ],
   "source": [
    "hadm_ids = mimic.sample_hadm_ids(5,conn)\n",
    "extract_labels(conn,labels[0:5],item_map_fname,hdf5_fname,hadm_ids)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 03:27:10) Start extracting 18 labels\n",
      "(2017-06-03 03:27:10)>> HEART RATE\n",
      "(2017-06-03 03:27:10)>>>> Extracting 5 items from chartevents\n",
      "(2017-06-03 03:28:03)<<<< DONE (53.0s)\n",
      "(2017-06-03 03:28:03)>>>> Combine DF\n",
      "(2017-06-03 03:28:03)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:28:03)>>>> Clean UOM\n",
      "(2017-06-03 03:28:07)<<<< DONE (4.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-26 03:00:00</td>\n",
       "      <td>79</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-02 06:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-02 07:00:00</td>\n",
       "      <td>116</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-02 08:00:00</td>\n",
       "      <td>144</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-03 08:00:00</td>\n",
       "      <td>144</td>\n",
       "      <td>beat/min</td>\n",
       "      <td>211</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value     units  itemid\n",
       "0  185910.0 2166-08-26 03:00:00    79  beat/min     211\n",
       "1  106266.0 2114-12-02 06:00:00   116  beat/min     211\n",
       "2  106266.0 2114-12-02 07:00:00   116  beat/min     211\n",
       "3  106266.0 2114-12-02 08:00:00   144  beat/min     211\n",
       "4  106266.0 2114-12-03 08:00:00   144  beat/min     211"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7952939, 5)\n",
      "(2017-06-03 03:28:20)<< DONE (70.0s)\n",
      "(2017-06-03 03:28:20)>> BLOOD PRESSURE SYSTOLIC\n",
      "(2017-06-03 03:28:21)>>>> Extracting 14 items from chartevents\n",
      "(2017-06-03 03:29:45)<<<< DONE (84.0s)\n",
      "(2017-06-03 03:29:45)>>>> Combine DF\n",
      "(2017-06-03 03:29:45)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:29:45)>>>> Clean UOM\n",
      "(2017-06-03 03:29:48)<<<< DONE (3.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>176203.0</td>\n",
       "      <td>2154-11-12 22:00:00</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>197569.0</td>\n",
       "      <td>2133-03-09 00:00:00</td>\n",
       "      <td>117</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>197569.0</td>\n",
       "      <td>2133-03-09 01:00:00</td>\n",
       "      <td>117</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>197569.0</td>\n",
       "      <td>2133-03-09 02:00:00</td>\n",
       "      <td>127</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>197569.0</td>\n",
       "      <td>2133-03-09 03:00:00</td>\n",
       "      <td>126</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value units  itemid\n",
       "0  176203.0 2154-11-12 22:00:00                  51\n",
       "1  197569.0 2133-03-09 00:00:00   117  mmHg      51\n",
       "2  197569.0 2133-03-09 01:00:00   117  mmHg      51\n",
       "3  197569.0 2133-03-09 02:00:00   127  mmHg      51\n",
       "4  197569.0 2133-03-09 03:00:00   126  mmHg      51"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(6374824, 5)\n",
      "(2017-06-03 03:30:00)<< DONE (100.0s)\n",
      "(2017-06-03 03:30:00)>> BLOOD PRESSURE DIASTOLIC\n",
      "(2017-06-03 03:30:00)>>>> Extracting 15 items from chartevents\n",
      "(2017-06-03 03:31:18)<<<< DONE (78.0s)\n",
      "(2017-06-03 03:31:18)>>>> Combine DF\n",
      "(2017-06-03 03:31:19)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:31:19)>>>> Clean UOM\n",
      "(2017-06-03 03:31:22)<<<< DONE (3.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>124321.0</td>\n",
       "      <td>2157-10-21 12:15:00</td>\n",
       "      <td>74</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>124321.0</td>\n",
       "      <td>2157-10-21 13:00:00</td>\n",
       "      <td>67</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>124321.0</td>\n",
       "      <td>2157-10-21 14:00:00</td>\n",
       "      <td>64</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>124321.0</td>\n",
       "      <td>2157-10-21 15:00:00</td>\n",
       "      <td>76</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>124321.0</td>\n",
       "      <td>2157-10-21 16:17:00</td>\n",
       "      <td>80</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220051</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value units  itemid\n",
       "0  124321.0 2157-10-21 12:15:00    74  mmHg  220051\n",
       "1  124321.0 2157-10-21 13:00:00    67  mmHg  220051\n",
       "2  124321.0 2157-10-21 14:00:00    64  mmHg  220051\n",
       "3  124321.0 2157-10-21 15:00:00    76  mmHg  220051\n",
       "4  124321.0 2157-10-21 16:17:00    80  mmHg  220051"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(6371249, 5)\n",
      "(2017-06-03 03:31:34)<< DONE (94.0s)\n",
      "(2017-06-03 03:31:34)>> BLOOD PRESSURE MEAN\n",
      "(2017-06-03 03:31:34)>>>> Extracting 3 items from chartevents\n",
      "(2017-06-03 03:32:13)<<<< DONE (39.0s)\n",
      "(2017-06-03 03:32:13)>>>> Combine DF\n",
      "(2017-06-03 03:32:13)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:32:13)>>>> Clean UOM\n",
      "(2017-06-03 03:32:14)<<<< DONE (1.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>124321</td>\n",
       "      <td>2157-10-21 13:00:00</td>\n",
       "      <td>86</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>124321</td>\n",
       "      <td>2157-10-21 14:00:00</td>\n",
       "      <td>86</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>124321</td>\n",
       "      <td>2157-10-21 15:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>124321</td>\n",
       "      <td>2157-10-21 16:17:00</td>\n",
       "      <td>104</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>124321</td>\n",
       "      <td>2157-10-21 17:00:00</td>\n",
       "      <td>104</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>220052</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value units  itemid\n",
       "0  124321 2157-10-21 13:00:00    86  mmHg  220052\n",
       "1  124321 2157-10-21 14:00:00    86  mmHg  220052\n",
       "2  124321 2157-10-21 15:00:00    98  mmHg  220052\n",
       "3  124321 2157-10-21 16:17:00   104  mmHg  220052\n",
       "4  124321 2157-10-21 17:00:00   104  mmHg  220052"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2536271, 5)\n",
      "(2017-06-03 03:32:18)<< DONE (44.0s)\n",
      "(2017-06-03 03:32:18)>> RESPIRATORY RATE\n",
      "(2017-06-03 03:32:18)>>>> Extracting 4 items from chartevents\n",
      "(2017-06-03 03:34:08)<<<< DONE (110.0s)\n",
      "(2017-06-03 03:34:08)>>>> Combine DF\n",
      "(2017-06-03 03:34:08)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:34:08)>>>> Clean UOM\n",
      "(2017-06-03 03:34:13)<<<< DONE (5.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:00:00</td>\n",
       "      <td>20</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>122659.0</td>\n",
       "      <td>2131-05-22 14:00:00</td>\n",
       "      <td>29</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 19:00:00</td>\n",
       "      <td>24</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>24</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-10 13:30:00</td>\n",
       "      <td>22</td>\n",
       "      <td>breath/min</td>\n",
       "      <td>618</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value       units  itemid\n",
       "0  185910.0 2166-08-25 00:00:00    20  breath/min     618\n",
       "1  122659.0 2131-05-22 14:00:00    29  breath/min     618\n",
       "2  188670.0 2183-08-23 19:00:00    24  breath/min     618\n",
       "3  188670.0 2183-08-23 20:00:00    24  breath/min     618\n",
       "4  185910.0 2166-08-10 13:30:00    22  breath/min     618"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(7810019, 5)\n",
      "(2017-06-03 03:34:27)<< DONE (129.0s)\n",
      "(2017-06-03 03:34:27)>> TEMPERATURE BODY\n",
      "(2017-06-03 03:34:27)>>>> Extracting 4 items from chartevents\n",
      "(2017-06-03 03:35:01)<<<< DONE (34.0s)\n",
      "(2017-06-03 03:35:01)>>>> Combine DF\n",
      "(2017-06-03 03:35:01)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:35:01)>>>> Clean UOM\n",
      "(2017-06-03 03:35:02)<<<< DONE (1.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:00:00</td>\n",
       "      <td>98.400001525878906</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>99.300003051757812</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-12 08:00:00</td>\n",
       "      <td>98.699996948242188</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-13 17:00:00</td>\n",
       "      <td>99.900001525878906</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>166707.0</td>\n",
       "      <td>2122-02-11 20:00:00</td>\n",
       "      <td>37.900001525878906</td>\n",
       "      <td>degC</td>\n",
       "      <td>676</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime               value units  itemid\n",
       "0  185910.0 2166-08-25 00:00:00  98.400001525878906  degF     678\n",
       "1  188670.0 2183-08-23 20:00:00  99.300003051757812  degF     678\n",
       "2  185910.0 2166-08-12 08:00:00  98.699996948242188  degF     678\n",
       "3  185910.0 2166-08-13 17:00:00  99.900001525878906  degF     678\n",
       "4  166707.0 2122-02-11 20:00:00  37.900001525878906  degC     676"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1751447, 5)\n",
      "(2017-06-03 03:35:05)<< DONE (38.0s)\n",
      "(2017-06-03 03:35:05)>> OXYGEN SATURATION PULSE OXIMETRY\n",
      "(2017-06-03 03:35:06)>>>> Extracting 2 items from chartevents\n",
      "(2017-06-03 03:35:59)<<<< DONE (53.0s)\n",
      "(2017-06-03 03:35:59)>>>> Combine DF\n",
      "(2017-06-03 03:35:59)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:35:59)>>>> Clean UOM\n",
      "(2017-06-03 03:36:02)<<<< DONE (3.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>percent</td>\n",
       "      <td>646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:15:00</td>\n",
       "      <td>100</td>\n",
       "      <td>percent</td>\n",
       "      <td>646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>122659.0</td>\n",
       "      <td>2131-05-22 14:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>percent</td>\n",
       "      <td>646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 19:00:00</td>\n",
       "      <td>97</td>\n",
       "      <td>percent</td>\n",
       "      <td>646</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>97</td>\n",
       "      <td>percent</td>\n",
       "      <td>646</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value    units  itemid\n",
       "0  185910.0 2166-08-25 00:00:00   100  percent     646\n",
       "1  185910.0 2166-08-25 00:15:00   100  percent     646\n",
       "2  122659.0 2131-05-22 14:00:00   100  percent     646\n",
       "3  188670.0 2183-08-23 19:00:00    97  percent     646\n",
       "4  188670.0 2183-08-23 20:00:00    97  percent     646"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(6099827, 5)\n",
      "(2017-06-03 03:36:14)<< DONE (69.0s)\n",
      "(2017-06-03 03:36:14)>> WEIGHT BODY\n",
      "(2017-06-03 03:36:14)>>>> Extracting 3 items from chartevents\n",
      "(2017-06-03 03:36:48)<<<< DONE (34.0s)\n",
      "(2017-06-03 03:36:48)>>>> Combine DF\n",
      "(2017-06-03 03:36:49)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:36:49)>>>> Clean UOM\n",
      "(2017-06-03 03:36:49)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-20 06:00:00</td>\n",
       "      <td>102.5</td>\n",
       "      <td>kg</td>\n",
       "      <td>763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>166707.0</td>\n",
       "      <td>2122-02-15 07:00:00</td>\n",
       "      <td>99.400001525878906</td>\n",
       "      <td>kg</td>\n",
       "      <td>763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>182104.0</td>\n",
       "      <td>2131-05-05 06:00:00</td>\n",
       "      <td>109.69999694824219</td>\n",
       "      <td>kg</td>\n",
       "      <td>763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>157907.0</td>\n",
       "      <td>2129-06-10 05:00:00</td>\n",
       "      <td>103.19999694824219</td>\n",
       "      <td>kg</td>\n",
       "      <td>763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>166707.0</td>\n",
       "      <td>2122-02-14 06:00:00</td>\n",
       "      <td>101.80000305175781</td>\n",
       "      <td>kg</td>\n",
       "      <td>763</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime               value units  itemid\n",
       "0  185910.0 2166-08-20 06:00:00               102.5    kg     763\n",
       "1  166707.0 2122-02-15 07:00:00  99.400001525878906    kg     763\n",
       "2  182104.0 2131-05-05 06:00:00  109.69999694824219    kg     763\n",
       "3  157907.0 2129-06-10 05:00:00  103.19999694824219    kg     763\n",
       "4  166707.0 2122-02-14 06:00:00  101.80000305175781    kg     763"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(95425, 5)\n",
      "(2017-06-03 03:36:49)<< DONE (35.0s)\n",
      "(2017-06-03 03:36:49)>> OUTPUT URINE\n",
      "(2017-06-03 03:36:49)>>>> Extracting 2 items from chartevents\n",
      "(2017-06-03 03:37:28)<<<< DONE (39.0s)\n",
      "(2017-06-03 03:37:28)>>>> Extracting 29 items from outputevents\n",
      "(2017-06-03 03:37:59)<<<< DONE (31.0s)\n",
      "(2017-06-03 03:37:59)>>>> Combine DF\n",
      "(2017-06-03 03:37:59)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:37:59)>>>> Clean UOM\n",
      "(2017-06-03 03:38:09)<<<< DONE (10.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-03 08:00:00</td>\n",
       "      <td>Voiding qs</td>\n",
       "      <td>ml</td>\n",
       "      <td>3686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-09 09:00:00</td>\n",
       "      <td>Voiding qs</td>\n",
       "      <td>ml</td>\n",
       "      <td>3686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-09 14:00:00</td>\n",
       "      <td>Voiding qs</td>\n",
       "      <td>ml</td>\n",
       "      <td>3686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-05 21:30:00</td>\n",
       "      <td>Voiding qs</td>\n",
       "      <td>ml</td>\n",
       "      <td>3686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>106266.0</td>\n",
       "      <td>2114-12-04 16:00:00</td>\n",
       "      <td>Voiding qs</td>\n",
       "      <td>ml</td>\n",
       "      <td>3686</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime       value units  itemid\n",
       "0  106266.0 2114-12-03 08:00:00  Voiding qs    ml    3686\n",
       "1  106266.0 2114-12-09 09:00:00  Voiding qs    ml    3686\n",
       "2  106266.0 2114-12-09 14:00:00  Voiding qs    ml    3686\n",
       "3  106266.0 2114-12-05 21:30:00  Voiding qs    ml    3686\n",
       "4  106266.0 2114-12-04 16:00:00  Voiding qs    ml    3686"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(3644639, 5)\n",
      "(2017-06-03 03:38:15)<< DONE (86.0s)\n",
      "(2017-06-03 03:38:15)>> GLASGOW COMA SCALE MOTOR\n",
      "(2017-06-03 03:38:15)>>>> Extracting 1 items from chartevents\n",
      "(2017-06-03 03:39:12)<<<< DONE (57.0s)\n",
      "(2017-06-03 03:39:12)>>>> Combine DF\n",
      "(2017-06-03 03:39:12)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:39:12)>>>> Clean UOM\n",
      "(2017-06-03 03:39:12)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:00:00</td>\n",
       "      <td>6 Obeys Commands</td>\n",
       "      <td></td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>6 Obeys Commands</td>\n",
       "      <td></td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-27 12:00:00</td>\n",
       "      <td>5 Localizes Pain</td>\n",
       "      <td></td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-09-03 12:00:00</td>\n",
       "      <td>6 Obeys Commands</td>\n",
       "      <td></td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>101757.0</td>\n",
       "      <td>2133-01-03 16:00:00</td>\n",
       "      <td>6 Obeys Commands</td>\n",
       "      <td></td>\n",
       "      <td>454</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime             value units  itemid\n",
       "0  185910.0 2166-08-25 00:00:00  6 Obeys Commands           454\n",
       "1  188670.0 2183-08-23 20:00:00  6 Obeys Commands           454\n",
       "2  185910.0 2166-08-27 12:00:00  5 Localizes Pain           454\n",
       "3  185910.0 2166-09-03 12:00:00  6 Obeys Commands           454\n",
       "4  101757.0 2133-01-03 16:00:00  6 Obeys Commands           454"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(952565, 5)\n",
      "(2017-06-03 03:39:13)<< DONE (58.0s)\n",
      "(2017-06-03 03:39:13)>> GLASGOW COMA SCALE EYE OPENING\n",
      "(2017-06-03 03:39:13)>>>> Extracting 1 items from chartevents\n",
      "(2017-06-03 03:39:57)<<<< DONE (44.0s)\n",
      "(2017-06-03 03:39:57)>>>> Combine DF\n",
      "(2017-06-03 03:39:57)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:39:57)>>>> Clean UOM\n",
      "(2017-06-03 03:39:57)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>4 Spontaneously</td>\n",
       "      <td></td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-15 20:00:00</td>\n",
       "      <td>4 Spontaneously</td>\n",
       "      <td></td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-16 16:00:00</td>\n",
       "      <td>3 To speech</td>\n",
       "      <td></td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-23 04:00:00</td>\n",
       "      <td>3 To speech</td>\n",
       "      <td></td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-27 12:00:00</td>\n",
       "      <td>1 No Response</td>\n",
       "      <td></td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime            value units  itemid\n",
       "0  188670.0 2183-08-23 20:00:00  4 Spontaneously           184\n",
       "1  185910.0 2166-08-15 20:00:00  4 Spontaneously           184\n",
       "2  185910.0 2166-08-16 16:00:00      3 To speech           184\n",
       "3  185910.0 2166-08-23 04:00:00      3 To speech           184\n",
       "4  185910.0 2166-08-27 12:00:00    1 No Response           184"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(956672, 5)\n",
      "(2017-06-03 03:39:58)<< DONE (45.0s)\n",
      "(2017-06-03 03:39:58)>> GLASGOW COMA SCALE VERBAL\n",
      "(2017-06-03 03:39:59)>>>> Extracting 1 items from chartevents\n",
      "(2017-06-03 03:40:22)<<<< DONE (23.0s)\n",
      "(2017-06-03 03:40:22)>>>> Combine DF\n",
      "(2017-06-03 03:40:22)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:40:22)>>>> Clean UOM\n",
      "(2017-06-03 03:40:22)<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:00:00</td>\n",
       "      <td>1.0 ET/Trach</td>\n",
       "      <td></td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>4 Confused</td>\n",
       "      <td></td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-12 08:00:00</td>\n",
       "      <td>1.0 ET/Trach</td>\n",
       "      <td></td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-27 12:00:00</td>\n",
       "      <td>1.0 ET/Trach</td>\n",
       "      <td></td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>166707.0</td>\n",
       "      <td>2122-02-11 20:00:00</td>\n",
       "      <td>5 Oriented</td>\n",
       "      <td></td>\n",
       "      <td>723</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime         value units  itemid\n",
       "0  185910.0 2166-08-25 00:00:00  1.0 ET/Trach           723\n",
       "1  188670.0 2183-08-23 20:00:00    4 Confused           723\n",
       "2  185910.0 2166-08-12 08:00:00  1.0 ET/Trach           723\n",
       "3  185910.0 2166-08-27 12:00:00  1.0 ET/Trach           723\n",
       "4  166707.0 2122-02-11 20:00:00    5 Oriented           723"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(954700, 5)\n",
      "(2017-06-03 03:40:24)<< DONE (26.0s)\n",
      "(2017-06-03 03:40:24)>> NORMAL SALINE\n",
      "(2017-06-03 03:40:24)>>>> Extracting 2 items from chartevents\n",
      "(2017-06-03 03:40:24)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:40:24)>>>> Extracting 4 items from inputevents_cv\n",
      "(2017-06-03 03:40:25)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:40:25)>>>> Extracting 2 items from inputevents_mv\n",
      "(2017-06-03 03:40:43)<<<< DONE (18.0s)\n",
      "(2017-06-03 03:40:43)>>>> Combine DF\n",
      "(2017-06-03 03:40:43)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:40:43)>>>> Clean UOM\n",
      "(2017-06-03 03:40:46)<<<< DONE (3.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>110468</td>\n",
       "      <td>2134-02-13 03:45:00</td>\n",
       "      <td>given</td>\n",
       "      <td>ml</td>\n",
       "      <td>4647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>110468</td>\n",
       "      <td>2134-02-13 07:05:00</td>\n",
       "      <td>given</td>\n",
       "      <td>ml</td>\n",
       "      <td>4647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>175419</td>\n",
       "      <td>2122-08-16 01:00:00</td>\n",
       "      <td>given</td>\n",
       "      <td>ml</td>\n",
       "      <td>4647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>143494</td>\n",
       "      <td>2147-07-02 23:00:00</td>\n",
       "      <td>given</td>\n",
       "      <td>ml</td>\n",
       "      <td>4647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>143494</td>\n",
       "      <td>2147-07-02 23:43:00</td>\n",
       "      <td>given</td>\n",
       "      <td>ml</td>\n",
       "      <td>4647</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime  value units  itemid\n",
       "0  110468 2134-02-13 03:45:00  given    ml    4647\n",
       "1  110468 2134-02-13 07:05:00  given    ml    4647\n",
       "2  175419 2122-08-16 01:00:00  given    ml    4647\n",
       "3  143494 2147-07-02 23:00:00  given    ml    4647\n",
       "4  143494 2147-07-02 23:43:00  given    ml    4647"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(817373, 5)\n",
      "(2017-06-03 03:40:46)<< DONE (22.0s)\n",
      "(2017-06-03 03:40:46)>> LACTATED RINGERS\n",
      "(2017-06-03 03:40:46)>>>> Extracting 2 items from chartevents\n",
      "(2017-06-03 03:40:46)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:40:46)>>>> Extracting 7 items from inputevents_cv\n",
      "(2017-06-03 03:40:50)<<<< DONE (4.0s)\n",
      "(2017-06-03 03:40:50)>>>> Extracting 2 items from inputevents_mv\n",
      "(2017-06-03 03:40:51)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:40:51)>>>> Combine DF\n",
      "(2017-06-03 03:40:51)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:40:51)>>>> Clean UOM\n",
      "(2017-06-03 03:40:53)<<<< DONE (2.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>194762.0</td>\n",
       "      <td>2110-11-22 18:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>ml</td>\n",
       "      <td>1634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>178769.0</td>\n",
       "      <td>2181-06-01 05:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>ml</td>\n",
       "      <td>2971</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>178769.0</td>\n",
       "      <td>2181-06-01 01:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>ml</td>\n",
       "      <td>2971</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>178769.0</td>\n",
       "      <td>2181-06-01 02:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>ml</td>\n",
       "      <td>2971</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>178769.0</td>\n",
       "      <td>2181-06-01 03:00:00</td>\n",
       "      <td>100</td>\n",
       "      <td>ml</td>\n",
       "      <td>2971</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value units  itemid\n",
       "0  194762.0 2110-11-22 18:00:00   100    ml    1634\n",
       "1  178769.0 2181-06-01 05:00:00   100    ml    2971\n",
       "2  178769.0 2181-06-01 01:00:00   100    ml    2971\n",
       "3  178769.0 2181-06-01 02:00:00   100    ml    2971\n",
       "4  178769.0 2181-06-01 03:00:00   100    ml    2971"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(504306, 5)\n",
      "(2017-06-03 03:40:53)<< DONE (7.0s)\n",
      "(2017-06-03 03:40:53)>> NOREPINEPHRINE\n",
      "(2017-06-03 03:40:53)>>>> Extracting 2 items from inputevents_cv\n",
      "(2017-06-03 03:41:10)<<<< DONE (17.0s)\n",
      "(2017-06-03 03:41:10)>>>> Extracting 1 items from inputevents_mv\n",
      "(2017-06-03 03:41:12)<<<< DONE (2.0s)\n",
      "(2017-06-03 03:41:12)>>>> Combine DF\n",
      "(2017-06-03 03:41:12)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:41:12)>>>> Clean UOM\n",
      "(2017-06-03 03:41:16)<<<< DONE (4.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>181516.0</td>\n",
       "      <td>2121-01-27 00:00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>mg</td>\n",
       "      <td>30120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>194502.0</td>\n",
       "      <td>2102-12-20 08:00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>mg</td>\n",
       "      <td>30047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>172260.0</td>\n",
       "      <td>2124-11-11 16:00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>mg</td>\n",
       "      <td>30047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>114829.0</td>\n",
       "      <td>2107-01-15 22:00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>mg</td>\n",
       "      <td>30047</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>181516.0</td>\n",
       "      <td>2121-01-27 01:00:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>mg</td>\n",
       "      <td>30120</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime  value units  itemid\n",
       "0  181516.0 2121-01-27 00:00:00    NaN    mg   30120\n",
       "1  194502.0 2102-12-20 08:00:00    NaN    mg   30047\n",
       "2  172260.0 2124-11-11 16:00:00    NaN    mg   30047\n",
       "3  114829.0 2107-01-15 22:00:00    NaN    mg   30047\n",
       "4  181516.0 2121-01-27 01:00:00    NaN    mg   30120"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1136938, 5)\n",
      "(2017-06-03 03:41:16)<< DONE (23.0s)\n",
      "(2017-06-03 03:41:16)>> VASOPRESSIN\n",
      "(2017-06-03 03:41:16)>>>> Extracting 10 items from chartevents\n",
      "(2017-06-03 03:41:17)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:41:17)>>>> Extracting 4 items from inputevents_cv\n",
      "(2017-06-03 03:41:22)<<<< DONE (5.0s)\n",
      "(2017-06-03 03:41:22)>>>> Extracting 1 items from inputevents_mv\n",
      "(2017-06-03 03:41:23)<<<< DONE (1.0s)\n",
      "(2017-06-03 03:41:23)>>>> Combine DF\n",
      "(2017-06-03 03:41:23)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:41:23)>>>> Clean UOM\n",
      "(2017-06-03 03:41:24)<<<< DONE (1.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>126005.0</td>\n",
       "      <td>2126-09-05 16:30:00</td>\n",
       "      <td></td>\n",
       "      <td>U</td>\n",
       "      <td>1136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>126005.0</td>\n",
       "      <td>2126-09-05 16:45:00</td>\n",
       "      <td></td>\n",
       "      <td>units/hour</td>\n",
       "      <td>1136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>115221.0</td>\n",
       "      <td>2194-12-09 05:00:00</td>\n",
       "      <td>3</td>\n",
       "      <td>U</td>\n",
       "      <td>1136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>115221.0</td>\n",
       "      <td>2194-12-09 06:00:00</td>\n",
       "      <td>3</td>\n",
       "      <td>units/hour</td>\n",
       "      <td>1136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>115221.0</td>\n",
       "      <td>2194-12-09 04:00:00</td>\n",
       "      <td>3</td>\n",
       "      <td>units/hour</td>\n",
       "      <td>1136</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value       units  itemid\n",
       "0  126005.0 2126-09-05 16:30:00                 U    1136\n",
       "1  126005.0 2126-09-05 16:45:00        units/hour    1136\n",
       "2  115221.0 2194-12-09 05:00:00     3           U    1136\n",
       "3  115221.0 2194-12-09 06:00:00     3  units/hour    1136\n",
       "4  115221.0 2194-12-09 04:00:00     3  units/hour    1136"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(339184, 5)\n",
      "(2017-06-03 03:41:24)<< DONE (8.0s)\n",
      "(2017-06-03 03:41:24)>> HEMOGLOBIN\n",
      "(2017-06-03 03:41:24)>>>> Extracting 3 items from chartevents\n",
      "(2017-06-03 03:43:00)<<<< DONE (96.0s)\n",
      "(2017-06-03 03:43:00)>>>> Extracting 2 items from labevents\n",
      "(2017-06-03 03:43:27)<<<< DONE (27.0s)\n",
      "(2017-06-03 03:43:27)>>>> Combine DF\n",
      "(2017-06-03 03:43:27)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:43:27)>>>> Clean UOM\n",
      "(2017-06-03 03:43:30)<<<< DONE (3.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-13 01:53:00</td>\n",
       "      <td>9.5</td>\n",
       "      <td>gm/dl</td>\n",
       "      <td>814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-22 02:14:00</td>\n",
       "      <td>10</td>\n",
       "      <td>gm/dl</td>\n",
       "      <td>814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-18 03:46:00</td>\n",
       "      <td>10.100000381469727</td>\n",
       "      <td>gm/dl</td>\n",
       "      <td>814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>157907.0</td>\n",
       "      <td>2129-06-11 02:00:00</td>\n",
       "      <td>9.8999996185302734</td>\n",
       "      <td>gm/dl</td>\n",
       "      <td>814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-30 02:07:00</td>\n",
       "      <td>9</td>\n",
       "      <td>gm/dl</td>\n",
       "      <td>814</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime               value  units  itemid\n",
       "0  185910.0 2166-08-13 01:53:00                 9.5  gm/dl     814\n",
       "1  185910.0 2166-08-22 02:14:00                  10  gm/dl     814\n",
       "2  185910.0 2166-08-18 03:46:00  10.100000381469727  gm/dl     814\n",
       "3  157907.0 2129-06-11 02:00:00  9.8999996185302734  gm/dl     814\n",
       "4  185910.0 2166-08-30 02:07:00                   9  gm/dl     814"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1167921, 5)\n",
      "(2017-06-03 03:43:31)<< DONE (127.0s)\n",
      "(2017-06-03 03:43:31)>> LACTATE\n",
      "(2017-06-03 03:43:31)>>>> Extracting 3 items from chartevents\n",
      "(2017-06-03 03:44:30)<<<< DONE (59.0s)\n",
      "(2017-06-03 03:44:30)>>>> Extracting 1 items from labevents\n",
      "(2017-06-03 03:44:33)<<<< DONE (3.0s)\n",
      "(2017-06-03 03:44:33)>>>> Combine DF\n",
      "(2017-06-03 03:44:33)<<<< DONE (0.0s)\n",
      "(2017-06-03 03:44:33)>>>> Clean UOM\n",
      "(2017-06-03 03:44:34)<<<< DONE (1.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-11 21:20:00</td>\n",
       "      <td>1.5</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-11 21:20:00</td>\n",
       "      <td>1.5</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>1531</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>175413.0</td>\n",
       "      <td>2170-04-11 15:00:00</td>\n",
       "      <td></td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>146828.0</td>\n",
       "      <td>2186-10-03 21:54:00</td>\n",
       "      <td>1.8</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>146828.0</td>\n",
       "      <td>2186-10-03 21:54:00</td>\n",
       "      <td>1.8</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>1531</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         id            datetime value   units  itemid\n",
       "0  185910.0 2166-08-11 21:20:00   1.5  mmol/L     818\n",
       "1  185910.0 2166-08-11 21:20:00   1.5  mmol/L    1531\n",
       "2  175413.0 2170-04-11 15:00:00        mmol/L     818\n",
       "3  146828.0 2186-10-03 21:54:00   1.8  mmol/L     818\n",
       "4  146828.0 2186-10-03 21:54:00   1.8  mmol/L    1531"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(393608, 5)\n",
      "(2017-06-03 03:44:35)<< DONE (64.0s)\n",
      "(2017-06-03 03:44:35) DONE (1045.0s)\n"
     ]
    }
   ],
   "source": [
    "df_all = extract_labels(conn,labels,item_map_fname,hdf5_fname)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Transform (and \"Load\") MIMIC III Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import icu_data_defs \n",
    "import mimic\n",
    "from sklearn.pipeline import Pipeline\n",
    "import logger\n",
    "import pandas as pd\n",
    "from constants import column_names\n",
    "import utils\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(utils)\n",
    "reload(logger)\n",
    "reload(mimic)\n",
    "def transform_extracted_labels(labels,hdf5_fname):\n",
    "    \"\"\"Transform every extracted label and save it as 'transformed/<label>'.\n",
    "\n",
    "    For each label the frame stored under 'extract/<label>' is opened,\n",
    "    passed through the clean -> unstack -> clean_uom pipeline, tagged with\n",
    "    the label as an extra column index level, previewed with head() and\n",
    "    written back to the same file under 'transformed/<label>'.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    labels : iterable of str\n",
    "        Label names used to build the 'extract/...' and 'transformed/...' keys.\n",
    "    hdf5_fname : str\n",
    "        File name handed to utils.open_df and utils.save_df.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "    \"\"\"\n",
    "    \n",
    "    pipeline = Pipeline([\n",
    "        ('clean',mimic.clean_extract()),\n",
    "        ('unstack',mimic.unstacker()),\n",
    "        ('clean_uom',mimic.clean_uom())\n",
    "    ])\n",
    "    \n",
    "    for label in labels:\n",
    "        # NOTE(review): two logger.log calls but a single end_log_level below;\n",
    "        # presumably utils.open_df closes the first level itself - confirm\n",
    "        logger.log('Opening {}'.format(label.upper()))\n",
    "        df = utils.open_df(hdf5_fname,'extract/{}'.format(label))\n",
    "        logger.log('Transforming {} - {}'.format(label.upper(),df.shape))\n",
    "        df_transformed = pipeline.transform(df)\n",
    "        # Add label guess to column index. The transformed frame must be the\n",
    "        # one that is tagged: the raw `df` is discarded below, so tagging it\n",
    "        # would never reach the data that is displayed and saved.\n",
    "        df_transformed = utils.add_same_val__index_level(df_transformed,label,'label',axis=1)\n",
    "        display(df_transformed.head())\n",
    "        utils.save_df(df_transformed,hdf5_fname,'transformed/{}'.format(label))\n",
    "        # release both frames before the next label is loaded\n",
    "        del df,df_transformed\n",
    "        logger.end_log_level()\n",
    "        \n",
    "# Locations of the HDF5 store and of the data-definition workbook\n",
    "hdf5_fname = 'data/mimic_data'\n",
    "mimic_data_sef_fname = 'config/data_definitions.xlsx'\n",
    "\n",
    "# Load the data dictionary, then collect the distinct labels of panel 12\n",
    "# (panel 12 is \"simple data\")\n",
    "data_dict = icu_data_defs.data_dictionary(mimic_data_sef_fname)\n",
    "simple_data = data_dict.get_panel_defintions(12)\n",
    "labels = simple_data['label'].unique().tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-02 05:11:14) Opening HEART RATE\n",
      "(2017-06-02 05:11:19) DONE (5.0s)\n",
      "(2017-06-02 05:11:19) Transforming HEART RATE - (7952939, 5)\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-3-4ff30e8e4bcc>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mtransform_extracted_labels\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhdf5_fname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-2-7e9518f41127>\u001b[0m in \u001b[0;36mtransform_extracted_labels\u001b[1;34m(labels, hdf5_fname)\u001b[0m\n\u001b[0;32m     14\u001b[0m         \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mopen_df\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mhdf5_fname\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'extract/{}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     15\u001b[0m         \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Transforming {} - {}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m         \u001b[0mdf_transformed\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpipeline\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     17\u001b[0m         \u001b[0mdisplay\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_transformed\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     18\u001b[0m         
\u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_df\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_transformed\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhdf5_fname\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'transformed/{}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlabel\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\sklearn\\pipeline.pyc\u001b[0m in \u001b[0;36m_transform\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m    446\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtransform\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msteps\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    447\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mtransform\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 448\u001b[1;33m                 \u001b[0mXt\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtransform\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mXt\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    449\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mXt\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    450\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\icu_ml_project\\v5\\mimic.pyc\u001b[0m in \u001b[0;36mtransform\u001b[1;34m(self, df)\u001b[0m\n\u001b[0;32m    189\u001b[0m         \u001b[0mFORMAT\u001b[0m \u001b[0mpre\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0munstack\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    190\u001b[0m         \"\"\"\n\u001b[1;32m--> 191\u001b[1;33m         \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mto_replace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m''\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnan\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    192\u001b[0m         \u001b[1;31m#drop NAN record_id, timestamps, or value\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    193\u001b[0m         \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdropna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msubset\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcolumn_names\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mID\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumn_names\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDATETIME\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumn_names\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mVALUE\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhow\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'any'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\core\\generic.pyc\u001b[0m in \u001b[0;36mreplace\u001b[1;34m(self, to_replace, value, inplace, limit, regex, method, axis)\u001b[0m\n\u001b[0;32m   3538\u001b[0m                     new_data = self._data.replace(to_replace=to_replace,\n\u001b[0;32m   3539\u001b[0m                                                   \u001b[0mvalue\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mvalue\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3540\u001b[1;33m                                                   regex=regex)\n\u001b[0m\u001b[0;32m   3541\u001b[0m                 \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3542\u001b[0m                     msg = ('Invalid \"to_replace\" type: '\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\core\\internals.pyc\u001b[0m in \u001b[0;36mreplace\u001b[1;34m(self, **kwargs)\u001b[0m\n\u001b[0;32m   3172\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3173\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mreplace\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3174\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'replace'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3175\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3176\u001b[0m     def replace_list(self, src_list, dest_list, inplace=False, regex=False,\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\core\\internals.pyc\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)\u001b[0m\n\u001b[0;32m   3033\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3034\u001b[0m             \u001b[0mkwargs\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'mgr'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3035\u001b[1;33m             \u001b[0mapplied\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mb\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m   3036\u001b[0m             \u001b[0mresult_blocks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_extend_blocks\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mapplied\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mresult_blocks\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   3037\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\core\\internals.pyc\u001b[0m in \u001b[0;36mreplace\u001b[1;34m(self, to_replace, value, inplace, filter, regex, convert, mgr)\u001b[0m\n\u001b[0;32m   1926\u001b[0m                                                     \u001b[0minplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1927\u001b[0m                                                     \u001b[0mfilter\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mfilter\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mregex\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mregex\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1928\u001b[1;33m                                                     convert=convert, mgr=mgr)\n\u001b[0m\u001b[0;32m   1929\u001b[0m         \u001b[1;32melif\u001b[0m \u001b[0mboth_lists\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1930\u001b[0m             \u001b[1;32mfor\u001b[0m \u001b[0mto_rep\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mto_replace\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\core\\internals.pyc\u001b[0m in \u001b[0;36mreplace\u001b[1;34m(self, to_replace, value, inplace, filter, regex, convert, mgr)\u001b[0m\n\u001b[0;32m    623\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    624\u001b[0m         \u001b[0moriginal_to_replace\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mto_replace\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 625\u001b[1;33m         \u001b[0mmask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0misnull\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    626\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    627\u001b[0m         \u001b[1;31m# try to replace, if we raise an error, convert to ObjectBlock and\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\types\\missing.pyc\u001b[0m in \u001b[0;36misnull\u001b[1;34m(obj)\u001b[0m\n\u001b[0;32m     43\u001b[0m     \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnotnull\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mboolean\u001b[0m \u001b[0minverse\u001b[0m \u001b[0mof\u001b[0m \u001b[0mpandas\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     44\u001b[0m     \"\"\"\n\u001b[1;32m---> 45\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0m_isnull\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     46\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     47\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\types\\missing.pyc\u001b[0m in \u001b[0;36m_isnull_new\u001b[1;34m(obj)\u001b[0m\n\u001b[0;32m     53\u001b[0m         \u001b[1;32mraise\u001b[0m \u001b[0mNotImplementedError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"isnull is not defined for MultiIndex\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     54\u001b[0m     \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mABCSeries\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mABCIndexClass\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 55\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0m_isnull_ndarraylike\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     56\u001b[0m     \u001b[1;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mABCGeneric\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     57\u001b[0m         \u001b[1;32mreturn\u001b[0m \u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_constructor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misnull\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0misnull\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\types\\missing.pyc\u001b[0m in \u001b[0;36m_isnull_ndarraylike\u001b[1;34m(obj)\u001b[0m\n\u001b[0;32m    139\u001b[0m                 \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mempty\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mbool\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    140\u001b[0m                 \u001b[0mvec\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misnullobj\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mravel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 141\u001b[1;33m                 \u001b[0mresult\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mvec\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    142\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    143\u001b[0m     \u001b[1;32melif\u001b[0m \u001b[0mneeds_i8_conversion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobj\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "transform_extracted_labels(labels,hdf5_fname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "store = pd.HDFStore(hdf5_fname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<class 'pandas.io.pytables.HDFStore'>\n",
       "File path: data/mimic_data\n",
       "/extract/blood pressure diastolic                        frame        (shape->[6371249,5])\n",
       "/extract/blood pressure mean                             frame        (shape->[2536271,5])\n",
       "/extract/blood pressure systolic                         frame        (shape->[6374824,5])\n",
       "/extract/glasgow coma scale eye opening                  frame        (shape->[956672,5]) \n",
       "/extract/glasgow coma scale motor                        frame        (shape->[952565,5]) \n",
       "/extract/glasgow coma scale verbal                       frame        (shape->[954700,5]) \n",
       "/extract/heart rate                                      frame        (shape->[7952939,5])\n",
       "/extract/hemoglobin                                      frame        (shape->[1167921,5])\n",
       "/extract/lactate                                         frame        (shape->[393608,5]) \n",
       "/extract/lactated ringers                                frame        (shape->[504306,5]) \n",
       "/extract/norepinephrine                                  frame        (shape->[1136938,5])\n",
       "/extract/normal saline                                   frame        (shape->[817373,5]) \n",
       "/extract/output urine                                    frame        (shape->[3644639,5])\n",
       "/extract/oxygen saturation pulse oximetry                frame        (shape->[6099827,5])\n",
       "/extract/respiratory rate                                frame        (shape->[7810019,5])\n",
       "/extract/temperature body                                frame        (shape->[1751447,5])\n",
       "/extract/vasopressin                                     frame        (shape->[339184,5]) \n",
       "/extract/weight body                                     frame        (shape->[95425,5])  \n",
       "/transformed/blood pressure diastolic                    frame                            \n",
       "/transformed/blood pressure mean                         frame                            \n",
       "/transformed/blood pressure systolic                     frame                            \n",
       "/transformed/glasgow coma scale eye opening              frame                            \n",
       "/transformed/glasgow coma scale motor                    frame                            \n",
       "/transformed/glasgow coma scale verbal                   frame                            \n",
       "/transformed/heart rate                                  frame                            \n",
       "/transformed/hemoglobin                                  frame                            \n",
       "/transformed/lactate                                     frame                            \n",
       "/transformed/lactated ringers                            frame                            \n",
       "/transformed/norepinephrine                              frame                            \n",
       "/transformed/normal saline                               frame                            \n",
       "/transformed/output urine                                frame                            \n",
       "/transformed/oxygen saturation pulse oximetry            frame                            \n",
       "/transformed/respiratory rate                            frame                            \n",
       "/transformed/temperature body                            frame                            \n",
       "/transformed/vasopressin                                 frame                            \n",
       "/transformed/weight body                                 frame                            "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "store.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<module 'utils' from 'utils.py'>"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reload(utils)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Cleaners"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Design decision: If a given column has both NUMERIC CATEGORICAL and QUANTITATIVE data, all NUMERIC CATEGORICAL data will actually be treated like QUANTITATIVE data, unfortunately."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import transformers\n",
    "import utils\n",
    "from sklearn.pipeline import Pipeline\n",
    "from units import MedicalUreg\n",
    "import icu_data_defs\n",
    "import units\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(transformers)\n",
    "reload(utils)\n",
    "mimic_data_sef_fname = 'config/data_definitions.xlsx'\n",
    "hdf5_fname = 'data/mimic_data'\n",
    "medical_units = 'config/medical_units.txt'\n",
    "\n",
    "# load the data dictionary and choose the single label to clean\n",
    "data_dict = icu_data_defs.data_dictionary(mimic_data_sef_fname)\n",
    "label = 'lactate'\n",
    "\n",
    "# Cleaning steps; Pipeline.transform runs them in the order listed here\n",
    "standard_pipeline = Pipeline([\n",
    "    (\n",
    "        'aggregate_same_datetime',\n",
    "        # agg_func keeps the first element of whatever it is handed\n",
    "        transformers.same_index_aggregator(agg_func=lambda rows: rows.iloc[0])\n",
    "    ),\n",
    "    (\n",
    "        'split_dtype',\n",
    "        transformers.split_dtype()\n",
    "    ),\n",
    "    (\n",
    "        'format_columns',\n",
    "        transformers.format_columns(data_dict, MedicalUreg(medical_units))\n",
    "    ),\n",
    "    (\n",
    "        'drop_small_columns',\n",
    "        transformers.remove_small_columns(threshold=50)\n",
    "    )\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "transformers.py:103: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  df_out.dropna(how='all',inplace=True,axis=1)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(113L, 0, '0.0%')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Open the transformed frame for `label` from the configured store\n",
    "# (hdf5_fname) rather than repeating the path literal, then clean it.\n",
    "df = utils.open_df(hdf5_fname,'transformed/{}'.format(label))\n",
    "df_cleaned = standard_pipeline.transform(df)\n",
    "# Last expression: the result of utils.data_loss becomes the cell output\n",
    "utils.data_loss(df,df_cleaned)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"6\" halign=\"left\">lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"4\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">mmol/L</th>\n",
       "      <th colspan=\"2\" halign=\"left\">no_units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>50813</th>\n",
       "      <th>225668</th>\n",
       "      <th>818</th>\n",
       "      <th>1531</th>\n",
       "      <th>50813(mmol/L)</th>\n",
       "      <th>818</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>100001</th>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100003</th>\n",
       "      <th>2150-04-17 19:12:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100006</th>\n",
       "      <th>2108-04-08 10:58:00</th>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100007</th>\n",
       "      <th>2145-03-31 00:44:00</th>\n",
       "      <td>3.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2145-04-02 14:10:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100009</th>\n",
       "      <th>2162-05-17 13:19:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2162-05-17 17:14:00</th>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">100010</th>\n",
       "      <th>2109-12-10 10:25:00</th>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 12:11:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:05:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:58:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100011</th>\n",
       "      <th>2177-08-29 04:44:00</th>\n",
       "      <td>3.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-08-29 06:55:00</th>\n",
       "      <td>2.3</td>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100012</th>\n",
       "      <th>2177-03-14 07:38:00</th>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-14 11:42:00</th>\n",
       "      <td>2.5</td>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 08:05:00</th>\n",
       "      <td>2.1</td>\n",
       "      <td>2.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 14:01:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 21:42:00</th>\n",
       "      <td>1.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100016</th>\n",
       "      <th>2188-05-24 12:00:00</th>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100017</th>\n",
       "      <th>2103-03-11 05:10:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100018</th>\n",
       "      <th>2176-08-29 15:29:00</th>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 09:23:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 10:19:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 11:29:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 12:40:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100020</th>\n",
       "      <th>2142-11-30 21:54:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2142-12-03 00:17:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100024</th>\n",
       "      <th>2170-09-19 10:25:00</th>\n",
       "      <td>1.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-19 16:33:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-20 02:04:00</th>\n",
       "      <td>3.2</td>\n",
       "      <td>3.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199976</th>\n",
       "      <th>2182-02-14 11:15:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-16 03:57:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-19 03:59:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-20 03:31:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-21 04:55:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199979</th>\n",
       "      <th>2182-02-06 09:17:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-06 14:16:00</th>\n",
       "      <td>3.8</td>\n",
       "      <td>3.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">199981</th>\n",
       "      <th>2110-09-24 16:34:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-24 20:09:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-25 06:10:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199987</th>\n",
       "      <th>2175-05-19 16:30:00</th>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">199988</th>\n",
       "      <th>2169-01-24 12:48:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 01:35:00</th>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 11:18:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 16:43:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 22:35:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-10 05:33:00</th>\n",
       "      <td>1.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199993</th>\n",
       "      <th>2161-11-12 23:14:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161-11-13 03:46:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">199994</th>\n",
       "      <th>2188-07-07 21:23:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 03:09:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 04:13:00</th>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 06:20:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199998</th>\n",
       "      <th>2119-02-20 10:52:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 12:36:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:33:00</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:59:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 20:43:00</th>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-04 20:55:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-06 15:29:00</th>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>177451 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      lactate                                        \n",
       "status                       known         unknown                        \n",
       "variable_type                   qn              qn                nom     \n",
       "units                       mmol/L        no_units           no_units     \n",
       "description                  50813 225668      818 1531 50813(mmol/L)  818\n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:32:00     1.9    NaN      NaN  NaN           NaN  NaN\n",
       "100003 2150-04-17 19:12:00     1.1    1.1      NaN  NaN           NaN  NaN\n",
       "100006 2108-04-08 10:58:00     4.5    NaN      4.5  NaN           NaN  NaN\n",
       "100007 2145-03-31 00:44:00     3.1    NaN      NaN  NaN           NaN  NaN\n",
       "       2145-04-02 14:10:00     1.9    NaN      NaN  NaN           NaN  NaN\n",
       "100009 2162-05-17 13:19:00     1.1    1.1      NaN  NaN           NaN  NaN\n",
       "       2162-05-17 17:14:00     1.5    1.5      NaN  NaN           NaN  NaN\n",
       "100010 2109-12-10 10:25:00     0.6    NaN      NaN  NaN           NaN  NaN\n",
       "       2109-12-10 12:11:00     0.9    NaN      NaN  NaN           NaN  NaN\n",
       "       2109-12-10 13:05:00     1.0    NaN      NaN  NaN           NaN  NaN\n",
       "       2109-12-10 13:58:00     0.8    NaN      NaN  NaN           NaN  NaN\n",
       "100011 2177-08-29 04:44:00     3.8    NaN      NaN  NaN           NaN  NaN\n",
       "       2177-08-29 06:55:00     2.3    2.3      NaN  NaN           NaN  NaN\n",
       "100012 2177-03-14 07:38:00     2.3    NaN      NaN  NaN           NaN  NaN\n",
       "       2177-03-14 11:42:00     2.5    2.5      NaN  NaN           NaN  NaN\n",
       "       2177-03-15 08:05:00     2.1    2.1      NaN  NaN           NaN  NaN\n",
       "       2177-03-15 14:01:00     2.6    2.6      NaN  NaN           NaN  NaN\n",
       "       2177-03-15 21:42:00     1.8    1.8      NaN  NaN           NaN  NaN\n",
       "100016 2188-05-24 12:00:00     2.0    NaN      NaN  NaN           NaN  NaN\n",
       "100017 2103-03-11 05:10:00     1.1    NaN      1.1  NaN           NaN  NaN\n",
       "100018 2176-08-29 15:29:00     1.3    NaN      NaN  NaN           NaN  NaN\n",
       "       2176-08-30 09:23:00     0.9    0.9      NaN  NaN           NaN  NaN\n",
       "       2176-08-30 10:19:00     1.0    1.0      NaN  NaN           NaN  NaN\n",
       "       2176-08-30 11:29:00     0.9    0.9      NaN  NaN           NaN  NaN\n",
       "       2176-08-30 12:40:00     1.1    1.1      NaN  NaN           NaN  NaN\n",
       "100020 2142-11-30 21:54:00     1.1    NaN      NaN  NaN           NaN  NaN\n",
       "       2142-12-03 00:17:00     1.0    NaN      NaN  NaN           NaN  NaN\n",
       "100024 2170-09-19 10:25:00     1.4    1.4      NaN  NaN           NaN  NaN\n",
       "       2170-09-19 16:33:00     2.6    2.6      NaN  NaN           NaN  NaN\n",
       "       2170-09-20 02:04:00     3.2    3.2      NaN  NaN           NaN  NaN\n",
       "...                            ...    ...      ...  ...           ...  ...\n",
       "199976 2182-02-14 11:15:00     0.8    NaN      0.8  NaN           NaN  NaN\n",
       "       2182-02-16 03:57:00     0.8    NaN      0.8  NaN           NaN  NaN\n",
       "       2182-02-19 03:59:00     0.8    NaN      0.8  NaN           NaN  NaN\n",
       "       2182-02-20 03:31:00     0.7    NaN      0.7  NaN           NaN  NaN\n",
       "       2182-02-21 04:55:00     0.9    NaN      0.9  NaN           NaN  NaN\n",
       "199979 2182-02-06 09:17:00     1.0    NaN      NaN  NaN           NaN  NaN\n",
       "       2182-02-06 14:16:00     3.8    3.8      NaN  NaN           NaN  NaN\n",
       "199981 2110-09-24 16:34:00     1.1    NaN      1.1  1.1           NaN  NaN\n",
       "       2110-09-24 20:09:00     1.0    NaN      1.0  1.0           NaN  NaN\n",
       "       2110-09-25 06:10:00     1.0    NaN      1.0  1.0           NaN  NaN\n",
       "199987 2175-05-19 16:30:00     2.3    NaN      NaN  NaN           NaN  NaN\n",
       "199988 2169-01-24 12:48:00     1.0    NaN      NaN  NaN           NaN  NaN\n",
       "       2169-02-07 01:35:00     1.6    NaN      1.6  NaN           NaN  NaN\n",
       "       2169-02-07 11:18:00     1.0    NaN      1.0  NaN           NaN  NaN\n",
       "       2169-02-07 16:43:00     1.0    NaN      1.0  NaN           NaN  NaN\n",
       "       2169-02-07 22:35:00     1.1    NaN      1.1  NaN           NaN  NaN\n",
       "       2169-02-10 05:33:00     1.4    NaN      NaN  NaN           NaN  NaN\n",
       "199993 2161-11-12 23:14:00     0.9    NaN      0.9  NaN           NaN  NaN\n",
       "       2161-11-13 03:46:00     0.9    NaN      0.9  NaN           NaN  NaN\n",
       "199994 2188-07-07 21:23:00     1.0    NaN      1.0  NaN           NaN  NaN\n",
       "       2188-07-08 03:09:00     0.7    NaN      0.7  NaN           NaN  NaN\n",
       "       2188-07-08 04:13:00     0.6    NaN      0.6  NaN           NaN  NaN\n",
       "       2188-07-08 06:20:00     0.7    NaN      0.7  NaN           NaN  NaN\n",
       "199998 2119-02-20 10:52:00     1.1    1.1      1.1  1.1           NaN  NaN\n",
       "       2119-02-20 12:36:00     1.9    1.9      1.9  1.9           NaN  NaN\n",
       "       2119-02-20 13:33:00     2.0    2.0      2.0  2.0           NaN  NaN\n",
       "       2119-02-20 13:59:00     2.6    2.6      2.6  2.6           NaN  NaN\n",
       "       2119-02-20 20:43:00     1.3    1.3      1.3  1.3           NaN  NaN\n",
       "199999 2136-04-04 20:55:00     1.9    NaN      NaN  NaN           NaN  NaN\n",
       "       2136-04-06 15:29:00     1.8    NaN      NaN  NaN           NaN  NaN\n",
       "\n",
       "[177451 rows x 6 columns]"
      ]
     },
     "execution_count": 216,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_cleaned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Keep only the columns whose 'status' column level equals 'unknown'.\n",
    "unknown_status_mask = df_cleaned.columns.get_level_values('status') == 'unknown'\n",
    "df_invalid = df_cleaned.loc[:, unknown_status_mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Relabel the 'status' entry of every column of df_invalid as 'known'.\n",
    "# The column index is rebuilt from tuples and assigned back in one step\n",
    "# instead of calling MultiIndex.set_labels/set_levels with inplace=True:\n",
    "# set_labels was renamed to set_codes and the inplace option was dropped\n",
    "# in later pandas releases, so the in-place form no longer runs there.\n",
    "column_level_names = list(df_invalid.columns.names)\n",
    "status_pos = column_level_names.index('status')\n",
    "relabeled_columns = [\n",
    "    key[:status_pos] + ('known',) + key[status_pos + 1:]\n",
    "    for key in df_invalid.columns\n",
    "]\n",
    "df_invalid.columns = df_invalid.columns.from_tuples(\n",
    "    relabeled_columns, names=column_level_names\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"10\" halign=\"left\">lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"6\" halign=\"left\">known</th>\n",
       "      <th colspan=\"4\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">nom</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">no_units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">no_units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">mmol/L</th>\n",
       "      <th colspan=\"2\" halign=\"left\">no_units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>818</th>\n",
       "      <th>1531</th>\n",
       "      <th>50813(mmol/L)</th>\n",
       "      <th>818</th>\n",
       "      <th>50813</th>\n",
       "      <th>225668</th>\n",
       "      <th>818</th>\n",
       "      <th>1531</th>\n",
       "      <th>50813(mmol/L)</th>\n",
       "      <th>818</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>100001</th>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100003</th>\n",
       "      <th>2150-04-17 19:12:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100006</th>\n",
       "      <th>2108-04-08 10:58:00</th>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100007</th>\n",
       "      <th>2145-03-31 00:44:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2145-04-02 14:10:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100009</th>\n",
       "      <th>2162-05-17 13:19:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2162-05-17 17:14:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">100010</th>\n",
       "      <th>2109-12-10 10:25:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 12:11:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:05:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:58:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100011</th>\n",
       "      <th>2177-08-29 04:44:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-08-29 06:55:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.3</td>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100012</th>\n",
       "      <th>2177-03-14 07:38:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-14 11:42:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.5</td>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 08:05:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.1</td>\n",
       "      <td>2.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 14:01:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 21:42:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100016</th>\n",
       "      <th>2188-05-24 12:00:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100017</th>\n",
       "      <th>2103-03-11 05:10:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100018</th>\n",
       "      <th>2176-08-29 15:29:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 09:23:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 10:19:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 11:29:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 12:40:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100020</th>\n",
       "      <th>2142-11-30 21:54:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2142-12-03 00:17:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100024</th>\n",
       "      <th>2170-09-19 10:25:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-19 16:33:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-20 02:04:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.2</td>\n",
       "      <td>3.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199976</th>\n",
       "      <th>2182-02-14 11:15:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-16 03:57:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-19 03:59:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-20 03:31:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-21 04:55:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199979</th>\n",
       "      <th>2182-02-06 09:17:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-06 14:16:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.8</td>\n",
       "      <td>3.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">199981</th>\n",
       "      <th>2110-09-24 16:34:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-24 20:09:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-25 06:10:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199987</th>\n",
       "      <th>2175-05-19 16:30:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">199988</th>\n",
       "      <th>2169-01-24 12:48:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 01:35:00</th>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 11:18:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 16:43:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 22:35:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-10 05:33:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199993</th>\n",
       "      <th>2161-11-12 23:14:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161-11-13 03:46:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">199994</th>\n",
       "      <th>2188-07-07 21:23:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 03:09:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 04:13:00</th>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 06:20:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199998</th>\n",
       "      <th>2119-02-20 10:52:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 12:36:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:33:00</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:59:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 20:43:00</th>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-04 20:55:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-06 15:29:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>177451 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                       lactate                                        \\\n",
       "status                        known                                         \n",
       "variable_type                    qn                nom          qn          \n",
       "units                      no_units           no_units      mmol/L          \n",
       "description                     818 1531 50813(mmol/L)  818  50813 225668   \n",
       "id     datetime                                                             \n",
       "100001 2117-09-11 09:32:00      NaN  NaN           NaN  NaN    1.9    NaN   \n",
       "100003 2150-04-17 19:12:00      NaN  NaN           NaN  NaN    1.1    1.1   \n",
       "100006 2108-04-08 10:58:00      4.5  NaN           NaN  NaN    4.5    NaN   \n",
       "100007 2145-03-31 00:44:00      NaN  NaN           NaN  NaN    3.1    NaN   \n",
       "       2145-04-02 14:10:00      NaN  NaN           NaN  NaN    1.9    NaN   \n",
       "100009 2162-05-17 13:19:00      NaN  NaN           NaN  NaN    1.1    1.1   \n",
       "       2162-05-17 17:14:00      NaN  NaN           NaN  NaN    1.5    1.5   \n",
       "100010 2109-12-10 10:25:00      NaN  NaN           NaN  NaN    0.6    NaN   \n",
       "       2109-12-10 12:11:00      NaN  NaN           NaN  NaN    0.9    NaN   \n",
       "       2109-12-10 13:05:00      NaN  NaN           NaN  NaN    1.0    NaN   \n",
       "       2109-12-10 13:58:00      NaN  NaN           NaN  NaN    0.8    NaN   \n",
       "100011 2177-08-29 04:44:00      NaN  NaN           NaN  NaN    3.8    NaN   \n",
       "       2177-08-29 06:55:00      NaN  NaN           NaN  NaN    2.3    2.3   \n",
       "100012 2177-03-14 07:38:00      NaN  NaN           NaN  NaN    2.3    NaN   \n",
       "       2177-03-14 11:42:00      NaN  NaN           NaN  NaN    2.5    2.5   \n",
       "       2177-03-15 08:05:00      NaN  NaN           NaN  NaN    2.1    2.1   \n",
       "       2177-03-15 14:01:00      NaN  NaN           NaN  NaN    2.6    2.6   \n",
       "       2177-03-15 21:42:00      NaN  NaN           NaN  NaN    1.8    1.8   \n",
       "100016 2188-05-24 12:00:00      NaN  NaN           NaN  NaN    2.0    NaN   \n",
       "100017 2103-03-11 05:10:00      1.1  NaN           NaN  NaN    1.1    NaN   \n",
       "100018 2176-08-29 15:29:00      NaN  NaN           NaN  NaN    1.3    NaN   \n",
       "       2176-08-30 09:23:00      NaN  NaN           NaN  NaN    0.9    0.9   \n",
       "       2176-08-30 10:19:00      NaN  NaN           NaN  NaN    1.0    1.0   \n",
       "       2176-08-30 11:29:00      NaN  NaN           NaN  NaN    0.9    0.9   \n",
       "       2176-08-30 12:40:00      NaN  NaN           NaN  NaN    1.1    1.1   \n",
       "100020 2142-11-30 21:54:00      NaN  NaN           NaN  NaN    1.1    NaN   \n",
       "       2142-12-03 00:17:00      NaN  NaN           NaN  NaN    1.0    NaN   \n",
       "100024 2170-09-19 10:25:00      NaN  NaN           NaN  NaN    1.4    1.4   \n",
       "       2170-09-19 16:33:00      NaN  NaN           NaN  NaN    2.6    2.6   \n",
       "       2170-09-20 02:04:00      NaN  NaN           NaN  NaN    3.2    3.2   \n",
       "...                             ...  ...           ...  ...    ...    ...   \n",
       "199976 2182-02-14 11:15:00      0.8  NaN           NaN  NaN    0.8    NaN   \n",
       "       2182-02-16 03:57:00      0.8  NaN           NaN  NaN    0.8    NaN   \n",
       "       2182-02-19 03:59:00      0.8  NaN           NaN  NaN    0.8    NaN   \n",
       "       2182-02-20 03:31:00      0.7  NaN           NaN  NaN    0.7    NaN   \n",
       "       2182-02-21 04:55:00      0.9  NaN           NaN  NaN    0.9    NaN   \n",
       "199979 2182-02-06 09:17:00      NaN  NaN           NaN  NaN    1.0    NaN   \n",
       "       2182-02-06 14:16:00      NaN  NaN           NaN  NaN    3.8    3.8   \n",
       "199981 2110-09-24 16:34:00      1.1  1.1           NaN  NaN    1.1    NaN   \n",
       "       2110-09-24 20:09:00      1.0  1.0           NaN  NaN    1.0    NaN   \n",
       "       2110-09-25 06:10:00      1.0  1.0           NaN  NaN    1.0    NaN   \n",
       "199987 2175-05-19 16:30:00      NaN  NaN           NaN  NaN    2.3    NaN   \n",
       "199988 2169-01-24 12:48:00      NaN  NaN           NaN  NaN    1.0    NaN   \n",
       "       2169-02-07 01:35:00      1.6  NaN           NaN  NaN    1.6    NaN   \n",
       "       2169-02-07 11:18:00      1.0  NaN           NaN  NaN    1.0    NaN   \n",
       "       2169-02-07 16:43:00      1.0  NaN           NaN  NaN    1.0    NaN   \n",
       "       2169-02-07 22:35:00      1.1  NaN           NaN  NaN    1.1    NaN   \n",
       "       2169-02-10 05:33:00      NaN  NaN           NaN  NaN    1.4    NaN   \n",
       "199993 2161-11-12 23:14:00      0.9  NaN           NaN  NaN    0.9    NaN   \n",
       "       2161-11-13 03:46:00      0.9  NaN           NaN  NaN    0.9    NaN   \n",
       "199994 2188-07-07 21:23:00      1.0  NaN           NaN  NaN    1.0    NaN   \n",
       "       2188-07-08 03:09:00      0.7  NaN           NaN  NaN    0.7    NaN   \n",
       "       2188-07-08 04:13:00      0.6  NaN           NaN  NaN    0.6    NaN   \n",
       "       2188-07-08 06:20:00      0.7  NaN           NaN  NaN    0.7    NaN   \n",
       "199998 2119-02-20 10:52:00      1.1  1.1           NaN  NaN    1.1    1.1   \n",
       "       2119-02-20 12:36:00      1.9  1.9           NaN  NaN    1.9    1.9   \n",
       "       2119-02-20 13:33:00      2.0  2.0           NaN  NaN    2.0    2.0   \n",
       "       2119-02-20 13:59:00      2.6  2.6           NaN  NaN    2.6    2.6   \n",
       "       2119-02-20 20:43:00      1.3  1.3           NaN  NaN    1.3    1.3   \n",
       "199999 2136-04-04 20:55:00      NaN  NaN           NaN  NaN    1.9    NaN   \n",
       "       2136-04-06 15:29:00      NaN  NaN           NaN  NaN    1.8    NaN   \n",
       "\n",
       "label                                                        \n",
       "status                      unknown                          \n",
       "variable_type                    qn                nom       \n",
       "units                      no_units           no_units       \n",
       "description                     818 1531 50813(mmol/L)  818  \n",
       "id     datetime                                              \n",
       "100001 2117-09-11 09:32:00      NaN  NaN           NaN  NaN  \n",
       "100003 2150-04-17 19:12:00      NaN  NaN           NaN  NaN  \n",
       "100006 2108-04-08 10:58:00      4.5  NaN           NaN  NaN  \n",
       "100007 2145-03-31 00:44:00      NaN  NaN           NaN  NaN  \n",
       "       2145-04-02 14:10:00      NaN  NaN           NaN  NaN  \n",
       "100009 2162-05-17 13:19:00      NaN  NaN           NaN  NaN  \n",
       "       2162-05-17 17:14:00      NaN  NaN           NaN  NaN  \n",
       "100010 2109-12-10 10:25:00      NaN  NaN           NaN  NaN  \n",
       "       2109-12-10 12:11:00      NaN  NaN           NaN  NaN  \n",
       "       2109-12-10 13:05:00      NaN  NaN           NaN  NaN  \n",
       "       2109-12-10 13:58:00      NaN  NaN           NaN  NaN  \n",
       "100011 2177-08-29 04:44:00      NaN  NaN           NaN  NaN  \n",
       "       2177-08-29 06:55:00      NaN  NaN           NaN  NaN  \n",
       "100012 2177-03-14 07:38:00      NaN  NaN           NaN  NaN  \n",
       "       2177-03-14 11:42:00      NaN  NaN           NaN  NaN  \n",
       "       2177-03-15 08:05:00      NaN  NaN           NaN  NaN  \n",
       "       2177-03-15 14:01:00      NaN  NaN           NaN  NaN  \n",
       "       2177-03-15 21:42:00      NaN  NaN           NaN  NaN  \n",
       "100016 2188-05-24 12:00:00      NaN  NaN           NaN  NaN  \n",
       "100017 2103-03-11 05:10:00      1.1  NaN           NaN  NaN  \n",
       "100018 2176-08-29 15:29:00      NaN  NaN           NaN  NaN  \n",
       "       2176-08-30 09:23:00      NaN  NaN           NaN  NaN  \n",
       "       2176-08-30 10:19:00      NaN  NaN           NaN  NaN  \n",
       "       2176-08-30 11:29:00      NaN  NaN           NaN  NaN  \n",
       "       2176-08-30 12:40:00      NaN  NaN           NaN  NaN  \n",
       "100020 2142-11-30 21:54:00      NaN  NaN           NaN  NaN  \n",
       "       2142-12-03 00:17:00      NaN  NaN           NaN  NaN  \n",
       "100024 2170-09-19 10:25:00      NaN  NaN           NaN  NaN  \n",
       "       2170-09-19 16:33:00      NaN  NaN           NaN  NaN  \n",
       "       2170-09-20 02:04:00      NaN  NaN           NaN  NaN  \n",
       "...                             ...  ...           ...  ...  \n",
       "199976 2182-02-14 11:15:00      0.8  NaN           NaN  NaN  \n",
       "       2182-02-16 03:57:00      0.8  NaN           NaN  NaN  \n",
       "       2182-02-19 03:59:00      0.8  NaN           NaN  NaN  \n",
       "       2182-02-20 03:31:00      0.7  NaN           NaN  NaN  \n",
       "       2182-02-21 04:55:00      0.9  NaN           NaN  NaN  \n",
       "199979 2182-02-06 09:17:00      NaN  NaN           NaN  NaN  \n",
       "       2182-02-06 14:16:00      NaN  NaN           NaN  NaN  \n",
       "199981 2110-09-24 16:34:00      1.1  1.1           NaN  NaN  \n",
       "       2110-09-24 20:09:00      1.0  1.0           NaN  NaN  \n",
       "       2110-09-25 06:10:00      1.0  1.0           NaN  NaN  \n",
       "199987 2175-05-19 16:30:00      NaN  NaN           NaN  NaN  \n",
       "199988 2169-01-24 12:48:00      NaN  NaN           NaN  NaN  \n",
       "       2169-02-07 01:35:00      1.6  NaN           NaN  NaN  \n",
       "       2169-02-07 11:18:00      1.0  NaN           NaN  NaN  \n",
       "       2169-02-07 16:43:00      1.0  NaN           NaN  NaN  \n",
       "       2169-02-07 22:35:00      1.1  NaN           NaN  NaN  \n",
       "       2169-02-10 05:33:00      NaN  NaN           NaN  NaN  \n",
       "199993 2161-11-12 23:14:00      0.9  NaN           NaN  NaN  \n",
       "       2161-11-13 03:46:00      0.9  NaN           NaN  NaN  \n",
       "199994 2188-07-07 21:23:00      1.0  NaN           NaN  NaN  \n",
       "       2188-07-08 03:09:00      0.7  NaN           NaN  NaN  \n",
       "       2188-07-08 04:13:00      0.6  NaN           NaN  NaN  \n",
       "       2188-07-08 06:20:00      0.7  NaN           NaN  NaN  \n",
       "199998 2119-02-20 10:52:00      1.1  1.1           NaN  NaN  \n",
       "       2119-02-20 12:36:00      1.9  1.9           NaN  NaN  \n",
       "       2119-02-20 13:33:00      2.0  2.0           NaN  NaN  \n",
       "       2119-02-20 13:59:00      2.6  2.6           NaN  NaN  \n",
       "       2119-02-20 20:43:00      1.3  1.3           NaN  NaN  \n",
       "199999 2136-04-04 20:55:00      NaN  NaN           NaN  NaN  \n",
       "       2136-04-06 15:29:00      NaN  NaN           NaN  NaN  \n",
       "\n",
       "[177451 rows x 10 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_invalid.join(df_cleaned, how='outer')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Open the frame stored under 'transformed/<label>' and run the cleaning pipeline on it.\n",
    "# `label` and `pipeline` come from earlier cells.\n",
    "df = utils.open_df('data/mimic_data', 'transformed/{}'.format(label))\n",
    "df_cleaned = pipeline.transform(df)\n",
    "\n",
    "# Cell result: utils.data_loss comparison of the opened frame against its cleaned version.\n",
    "utils.data_loss(df, df_cleaned)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(113L, 0, '0.0%')"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Secondary/optional transformation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Re-import the transformers module so edits made to it since the first import take effect.\n",
    "reload(transformers)\n",
    "\n",
    "# Optional pipeline 1: three steps, ending with transformers.known_col_only().\n",
    "# NOTE(review): the step label 'quantitative_vales_only' looks like a typo for 'values';\n",
    "# it is left untouched here because step labels are runtime strings.\n",
    "pipeline1 = Pipeline([\n",
    "    ('combine_like_columns', transformers.combine_like_cols()),\n",
    "    ('quantitative_vales_only', transformers.quantitative_only()),\n",
    "    ('known_col_only', transformers.known_col_only()),\n",
    "])\n",
    "\n",
    "# Optional pipeline 2: same first step, followed by transformers.max_col_only().\n",
    "pipeline2 = Pipeline([\n",
    "    ('combine_like_columns', transformers.combine_like_cols()),\n",
    "    ('max_col', transformers.max_col_only()),\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(205349L, 20, '0.0583%')"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run optional pipeline 1 on the cleaned frame; the cell result is the\n",
    "# utils.data_loss comparison against the originally opened frame `df`.\n",
    "# NOTE: the following cell rebinds df_final to the output of pipeline2.\n",
    "df_final = pipeline1.transform(df_cleaned)\n",
    "utils.data_loss(df, df_final)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(205349L, 20, '0.0583%')"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run optional pipeline 2 on the same cleaned frame. This rebinds df_final,\n",
    "# replacing the pipeline1 result assigned in the previous cell.\n",
    "df_final = pipeline2.transform(df_cleaned)\n",
    "utils.data_loss(df, df_final)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Handle Categorical columns\n",
    "\n",
    "0. Standardize category lists\n",
    "1. Make sure ordinal & nominal within category list?\n",
    "2. Explode nominal\n",
    "3. Ordinal to numeric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def make_cat_dict(label,cat_codes):\n",
    "    \"\"\"Pair each category value stored for `label` with a category code.\n",
    "\n",
    "    Reads the 'transformed/<label>' table from `hdf5_fname`, takes the distinct\n",
    "    non-null values of its first column in sorted order, converts them to str and\n",
    "    matches them positionally with `cat_codes`.\n",
    "\n",
    "    Args:\n",
    "        label: data-element label; selects the 'transformed/<label>' table.\n",
    "        cat_codes: iterable of codes, one per distinct value, in sorted-value order.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping each value (as str) to its code.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the number of distinct values differs from the number of\n",
    "            codes; zip() would otherwise silently drop the unmatched entries.\n",
    "    \"\"\"\n",
    "    cat_codes = list(cat_codes)\n",
    "    df_label = utils.open_df(hdf5_fname, 'transformed/{}'.format(label))\n",
    "    # value_counts() is used only to obtain the distinct values (it skips NaN by default).\n",
    "    mimic_cats = df_label.iloc[:,0].value_counts().sort_index().index.astype(str).tolist()\n",
    "    if len(mimic_cats) != len(cat_codes):\n",
    "        raise ValueError('{}: {} categories in data but {} codes supplied'.format(\n",
    "            label, len(mimic_cats), len(cat_codes)))\n",
    "    return dict(zip(mimic_cats, cat_codes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "category_map={}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>val_numeric</th>\n",
       "      <th>val_text</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>category_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>No motor response</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Extension to pain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Flexion to pain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Withdrawal from pain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Localizing pain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>Obeys commands</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>No eye opening</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2</td>\n",
       "      <td>Eye opening to pain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>3</td>\n",
       "      <td>Eye opening to verbal command</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>4</td>\n",
       "      <td>Eyes open spontaneously</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>No verbal response (&gt;2 yrs); no vocal response...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>2</td>\n",
       "      <td>Incomprehensible sounds</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>3</td>\n",
       "      <td>Inappropriate words</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>4</td>\n",
       "      <td>Confused</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>5</td>\n",
       "      <td>Oriented</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             val_numeric                                           val_text\n",
       "category_id                                                                \n",
       "0                      1                                 No motor response \n",
       "1                      2                                  Extension to pain\n",
       "2                      3                                    Flexion to pain\n",
       "3                      4                            Withdrawal from pain   \n",
       "4                      5                                    Localizing pain\n",
       "5                      6                                     Obeys commands\n",
       "6                      1                                     No eye opening\n",
       "7                      2                                Eye opening to pain\n",
       "8                      3                      Eye opening to verbal command\n",
       "9                      4                            Eyes open spontaneously\n",
       "10                     1  No verbal response (>2 yrs); no vocal response...\n",
       "11                     2                            Incomprehensible sounds\n",
       "12                     3                                Inappropriate words\n",
       "13                     4                                           Confused\n",
       "14                     5                                           Oriented"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_dict.tables.categories"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# (label, codes) for each Glasgow Coma Scale component. The codes are the category_id\n",
    "# rows of data_dict.tables.categories: 0-5 motor, 6-9 eye opening, 10-14 verbal.\n",
    "# The verbal list starts with 10 twice because the data holds two raw values for the\n",
    "# lowest score ('1 No Response' and '1.0 ET/Trach') and both map to category 10.\n",
    "# list(range(...)) keeps the concatenation valid where range() is not a list (Python 3).\n",
    "gcs_codes = [\n",
    "    (data_dict.labels.GLASGOW_COMA_SCALE_MOTOR, range(0,6)),\n",
    "    (data_dict.labels.GLASGOW_COMA_SCALE_EYE_OPENING, range(6,10)),\n",
    "    (data_dict.labels.GLASGOW_COMA_SCALE_VERBAL, [10] + list(range(10,15))),\n",
    "]\n",
    "for label, cat_codes in gcs_codes:\n",
    "    category_map[label] = make_cat_dict(label, cat_codes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'glasgow coma scale eye opening': {'1 No Response': 6,\n",
       "  '2 To pain': 7,\n",
       "  '3 To speech': 8,\n",
       "  '4 Spontaneously': 9},\n",
       " 'glasgow coma scale motor': {'1 No Response': 0,\n",
       "  '2 Abnorm extensn': 1,\n",
       "  '3 Abnorm flexion': 2,\n",
       "  '4 Flex-withdraws': 3,\n",
       "  '5 Localizes Pain': 4,\n",
       "  '6 Obeys Commands': 5},\n",
       " 'glasgow coma scale verbal': {'1 No Response': 10,\n",
       "  '1.0 ET/Trach': 10,\n",
       "  '2 Incomp sounds': 11,\n",
       "  '3 Inapprop words': 12,\n",
       "  '4 Confused': 13,\n",
       "  '5 Oriented': 14}}"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "category_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<module 'transformers' from 'transformers.pyc'>"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reload(utils)\n",
    "reload(transformers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>glasgow coma scale verbal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>ord</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>723</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1.0 ET/Trach</th>\n",
       "      <td>444357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5 Oriented</th>\n",
       "      <td>379668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4 Confused</th>\n",
       "      <td>82434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1 No Response</th>\n",
       "      <td>20836</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2 Incomp sounds</th>\n",
       "      <td>18007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3 Inapprop words</th>\n",
       "      <td>5611</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label            glasgow coma scale verbal\n",
       "status                               known\n",
       "variable_type                          ord\n",
       "units                             no_units\n",
       "description                            723\n",
       "1.0 ET/Trach                        444357\n",
       "5 Oriented                          379668\n",
       "4 Confused                           82434\n",
       "1 No Response                        20836\n",
       "2 Incomp sounds                      18007\n",
       "3 Inapprop words                      5611"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the 'transformed' GCS-verbal table and run it through the standard pipeline.\n",
    "gcs_verbal_key = 'transformed/{}'.format(data_dict.labels.GLASGOW_COMA_SCALE_VERBAL)\n",
    "df_gcs = standard_pipeline.transform(utils.open_df(hdf5_fname, gcs_verbal_key))\n",
    "# Count how often each value occurs, column by column.\n",
    "df_gcs.apply(lambda col: col.value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>glasgow coma scale verbal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>ord</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>723</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>465193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5 Oriented</th>\n",
       "      <td>379668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>82434</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18007</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5611</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         glasgow coma scale verbal\n",
       "status                            known\n",
       "variable_type                       ord\n",
       "units                          no_units\n",
       "description                         723\n",
       "1                                465193\n",
       "5 Oriented                       379668\n",
       "4                                 82434\n",
       "2                                 18007\n",
       "3                                  5611"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Standardize the raw category strings via category_map. NOTE: df_gcs is rebound here,\n",
    "# so re-running this cell feeds already-standardized data back into the transformer.\n",
    "transformer = transformers.standardize_categories(\n",
    "    data_dict, category_map, use_numeric=True)\n",
    "df_gcs = transformer.transform(df_gcs)\n",
    "df_gcs.apply(lambda col: col.value_counts())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"2\" halign=\"left\">glasgow coma scale verbal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "      <th>unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>ord</th>\n",
       "      <th>nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>no_units</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>723</th>\n",
       "      <th>723</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>465193.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18007.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5611.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>82434.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5 Oriented</th>\n",
       "      <td>NaN</td>\n",
       "      <td>379668.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         glasgow coma scale verbal          \n",
       "status                            known   unknown\n",
       "variable_type                       ord       nom\n",
       "units                          no_units  no_units\n",
       "description                         723       723\n",
       "1                              465193.0       NaN\n",
       "2                               18007.0       NaN\n",
       "3                                5611.0       NaN\n",
       "4                               82434.0       NaN\n",
       "5 Oriented                          NaN  379668.0"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Separate values that were not standardized; in the recorded output they end up in\n",
    "# an extra column whose status is 'unknown'.\n",
    "transformer2 = transformers.split_bad_categories(data_dict, use_numeric=True)\n",
    "df_gcs_split = transformer2.transform(df_gcs)\n",
    "df_gcs_split.apply(lambda col: col.value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Unified pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import icu_data_defs\n",
    "from sklearn.pipeline import Pipeline\n",
    "import transformers\n",
    "from units import MedicalUreg\n",
    "from constants import variable_type\n",
    "import utils\n",
    "import mimic\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Re-import the local modules so that recent edits to them are picked up.\n",
    "for module in (transformers, utils, mimic):\n",
    "    reload(module)\n",
    "\n",
    "# Path handed to utils.open_df when loading tables.\n",
    "hdf5_fname = 'data/mimic_data'\n",
    "\n",
    "# Data dictionary (source of the labels used below) and the medical unit registry.\n",
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')\n",
    "ureg = MedicalUreg('config/medical_units.txt')\n",
    "\n",
    "# Aggregator passed to same_index_aggregator: keeps the first element.\n",
    "agg_func = lambda grp: grp.iloc[0]\n",
    "# Variable types passed to the filter_var_type step of cleaning_pipeline.\n",
    "var_types_to_keep = [variable_type.QUANTITATIVE, variable_type.ORDINAL]\n",
    "# Raw Glasgow Coma Scale text -> category code, one sub-dict per GCS component.\n",
    "# The codes appear to be category_id values of data_dict.tables.categories\n",
    "# (0-5 motor, 6-9 eye opening, 10-14 verbal). '1 No Response' and '1.0 ET/Trach'\n",
    "# intentionally share verbal code 10.\n",
    "category_map = {\n",
    "    data_dict.labels.GLASGOW_COMA_SCALE_MOTOR: {\n",
    "        '1 No Response': 0,\n",
    "        '2 Abnorm extensn': 1,\n",
    "        '3 Abnorm flexion': 2,\n",
    "        '4 Flex-withdraws': 3,\n",
    "        '5 Localizes Pain': 4,\n",
    "        '6 Obeys Commands': 5,\n",
    "    },\n",
    "    data_dict.labels.GLASGOW_COMA_SCALE_EYE_OPENING: {\n",
    "        '1 No Response': 6,\n",
    "        '2 To pain': 7,\n",
    "        '3 To speech': 8,\n",
    "        '4 Spontaneously': 9,\n",
    "    },\n",
    "    data_dict.labels.GLASGOW_COMA_SCALE_VERBAL: {\n",
    "        '1 No Response': 10,\n",
    "        '1.0 ET/Trach': 10,\n",
    "        '2 Incomp sounds': 11,\n",
    "        '3 Inapprop words': 12,\n",
    "        '4 Confused': 13,\n",
    "        '5 Oriented': 14,\n",
    "    },\n",
    "}\n",
    "\n",
    "# MIMIC-specific steps. add_level is built with None as a placeholder; the value is\n",
    "# supplied per label later via mimic_transform.set_params(add_level__level_val=...).\n",
    "mimic_steps = [\n",
    "    ('clean', mimic.clean_extract()),\n",
    "    ('unstack', mimic.unstacker()),\n",
    "    ('add_level', transformers.add_level(None, 'label', axis=1)),\n",
    "]\n",
    "mimic_transform = Pipeline(mimic_steps)\n",
    "\n",
    "# Source-independent standardization: drop sparse columns, aggregate rows sharing an\n",
    "# index with agg_func, split by dtype, then standardize columns and categories.\n",
    "standard_steps = [\n",
    "    ('drop_small_columns', transformers.remove_small_columns(threshold=5)),\n",
    "    ('aggregate_same_datetime', transformers.same_index_aggregator(agg_func)),\n",
    "    ('split_dtype', transformers.split_dtype()),\n",
    "    ('standardize_columns', transformers.standardize_columns(data_dict, ureg)),\n",
    "    ('standardize_categories', transformers.standardize_categories(data_dict, category_map)),\n",
    "    ('split_bad_categories', transformers.split_bad_categories(data_dict)),\n",
    "]\n",
    "standard_pipeline = Pipeline(standard_steps)\n",
    "\n",
    "# Final cleaning applied after standardization.\n",
    "cleaning_steps = [\n",
    "    ('drop_small_columns', transformers.remove_small_columns(threshold=50)),\n",
    "    ('combine_like_columns', transformers.combine_like_cols()),\n",
    "    # Named 'quantitative_only', but var_types_to_keep also lists the ordinal type.\n",
    "    ('quantitative_only', transformers.filter_var_type(var_types_to_keep)),\n",
    "    ('known_col_only', transformers.known_col_only()),\n",
    "    # Optional step, currently disabled:\n",
    "    # ('max_col', transformers.max_col_only()),\n",
    "]\n",
    "cleaning_pipeline = Pipeline(cleaning_steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>817373.000000</td>\n",
       "      <td>817373</td>\n",
       "      <td>771272.000000</td>\n",
       "      <td>817373</td>\n",
       "      <td>817373.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>536890</td>\n",
       "      <td>446614.000000</td>\n",
       "      <td>6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2159-09-30 01:00:00</td>\n",
       "      <td>99.999996</td>\n",
       "      <td>mL/hour</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>9</td>\n",
       "      <td>25385.000000</td>\n",
       "      <td>719922</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2100-06-08 04:23:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>last</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2209-08-07 14:27:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>150437.603989</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>222346.358593</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>28720.645234</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>23242.543767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>100001.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4647.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>125824.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>225158.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>150715.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>225158.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>175466.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>225158.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>199984.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>225158.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   id             datetime          value    units  \\\n",
       "count   817373.000000               817373  771272.000000   817373   \n",
       "unique            NaN               536890  446614.000000        6   \n",
       "top               NaN  2159-09-30 01:00:00      99.999996  mL/hour   \n",
       "freq              NaN                    9   25385.000000   719922   \n",
       "first             NaN  2100-06-08 04:23:00            NaN      NaN   \n",
       "last              NaN  2209-08-07 14:27:00            NaN      NaN   \n",
       "mean    150437.603989                  NaN            NaN      NaN   \n",
       "std      28720.645234                  NaN            NaN      NaN   \n",
       "min     100001.000000                  NaN            NaN      NaN   \n",
       "25%     125824.000000                  NaN            NaN      NaN   \n",
       "50%     150715.000000                  NaN            NaN      NaN   \n",
       "75%     175466.000000                  NaN            NaN      NaN   \n",
       "max     199984.000000                  NaN            NaN      NaN   \n",
       "\n",
       "               itemid  \n",
       "count   817373.000000  \n",
       "unique            NaN  \n",
       "top               NaN  \n",
       "freq              NaN  \n",
       "first             NaN  \n",
       "last              NaN  \n",
       "mean    222346.358593  \n",
       "std      23242.543767  \n",
       "min       4647.000000  \n",
       "25%     225158.000000  \n",
       "50%     225158.000000  \n",
       "75%     225158.000000  \n",
       "max     225158.000000  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the extract for a single label and summarise all of its columns.\n",
    "label = 'normal saline'\n",
    "extract_key = 'extract/{}'.format(label)\n",
    "df = utils.open_df(hdf5_fname, extract_key)\n",
    "df.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false,
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"17\" halign=\"left\">normal saline</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mL/hour</th>\n",
       "      <th>mL</th>\n",
       "      <th>ml</th>\n",
       "      <th>mL/hour</th>\n",
       "      <th>no_units</th>\n",
       "      <th>ml</th>\n",
       "      <th>mL</th>\n",
       "      <th>ml</th>\n",
       "      <th>mL/min</th>\n",
       "      <th>L</th>\n",
       "      <th colspan=\"2\" halign=\"left\">ml</th>\n",
       "      <th>no_units</th>\n",
       "      <th>ml</th>\n",
       "      <th>mL/hour</th>\n",
       "      <th>ml</th>\n",
       "      <th>mL/hour</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>225158</th>\n",
       "      <th>225158</th>\n",
       "      <th>30190</th>\n",
       "      <th>30190</th>\n",
       "      <th>30190</th>\n",
       "      <th>225158</th>\n",
       "      <th>30190</th>\n",
       "      <th>4647</th>\n",
       "      <th>225158</th>\n",
       "      <th>225158</th>\n",
       "      <th>6190</th>\n",
       "      <th>44053</th>\n",
       "      <th>44053</th>\n",
       "      <th>44440</th>\n",
       "      <th>44440</th>\n",
       "      <th>41913</th>\n",
       "      <th>41913</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>719400.000000</td>\n",
       "      <td>39402.0</td>\n",
       "      <td>5167.0</td>\n",
       "      <td>259.0</td>\n",
       "      <td>160.0</td>\n",
       "      <td>6730.000000</td>\n",
       "      <td>50.0</td>\n",
       "      <td>8</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>443511.000000</td>\n",
       "      <td>147.0</td>\n",
       "      <td>243.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4233.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>99.999996</td>\n",
       "      <td>500.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>49.999999</td>\n",
       "      <td>0.0</td>\n",
       "      <td>given</td>\n",
       "      <td>100.0</td>\n",
       "      <td>1000.0</td>\n",
       "      <td>given</td>\n",
       "      <td>17.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>25193.000000</td>\n",
       "      <td>11589.0</td>\n",
       "      <td>651.0</td>\n",
       "      <td>259.0</td>\n",
       "      <td>160.0</td>\n",
       "      <td>352.000000</td>\n",
       "      <td>50.0</td>\n",
       "      <td>8</td>\n",
       "      <td>60.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label        normal saline                                                 \\\n",
       "units              mL/hour       mL      ml mL/hour no_units           ml   \n",
       "description         225158   225158  30190   30190    30190        225158   \n",
       "count        719400.000000  39402.0  5167.0   259.0    160.0  6730.000000   \n",
       "unique       443511.000000    147.0   243.0     1.0      1.0  4233.000000   \n",
       "top              99.999996    500.0     1.0     0.0      0.0    49.999999   \n",
       "freq          25193.000000  11589.0   651.0   259.0    160.0   352.000000   \n",
       "\n",
       "label                                                                   \\\n",
       "units           mL     ml mL/min       L     ml        no_units     ml   \n",
       "description 30190  4647   225158  225158 6190   44053    44053  44440    \n",
       "count         50.0      8   80.0     1.0      1    3.0      2.0    5.0   \n",
       "unique         1.0      1    6.0     1.0      1    2.0      1.0    2.0   \n",
       "top            0.0  given  100.0  1000.0  given   17.0      0.0   37.0   \n",
       "freq          50.0      8   60.0     1.0      1    2.0      2.0    3.0   \n",
       "\n",
       "label                               \n",
       "units       mL/hour     ml mL/hour  \n",
       "description  44440  41913   41913   \n",
       "count           1.0    2.0     1.0  \n",
       "unique          1.0    1.0     1.0  \n",
       "top             0.0   20.0     0.0  \n",
       "freq            1.0    2.0     1.0  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fill in the add_level placeholder with the current label, then transform the extract.\n",
    "# (set_params returns the pipeline itself, so the calls can be chained.)\n",
    "df_tr = mimic_transform.set_params(add_level__level_val=label).transform(df)\n",
    "df_tr.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((770238, 17), (504923, 9), 265320L, 4, '0.0202% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"9\" halign=\"left\">normal saline</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "      <th>unknown</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mL/hr</th>\n",
       "      <th colspan=\"2\" halign=\"left\">mL</th>\n",
       "      <th>mL/hr</th>\n",
       "      <th>no_units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">mL</th>\n",
       "      <th>mL/min</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>225158(mL/hour)</th>\n",
       "      <th>225158</th>\n",
       "      <th>30190(ml)</th>\n",
       "      <th>30190(mL/hour)</th>\n",
       "      <th>30190</th>\n",
       "      <th>225158(ml)</th>\n",
       "      <th>30190</th>\n",
       "      <th>225158</th>\n",
       "      <th>4647(ml)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>456226.000000</td>\n",
       "      <td>39261.000000</td>\n",
       "      <td>5068.000000</td>\n",
       "      <td>258.0</td>\n",
       "      <td>157.0</td>\n",
       "      <td>4844.000000</td>\n",
       "      <td>50.0</td>\n",
       "      <td>80.000000</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>given</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>75.669716</td>\n",
       "      <td>471.560439</td>\n",
       "      <td>4.255024</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>128.094818</td>\n",
       "      <td>0.0</td>\n",
       "      <td>103.312500</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>217.566524</td>\n",
       "      <td>372.084861</td>\n",
       "      <td>8.418981</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>260.186534</td>\n",
       "      <td>0.0</td>\n",
       "      <td>55.885358</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-906.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>6.000000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>0.850000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.310294</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>15.000000</td>\n",
       "      <td>500.000000</td>\n",
       "      <td>1.100000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>62.255889</td>\n",
       "      <td>500.000000</td>\n",
       "      <td>2.400000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.000001</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>51947.999400</td>\n",
       "      <td>11000.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5000.000160</td>\n",
       "      <td>0.0</td>\n",
       "      <td>300.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label           normal saline                                            \\\n",
       "status                  known                                             \n",
       "variable_type              qn                                             \n",
       "units                   mL/hr            mL                       mL/hr   \n",
       "description   225158(mL/hour)        225158    30190(ml) 30190(mL/hour)   \n",
       "count           456226.000000  39261.000000  5068.000000          258.0   \n",
       "unique                    NaN           NaN          NaN            NaN   \n",
       "top                       NaN           NaN          NaN            NaN   \n",
       "freq                      NaN           NaN          NaN            NaN   \n",
       "mean                75.669716    471.560439     4.255024            0.0   \n",
       "std                217.566524    372.084861     8.418981            0.0   \n",
       "min               -906.000000      0.000000     0.000000            0.0   \n",
       "25%                  6.000000    200.000000     0.850000            0.0   \n",
       "50%                 15.000000    500.000000     1.100000            0.0   \n",
       "75%                 62.255889    500.000000     2.400000            0.0   \n",
       "max              51947.999400  11000.000000   117.000000            0.0   \n",
       "\n",
       "label                                                           \n",
       "status         unknown        known           unknown           \n",
       "variable_type       qn           qn                qn      nom  \n",
       "units         no_units           mL            mL/min no_units  \n",
       "description      30190   225158(ml) 30190      225158 4647(ml)  \n",
       "count            157.0  4844.000000  50.0   80.000000        8  \n",
       "unique             NaN          NaN   NaN         NaN        1  \n",
       "top                NaN          NaN   NaN         NaN    given  \n",
       "freq               NaN          NaN   NaN         NaN        8  \n",
       "mean               0.0   128.094818   0.0  103.312500      NaN  \n",
       "std                0.0   260.186534   0.0   55.885358      NaN  \n",
       "min                0.0     0.000000   0.0    0.500000      NaN  \n",
       "25%                0.0     9.310294   0.0  100.000000      NaN  \n",
       "50%                0.0    25.000000   0.0  100.000000      NaN  \n",
       "75%                0.0   100.000001   0.0  100.000000      NaN  \n",
       "max                0.0  5000.000160   0.0  300.000000      NaN  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run the transformed frame through the standard pipeline, print the\n",
    "# utils.data_loss report for df_tr vs. the result, then summarise all columns.\n",
    "df_cln1 = standard_pipeline.transform(df_tr)\n",
    "# Single-argument print(...) emits the same text on Python 2 and Python 3.\n",
    "print(utils.data_loss(df_tr, df_cln1))\n",
    "df_cln1.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "this step: ((504923, 9), (504874, 2), 295L, 1, '0.0051% records')\n",
      "overall: ((770238, 17), (504874, 2), 265615L, 5, '0.0253% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"2\" halign=\"left\">normal saline</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>49173.000000</td>\n",
       "      <td>456484.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>389.563178</td>\n",
       "      <td>75.626948</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>380.287765</td>\n",
       "      <td>217.512467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>-906.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>250.000000</td>\n",
       "      <td>15.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>500.000000</td>\n",
       "      <td>62.204886</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>11000.000000</td>\n",
       "      <td>51947.999400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         normal saline               \n",
       "status                known               \n",
       "variable_type            qn               \n",
       "units                    mL          mL/hr\n",
       "count          49173.000000  456484.000000\n",
       "mean             389.563178      75.626948\n",
       "std              380.287765     217.512467\n",
       "min                0.000000    -906.000000\n",
       "25%              100.000000       6.000000\n",
       "50%              250.000000      15.000000\n",
       "75%              500.000000      62.204886\n",
       "max            11000.000000   51947.999400"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Apply the cleaning pipeline to the formatted frame, then print the\n",
    "# utils.data_loss report for this step alone and for the chain from df_tr.\n",
    "df_cln2 = cleaning_pipeline.transform(df_cln1)\n",
    "# Build one string per line so the printed text is identical on Python 2\n",
    "# (where print('a', b) would show a tuple) and on Python 3.\n",
    "print('this step: {}'.format(utils.data_loss(df_cln1, df_cln2)))\n",
    "print('overall: {}'.format(utils.data_loss(df_tr, df_cln2)))\n",
    "df_cln2.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[<matplotlib.axes._subplots.AxesSubplot object at 0x0000000043A236D8>,\n",
       "        <matplotlib.axes._subplots.AxesSubplot object at 0x0000000029BE0C18>]], dtype=object)"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgAAAAFeCAYAAADzFKfgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X9clfX9//Hn+QGiHPBHYbd0DoyJpTMT0Fn+GE1Z2Nxn\nlVDg1Jrmr1Xb1HlDWop+0sA2P61ltvZpy88HW2K/1ubWKtJwMpdKN1xSWl/b1KwZoSUcCQ6c9/cP\nP5yJggeBA9r7cb/dvN0813W9r9f7fTjviyfnXOe6HMYYIwAAYBVnV3cAAAB0PgIAAAAWIgAAAGAh\nAgAAABYiAAAAYCECAAAAFrpoA8CaNWtUUlKitWvXavr06V3dnSauvPJKffrpp+3ezwsvvKB58+ZJ\nku677z7t2LGjXX3atWvXObeZPn261q5dqyNHjmjEiBFtrtWRvvGNb+h3v/tdl7TfuXOnrrzyyjbX\nbkv7tWvXauXKlefc12uvvaZHH320zf26UDGnz79PzOnzcyHN6UZPPfWUnnrqKeXk5OjJJ588rz60\n91hwUQaAPXv26MCBAxozZowiIiIUGRnZ1V1qwuFwdPg+V65cqWuvvbbN7bt3767u3bsH3aZHjx6S\nQjOGtmhNv0PVPiIiol2129u+pX1NmDBBpaWl2rdvX4fs+0LAnD5/zOnzdyHN6UZbtmzRxIkT29SH\n9h4L3G1q1cUeeeSRwF8IQ4cOVeO1jL7xjW/olltu0Y4dO/TRRx9p0qRJWrx4sSSpsLBQGzZskMvl\n0iWXXKJly5YpNjZWOTk5+vTTT/XBBx8oJSVFn3zyibp166a33npLlZWVSktLU58+fbRlyxZVVlZq\n5cqV+trXvqZ//vOf+s///E+dPHlSH3/8sa666io99NBDCg8PV0vXVvrFL36h1157TWFhYerVq5fy\n8/N16aWX6tlnn9WmTZtUX1+vTz/9VHPmzFFmZmaTttOnT9f06dM1dOhQ3XHHHfr617+uPXv26MSJ\nE/rhD3+oG2+8UZL0y1/+Uq+88oqMMerfv79yc3MVExOj6667TgkJCed8XpOSkjRs2LAmyw4cOKA5\nc+YoJydHV111VYu16+vrlZ+frx07dsjlcmn48OFasmSJnnvuOb311lv66U9/qvr6en3ta1/TT37y\nE91yyy168803lZeXp8WLF+uhhx7SgAED9N5778nn82nZsmUaNWqURo0apSFDhpyz3y+88IJefvll\n/fKXvzzr8entH374YW3evFl9+vRRYmKi9u7dq4KCAuXk5CgyMlLvvvuu/vWvf+mKK67QQw89pK98\n5SsaO3bsOWuf/rP55je/2eTx2LFjA+1zcnJa9bpqyZl9SU9P19q1a7V27dqg/bsYMKeZ06ezYU5L\nUlVVlbxery677DJJ0ptvvqmXX35ZlZWVGjRokP7rv/5LERERGjZsmCZMmKD9+/dr1apVHXcsMBeZ\nEydOmGuuucb4fL6z1l1//fVm9erVxhhj/vWvf5mrr77afPDBB+avf/2r+eY3v2mOHz9ujDHm+eef\nNzfeeKMxxpglS5aY733ve4F9LFmyxNx2222moaHBVFRUmMGDB5sNGzYYY4z5n//5HzNz5kxjjDGr\nV682v//9740xxvh8PvPtb3/bvPLKK8YYYwYPHhyo1eijjz4ySUlJpq6uzhhjzJNPPmmKioqM1+s1\nt912m/n000+NMcaUlZWZESNGBPo5d+5cY4wx06ZNMy+//LL54IMPzODBg83rr79ujDHm5ZdfNtdf\nf70xxpgXXnjBLFiwwDQ0NBhjjCksLDSzZ88+7+f4gw8+MCNGjDDvvvuuSU1NNX/7298Cy1uq/fDD\nD5t77rknUDsnJ8fk5uaaI0eOmDFjxhhjjHnjjTfM2LFjzaJFi4wxxjz44IPmiSeeMG+88YYZOnSo\n2bdvnzHGmN/85jdm2rRpre7v6c9Tc4
8b+zp58mRz8uRJU19fb2bPnm2mT59ujDn1M8/KyjI+n8/4\nfD5z8803m+eff77V9Rt/Ni09bqzRmtfVI488Yu6///6gNaurq83w4cNNbW1tq/t5oWJOM6fPZMuc\n/sMf/mDWrVsX2N+tt95qamtrTUNDg7n55pvNiy++aIw59fprfG2eqT3HgovuI4CDBw+qb9++crub\nf/NiwoQJkqTLLrtMl1xyiT777DNt375dkyZNUq9evSRJN998sz7++GMdOXJEkpSYmNhkH9dff72c\nTqcuvfRSde/eXePGjZMkffnLX9Znn30mSVq8eLF69+6tJ554QsuXL1dFRYW8Xm+L/b7ssst01VVX\n6eabb9bq1as1ePBgTZgwQT169NAvf/lLbd26VQ8//LAee+wx1dTUnPM5CAsL09e//nVJ0pAhQwJ9\nev3117Vnzx7dcsstuummm7RhwwYdPHjwnPtqSV1dnW6//XZdddVVTVJsS7X/8pe/KCsrS07nqZfU\n9OnTtW3bNvXr10+XXXaZ3nrrLf3lL3/R3LlztXPnTkmnPr9qTNj9+vXT4MGDz9pvR3njjTeUmpqq\n7t27y+Vy6dZbb22yfty4cXK73XK73UpISOjw+lLrXletFRkZKY/HE3gNX8yY08zptvgizOmioqLA\n61s69VoPDw+X0+lUQkKCjh07FliXlJTU7D7acyy46AKA0+lUQ0NDi+sjIiKaPDbGyO/3n7Wd3+9X\nfX29JJ31eWN4eHiTx80dmBYsWKBNmzapf//++t73vhf0LS2Hw6GCggLl5+erd+/eysvL0wMPPKCj\nR4/qpptu0kcffaTk5GT96Ec/Oud+pFMT9vT9mv97e9Lv92v27Nn63e9+p9/97nd67rnn9NRTTwXd\nX0seffRRvf3223r11VdbVft0DQ0Ngef3m9/8poqLi1VSUqK0tDT169dPf/rTn9S9e3cNGDBAktSt\nW7dm99saZ3626fP5ztqmW7duTfZ5+jikpq+bttQ/ffvm6kute12dj4aGBrlcrnbt40LAnGZOn8mG\nOe3z+XTw4MEmH+O09LOQFDiXozltPRZcdAFgwIABqqysVF1dXavbjBs3Ti+99FIgTT333HPq3bu3\nYmNj29yPkpIS3XXXXZo0aZKMMdqzZ885D2L79u3T5MmTFR8frzlz5uiOO+7Qvn379NZbb6lPnz6a\nP3++xowZo61bt0rSOV+sLa0bO3asnnnmGVVXV0uSfv7znys7O7tN4wsLC9OIESO0atUq5ebmqrKy\nMmjtp59+WvX19fL7/frtb3+rMWPGSJImTpyozZs3y+/369JLL9V1112nn/70p0pLS2tT387Uu3dv\nvfvuu6qrq1N9fb22bNly1jYpKSl66aWXVFVVJb/frxdffLFDaktSnz59tHfvXknSoUOHtH///nbt\nrzUHqurqatXW1uryyy9vV60LAXOaOX0mG+b0jh07gp4j0BrtORZcdCcBRkVFKTk5WW+88Ubg7ZZG\nZ6bGxsfXXXedbr/9dt1+++2STr24Hn/88VbVa+nM2QULFuiuu+5Sr1691L17d40aNUqHDh1qsc2V\nV16pSZMm6ZZbblGPHj3UvXt33XfffYqLi9Nzzz2nG264QZGRkRo2bJj69Olz1tt8p++zpT5lZGTo\n448/1m233San06nLL79ceXl5Z2130003adWqVRo6dGjQcY8aNUrf+ta3dO+992rZsmUt1v7+97+v\nBx98UDfddJMaGhp09dVXa+nSpZKk+Ph4ORyOwBnPY8eO1WOPPabU1NQW6zdnzpw5ysrK0vXXX99k\n+dixYzVq1CilpaWpb9+++trXvnbWhB01apRmzJihrKwsRUREqF+/fudVe+/evbrvvvua/frR/Pnz\ntWTJEr3++uu64oorNGrUqKD7O9cZ2Zs2bdILL7wQeDx48GA9/fTTTbbZvn27rr/++rP+6rkYMaeZ\n0z
bO6cGDB2vy5Mmt6uu59t2uY8F5nzVwAXjzzTfNnDlzurobF62HHnrIvPPOO13djfO2adMm8+qr\nr3bIvv785z8HThhqrTNPQupKM2bMMPv37+/qbnQY5nT7MKcv/jndVu05FgT9CMAYo9zcXGVmZmrG\njBk6fPhwk/VbtmxRenq6MjMz9cwzz7SqTV5engoLCwOPN23apClTpigzM1Ovv/560NAyYsQIXXHF\nFdq+fXtrMg7O0L9//3ZdDKOruN3us/5S6CxHjx5VVlZWl9Q+U1FRkUaOHBn0K2Dn0pHz+tChQ5o6\ndaqmTZumFStWBPaxfv163XrrrbrtttsCFyupra3VD37wA333u9/V3Llzdfz4cUnM6fZiTp+/C2lO\nt1W7jwXBEsIrr7xilixZYow59XWW+fPnB9b5fD6TmppqqqqqTF1dnZkyZYqprKxssU1lZaW58847\nTWpqqtm4caMxxpiKigozefJk4/P5TFVVlZk8eXLgazUAQqMj5/W8efPMrl27jDHGLFu2zLz66qvm\n0KFDZsqUKYF9ZmZmmv3795snn3zSPPLII8YYY/74xz+alStXdsp4AZwt6DsApaWlgc/lhg8fHjgx\nQjp1QYnY2Fh5PB6FhYUpOTlZO3fuPKtNeXm5JOnkyZO655579B//8R+Bffz9739XUlKS3G63PB6P\n4uLi2n3CBYBz68h5XV5eruTkZEnS+PHjtWPHDvXr109PPPFEYJ8NDQ3q1q2bSktLNX78+CbbAuga\nQQNAdXW1oqKiAo/dbnfg6yFnruvRo0fgykanL3e5XPL7/frSl76kq6+++pz7b9wHgNDpqHnd0NDQ\n5AznyMhIVVVVyeVyBb6jv3r1ag0ZMkSxsbGqrq6Wx+MJbNt4djuAzhc0AHg8niYXw/D7/YELQ3g8\nniYT2Ov1qmfPnuds09z+z9xHdHT0OftkzuP7nADO1lHz2uVyNZnbp8/furo6LVq0SDU1NcrNzT2r\n7pmBoiXMdyA0gn4NMDExUVu3blVaWprKysqanGwQHx+vgwcP6sSJE4qIiNDu3bs1a9YsSWqxzZmu\nvvpq/fznP1ddXZ1qa2v1/vvva9CgQefsk8PhUEVF171LEBMT1aX1L4Q+2F7/QuhDTEzwX54t6ch5\nPWTIEO3atUsjR47Utm3bNHr0aEmnvkp17bXX6s4772xSt7i4WMOGDVNxcXHgo4Nz6ez53tk/1y9y\nvS/y2Dq7Xnvme0uCBoDU1FSVlJQEbmSRl5enzZs3q6amRhkZGcrJydHMmTNljFF6err69u3bbJuW\nXHrppZo+fbqmTp0qY4wWLlx41tWVAHSsjpzX2dnZWrp0qXw+n+Lj45WWlqaioiLt3r1bPp9PxcXF\ncjgcWrRokbKyspSdna2pU6cqPDxca9as6bLnALCdw1yk76919V9e/PVpd/0LoQ+h+IvgQvVF/avu\ni17vizy2zq4Xivl+0V0KGAAAtB8BAAAACxEAAACwEAEAAAALEQAAALAQAQAAAAsRAAAAsBABAAAA\nCxEAAACwEAEAAAALEQAAALAQAQAAAAsFvRsgAHzRPfviS3r7/X9JkrpFhKn2c1+r245JHKQJXx8b\nqq4BIUMAAGC9f35YqX/Wxp56UHt+bfsd+rDjOwR0Aj4CAADAQgQAAAAsRAAAAMBCBAAAACxEAAAA\nwEIEAAAALEQAAADAQgQAAAAsRAAAAMBCBAAAACxEAAAAwEIX5b0AMmf/RGHde7VrHyc/O6pHV98n\nl8vVQb0CAODicVEGgEpfb0VED2rXPj7318oY00E9AgDg4sJHAAAAWIgAAACAhQgAAABYiAAAAICF\nCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgAAABYiAAAAICFCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgA\nAABYiAAAAICFCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgAAABYiAAA
AICFCAAAAFiIAAAAgIUIAAAA\nWIgAAACAhdzBNjDGaPny5dq/f7/Cw8O1atUqDRgwILB+y5YtWrdundxut6ZMmaKMjIwW2xw6dEhL\nliyR0+nUoEGDlJubK0n6zW9+o82bN8vlcmnu3LmaOHFi6EYMAACCvwNQVFSkuro6bdy4UYsWLVJe\nXl5gXX19vfLz87V+/XoVFBSosLBQx44da7FNXl6eFi5cqA0bNsjv96uoqEhVVVUqKCjQM888o1//\n+td64IEHQjdaAAAgqRUBoLS0VOPGjZMkDR8+XHv37g2sO3DggGJjY+XxeBQWFqbk5GTt3LnzrDbl\n5eWSpPLyciUnJ0uSxo8frx07dqh79+7q37+/vF6vTp48KaeTTyUAAAi1oB8BVFdXKyoq6t8N3G75\n/X45nc6z1vXo0UNVVVXyer1NlrtcLjU0NMgYE1gWGRmpqqoqSdJll12mG2+8UcYYzZkzp0MGBgAA\nWhY0AHg8Hnm93sDjxl/+jeuqq6sD67xer3r27NlsG5fL1eSve6/Xq+joaG3btk2ffPKJtm7dKmOM\nZs2apcTERA0bNqxDBtgSp9OhmJgoud1Bn4JmxcREBd8oxLq6D7bXv1D6AABtEfS3X2JiorZu3aq0\ntDSVlZUpISEhsC4+Pl4HDx7UiRMnFBERod27d2vWrFmS1GybIUOGaNeuXRo5cqS2bdum0aNHKzo6\nWhEREQoLC5MkRUVFBd4ZCCW/36iioqpNASAmJkoVFaHv44XcB9vrXwh9IHwAaI+gv/1SU1NVUlKi\nzMxMSadO5Nu8ebNqamqUkZGhnJwczZw5U8YYpaenq2/fvs22kaTs7GwtXbpUPp9P8fHxSktLk8Ph\n0I4dO3TrrbfK6XQqKSlJ1113XQiHDAAAggYAh8OhFStWNFk2cODAwP9TUlKUkpIStI0kxcXFqaCg\n4Kzl99xzj+65557W9hkAALQTp9wDAGAhAgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGCh\ntl0HF8BFrTNu8y1Jx44dU1ZWlv7whz8oPDxc0qkbgcXFxUmSRowYoQULFnTq2AGcQgAALHT6Lbv3\n7NmjvLw8rVu3TtK/b/P9/PPPq1u3bsrKytKECRNUWlrabJvG23wnJycrNzdXRUVFmjhxorZv3641\na9aosrIyUPfQoUMaOnSoHnvssa4aOoD/w0cAgIVCfZtv6dRdQNevX6+ePXsG9r13714dPXpUM2bM\n0Ny5c/WPf/yjU8YL4Gy8AwBYqDNu833ttddKUpP1ffv21dy5c3XDDTeotLRUixcv1rPPPhuycQJo\nGQEAsFCob/N9OofDEfj/V7/6VblcLklSUlKSKioqOnZgAFqNAABYKNS3+T7d6e8ArF27Vr169dKd\nd96pffv26fLLL29Vf0N96+Pu3cOkz9rWNjKyW7v719m3du7Mel/ksXVFvY5EAAAsFOrbfJ/u9HcA\n5syZo8WLF6u4uFhutzuwj2AqKqo6Ytgtqqnxtbmt11vbrv7FxESFfHxdVe+LPLbOrheKoEEAACzU\nGbf5bvTaa68F/h8dHa3HH3+8jb0G0JH4FgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGAh\nAgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGAhAgAAABYiAAAAYCECAAAAFiIAAABgIQIA\nAAAWIgAAAGAhAgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGAhAgAAABYiAAAAYCECAAAA\nFiIAAABgIQIAAAAWIgAAAGAhAgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGAhAgAAABYi\nAAAAYCF3sA2MMVq+fLn279+v8PBwrVq1SgMGDAis37Jli9atWye3260pU6YoIyOjxTaHDh3SkiVL\n5HQ6NWjQIOXm5kqSiouLtW7d
OknS0KFDtWzZshANFwAASK14B6CoqEh1dXXauHGjFi1apLy8vMC6\n+vp65efna/369SooKFBhYaGOHTvWYpu8vDwtXLhQGzZskN/vV1FRkbxer372s5/p8ccfV2Fhofr3\n76/jx4+HbsQAACD4OwClpaUaN26cJGn48OHau3dvYN2BAwcUGxsrj8cjSUpOTtbOnTtVVlbWpE15\nebkkqby8XMnJyZKk8ePHq6SkRBEREUpISFB+fr4OHz6sjIwM9e7du2NHCQAAmggaAKqrqxUVFfXv\nBm63/H6/nE7nWet69Oihqqoqeb3eJstdLpcaGhpkjAksi4yMVHV1tY4fP6433nhDv//97xUREaHv\nfve7GjFihGJjYztqjAAA4AxBPwLweDzyer2Bx42//BvXVVdXB9Z5vV717Nmz2TYulyvQrnHb6Oho\n9erVS8OGDVOfPn3Uo0cPJScn65133umQwQEAgOYFfQcgMTFRW7duVVpamsrKypSQkBBYFx8fr4MH\nD+rEiROKiIjQ7t27NWvWLElqts2QIUO0a9cujRw5Utu2bdPo0aM1dOhQvffee/r000/l8Xi0Z88e\n3XbbbSEa7r85nQ7FxETJ7Q76FDQrJiYq+EYh1tV9sL3+hdIHAGiLoL/9UlNTVVJSoszMTEmnTuTb\nvHmzampqlJGRoZycHM2cOVPGGKWnp6tv377NtpGk7OxsLV26VD6fT/Hx8UpLS5PD4dDChQs1c+ZM\nORwO3XjjjfrKV74SwiGf4vcbVVRUtSkAxMREqaKiKgS9unj6YHv9C6EPhA8A7RH0t5/D4dCKFSua\nLBs4cGDg/ykpKUpJSQnaRpLi4uJUUFBw1vIbb7xRN954Y2v7DAAA2okLAQEAYCECAAAAFiIAAABg\nIQIAAAAWIgAAAGAhAgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGAhAgAAABYiAAAAYCEC\nAAAAFiIAAABgIQIAAAAWIgAAAGAhAgAAABYiAAAAYCECAAAAFiIAAABgIQIAAAAWIgAAAGAhAgBg\nIWOMcnNzlZmZqRkzZujw4cNN1m/ZskXp6enKzMzUM888c842hw4d0tSpUzVt2jStWLGiyX6OHTum\nG264QXV1dZKk2tpa/eAHP9B3v/tdzZ07V8ePH++E0QJoDgEAsFBRUZHq6uq0ceNGLVq0SHl5eYF1\n9fX1ys/P1/r161VQUKDCwkIdO3asxTZ5eXlauHChNmzYIL/fr6KiIknS9u3bNWvWLFVWVgb2/fTT\nTyshIUFPPfWUvvOd72jdunWdO3AAAQQAwEKlpaUaN26cJGn48OHau3dvYN2BAwcUGxsrj8ejsLAw\nJScna+fOnWe1KS8vlySVl5crOTlZkjR+/Hjt2LFDkuRyubR+/Xr17NmzSd3x48eftS2Azufu6g4A\n6HzV1dWKiooKPHa73fL7/XI6nWet69Gjh6qqquT1epssd7lcamhokDEmsCwyMlJVVVWSpGuvvVaS\nmqyvrq6Wx+MJbFtdXR2aAQIIigAAWMjj8cjr9QYeN/7yb1x3+i9mr9ernj17NtvG5XIF2jVuGx0d\n3aSWw+Fotu6ZgeJcYmJat11bde8eJn3WtraRkd3a3b9Qj68r632Rx9YV9ToSAQCwUGJiorZu3aq0\ntDSVlZUpISEhsC4+Pl4HDx7UiRMnFBERod27d2vWrFmS1GybIUOGaNeuXRo5cqS2bdum0aNHN6l1\n+jsAiYmJKi4u1rBhw1RcXBz46CCYioqq9g75nGpqfG1u6/XWtqt/MTFRIR9fV9X7Io+ts+uFImgQ\nAAALpaamqqSkRJmZmZJOnci3efNm1dTUKCMjQzk5OZo5c6aMMUpPT1ffvn2bbSNJ2dnZWrp0qXw+\nn+Lj45WWltak1unvAGRlZSk7O1tTp05VeHi41qxZ00kjBnAmAgBgIYfDcdZX9gYOHBj4f0pKil
JS\nUoK2kaS4uDgVFBS0WOu1114L/D8iIkIPP/xwG3sNoCPxLQAAACxEAAAAwEIEAAAALEQAAADAQgQA\nAAAsRAAAAMBCBAAAACxEAAAAwEIEAAAALEQAAADAQgQAAAAsRAAAAMBCBAAAACxEAAAAwEIEAAAA\nLEQAAADAQgQAAAAsRAAAAMBCBAAAACxEAAAAwEIEAAAALEQAAADAQgQAAAAsFDQAGGOUm5urzMxM\nzZgxQ4cPH26yfsuWLUpPT1dmZqaeeeaZc7Y5dOiQpk6dqmnTpmnFihVn1Zk9e7YKCws7amwAAKAF\nQQNAUVGR6urqtHHjRi1atEh5eXmBdfX19crPz9f69etVUFCgwsJCHTt2rMU2eXl5WrhwoTZs2CC/\n36+ioqLAvn7+85+rqqoqBEMEAABnChoASktLNW7cOEnS8OHDtXfv3sC6AwcOKDY2Vh6PR2FhYUpO\nTtbOnTvPalNeXi5JKi8vV3JysiRp/Pjx2rFjhyTp5ZdfltPp1NixYzt2dAAAoFlBA0B1dbWioqIC\nj91ut/x+f7PrevTooaqqKnm93ibLXS6XGhoaZIwJLIuMjFRVVZXee+89bd68WT/4wQ86ZEAAACA4\nd7ANPB6PvF5v4LHf75fT6Qysq66uDqzzer3q2bNns21cLlegXeO20dHRevHFF/Xxxx9rxowZOnLk\niMLDw9W/f/+QvxvgdDoUExMltzvoU9CsmJio4BuFWFf3wfb6F0ofAKAtgv72S0xM1NatW5WWlqay\nsjIlJCQE1sXHx+vgwYM6ceKEIiIitHv3bs2aNUuSmm0zZMgQ7dq1SyNHjtS2bds0evRoTZo0KbC/\ntWvXKiYmplM+CvD7jSoqqtoUAGJiolRR0bXnK3R1H2yvfyH0gfABoD2C/vZLTU1VSUmJMjMzJZ06\nkW/z5s2qqalRRkaGcnJyNHPmTBljlJ6err59+zbbRpKys7O1dOlS+Xw+xcfHKy0tLYRDAwAALQka\nABwOx1lf2Rs4cGDg/ykpKUpJSQnaRpLi4uJUUFDQYq277747WHcAAEAH4EJAAABYiAAAAICFCAAA\nAFiIAAAAgIUIAAAAWIgAAACAhQgAAABYiAAAAICFCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgAAABY\niAAAAICFCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgAAABYiAAAAICFCAAAAFiIAAAAgIUIAAAAWIgA\nAACAhQgAAABYiAAAAICFCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgAAABYiAAAAICFCAAAAFiIAAAA\ngIUIAADpki9qAAARWUlEQVQAWIgAAACAhQgAAABYiAAAAICF3F3dAQCdzxij5cuXa//+/QoPD9eq\nVas0YMCAwPotW7Zo3bp1crvdmjJlijIyMlpsc+jQIS1ZskROp1ODBg1Sbm6uJGnTpk0qLCxUWFiY\n5s2bp5SUFEnS+PHjFRcXJ0kaMWKEFixY0NnDByACAGCloqIi1dXVaePGjdqzZ4/y8vK0bt06SVJ9\nfb3y8/P1/PPPq1u3bsrKytKECRNUWlrabJu8vDwtXLhQycnJys3NVVFRka655hoVFBTohRde0Oef\nf66srCyNGTNGH330kYYOHarHHnusi58BAAQAwEKlpaUaN26cJGn48OHau3dvYN2BAwcUGxsrj8cj\nSUpOTtbOnTtVVlbWpE15ebkkqby8XMnJyZJO/XVfUlIip9OppKQkud1ueTwexcXFaf/+/Tp06JCO\nHj2qGTNmqHv37lqyZIkGDhzYmUMH8H84BwCwUHV1taKiogKP3W63/H5/s+t69Oihqqoqeb3eJstd\nLpcaGhpkjAksi4yMVHV19VnbNu6jb9++mjt3rv73f/9Xc+bM0eLFi0M5TADnwDsAgIU8Ho+8Xm/g\nsd/vl9PpDKyrrq4OrPN6verZs2ezbVwuV6Bd47bR0dHN7i
M6Olrx8fFyuVySpKSkJFVUVLSqvzEx\nUcE3aofu3cOkz9rWNjKyW7v7F+rxdWW9L/LYuqJeRyIAABZKTEzU1q1blZaWprKyMiUkJATWxcfH\n6+DBgzpx4oQiIiK0e/duzZo1S5KabTNkyBDt2rVLI0eO1LZt2zR69GgNGzZMDz30kOrq6lRbW6v3\n339fgwYN0i9+8Qv16tVLd955p/bt26fLL7+8Vf2tqKjq+CfhNDU1vja39Xpr29W/mJiokI+vq+p9\nkcfW2fVCETQIAICFUlNTVVJSoszMTElSXl6eNm/erJqaGmVkZCgnJ0czZ86UMUbp6enq27dvs20k\nKTs7W0uXLpXP51N8fLzS0tLkcDg0ffp0TZ06VcYYLVy4UOHh4YG3/YuLi+V2uwP7AND5CACAhRwO\nh1asWNFk2ekn46WkpAS+tneuNpIUFxengoKCs5ZnZGQoIyOjybLo6Gg9/vjj7eg5gI7CSYAAAFiI\nAAAAgIUIAAAAWIgAAACAhQgAAABYiAAAAICFCAAAAFgo6HUAOuO2oevXr9ef/vQnORwOjR8/Xnfd\ndVfoRgwAAIK/A3D6bUMXLVrU5MpdjbcNXb9+vQoKClRYWKhjx4612KbxtqEbNmyQ3+9XUVGRDh8+\nrM2bNwfuHb59+3a9++67oRsxAAAIHgBae9vQsLCwwG1Dz2zT0m1Dd+zYoX79+umJJ54I7LO+vl7d\nunXruBECAICzBA0AobxtaFVVlVwul3r16iVJWr16tYYMGaLY2Nj2jwwAALQo6DkAob5tqCTV1dUp\nJydHUVFRWr58ebsH1RpOp0MxMVFyu9t2O4QL4RaQXd0H2+tfKH0AgLYI+tsv1LcNlaT58+fr2muv\n1Z133hmKMTbL7zeqqKhqUwDo7FtOXoh9sL3+hdAHwgeA9gj62y/Utw0tKirS7t275fP5VFxcLIfD\noUWLFmn48OEhHDYAAHYLGgBCfdvQiRMnas+ePefTZwAA0E5cCAgAAAsRAAAAsBABAAAACxEAAACw\nEAEAAAALEQAAALAQAQAAAAsRAAAAsBABAAAACxEAAACwEAEAAAALEQAAALAQAQAAAAsRAAAAsBAB\nAAAACxEAAACwEAEAAAALEQAAALAQAQAAAAsRAAAAsBABAAAACxEAAACwEAEAAAALEQAAALAQAQAA\nAAsRAAAAsBABAAAACxEAAACwEAEAAAALEQAAALAQAQAAAAsRAAAAsBABAAAACxEAAACwEAEAAAAL\nEQAAALAQAQAAAAsRAAAAsBABAAAACxEAAACwEAEAAAALEQAAALAQAQAAAAsRAAAAsBABAAAACxEA\nAACwEAEAAAALEQAAALAQAQAAAAsRAAAAsBABAAAACxEAAACwEAEAAAALEQAAALCQO9gGxhgtX75c\n+/fvV3h4uFatWqUBAwYE1m/ZskXr1q2T2+3WlClTlJGR0WKbQ4cOacmSJXI6nRo0aJByc3MlSZs2\nbVJhYaHCwsI0b948paSkhGzAALpuXtfW1mrx4sWqrKyUx+NRfn6+evfu3VVPA2C1oO8AFBUVqa6u\nThs3btSiRYuUl5cXWFdfX6/8/HytX79eBQUFKiws1LFjx1psk5eXp4ULF2rDhg3y+/0qKirSJ598\nEmj7xBNPaM2aNfL5fKEbMYAum9dPP/20EhIS9NRTT+k73/mO1q1b11VPAWC9oO8AlJaWaty4cZKk\n4cOHa+/evYF1Bw4cUGxsrDwejyQpOTlZO3fuVFlZWZM25eXlkqTy8nIlJydLksaPH6+SkhI5nU4l\nJSXJ7XbL4/EoLi5O+/fv11e/+tWOHekZjN+vAwf+n9xu13m3PX7co2PHqgOP4+KukMt1/vsBukpX\nzOt9+/aptLRUs2fPDmxLAAC6TtAAUF1draioqH83cLvl9/vldDrPWtejRw9VVVXJ6/U2We5yudTQ\n0CBjTGBZZGSkqqurz9
q2cR/nYqoPyq/PWzfCFlR9tFffX/EvRXj6tGs/n1cf032zU/XlL8e2az/n\n68wQ0tlsr98RfYiPH9SBvTk/XTGvG5c3BovGbS8IDbXyV74lSXK5nWqo97e66fEIpw4ceK/NpTv7\ntdyZ9b7IY2upXlfO6/MVNAB4PB55vd7A48aDROO60yew1+tVz549m23jcrkC7Rq3jY6ObnYf0dHR\n5+xT0bOPtGJoAFrSVfP69H2cGRLOJSamddu11eoVPwrp/oELUdBzABITE1VcXCxJKisrU0JCQmBd\nfHy8Dh48qBMnTqiurk67d+/WNddcoxEjRjTbZsiQIdq1a5ckadu2bUpKStKwYcNUWlqquro6VVVV\n6f3339egQRdPggIuRl01r0/fR3FxceCjAwCdz2FOf/+uGaef+SudOuGnvLxcNTU1ysjI0Ouvv661\na9fKGKP09HRlZWU122bgwIH65z//qaVLl8rn8yk+Pl4rV66Uw+HQM888o8LCQhljNH/+fE2cODH0\nIwcs1lXz+vPPP1d2drYqKioUHh6uNWvW6JJLLunKpwKwVtAAAAAAvni4EBAAABYiAAAAYCECAAAA\nFgr6NcALSbDLl7ZHfX297r33Xh05ckQ+n0/z5s3TV77ylS65xGllZaWmTJmiJ598Ui6Xq1P78Ktf\n/UpbtmyRz+fT1KlTNXLkyE6tX19fr+zsbB05ckRut1v3339/pz0He/bs0c9+9jMVFBR0yOVty8rK\n9MADD8jtduu6667T3Xff3er677zzjlauXCmXy6Xw8HA9+OCD6tOnT0jrd4VXX31Vf/7zn7VmzRpJ\np56DVatWndXntWvXqri4WG63Wzk5Obr66qt1/Phx/fjHP1Ztba369u2rvLw8devWrdnLGLdGRx1f\nOut1FKpjVkv1/H6/7rvvPv3jH/+Q0+nUihUrFB4eHtJ50tHHwnPVuuWWWwLXqPjSl76kefPmhbRe\nKI615z3nzUXklVdeMUuWLDHGGFNWVmbmz5/fYft+7rnnzAMPPGCMMeazzz4zKSkpZt68eWbXrl3G\nGGOWLVtmXn31VVNRUWEmT55sfD6fqaqqMpMnTzZ1dXXmySefNI888ogxxpg//vGPZuXKlW3qh8/n\nM3fddZe54YYbzPvvv9+pfXjjjTfMvHnzjDHGeL1e88gjj3T6c1BUVGR+9KMfGWOMKSkpMffcc0+n\n9OG///u/zeTJk81tt91mjDEdUvM73/mOOXz4sDHGmNmzZ5t33nmn1fWnTZtm9u3bZ4wxZuPGjSY/\nPz+k9bvCypUrzaRJk8zChQsDy5rrc3l5ubn99tuNMcZ8+OGHZsqUKcYYY+6//37zwgsvGGOMefzx\nx8369euNz+czqamppqqqytTV1ZkpU6aYysrKVvWnI44vnfk6CtUxq6V6r776qrn33nuNMaeOFfPn\nzw9pvVAcC1uqVVtba26++eYmP8tQ1gvVsfZ85/xF9RHAuS5f2l6TJk3SD3/4Q0lSQ0ODXC6X3n77\n7SaXOP3rX/+qv//97y1e4nT8+PGBbXfs2NGmfqxevVpZWVnq27evjDGd2oft27crISFB3//+9zV/\n/nylpKR0+nMQFxcXuLpcVVWV3G53p/QhNjZWjz76aODxmZe3PZ+af/vb31RdXS2fz6cvfelLkqSx\nY8fqr3/9a6vrP/TQQxo8eLCkU3/phYeHh7R+V0hMTNTy5csDj5vrc0lJiUpLSzVmzBhJ0uWXXy6/\n369jx47pzTffDBwPGn/Wp1/GOCwsTElJSYFrFATTEceXznwddfQxK1i9iRMn6v7775ckffjhh+rZ\ns2dI63XksTBYrX379unkyZOaNWuW7rjjDu3Zsyek9Tr6WNvWOX9RBYCWLl/aEbp37x64XOkPf/hD\nLViwoNMvcfr888/rkksu0ZgxYwK1Tx9fqPtw/Phx7d27V7/4xS+0fPly/fjHP+7U+o3t
PvjgA6Wl\npWnZsmWaPn16p/wcUlNTm9zPoT01Gy+b27js9OWtrX/ppZdKkt5880399re/1R133NHsJXo7qn4o\nPfvss/r2t7/d5N/evXs1adKkJtu11Oczn/fmfh7n2ra14+6I40tnvo46+pjVmteN0+nUkiVLtHLl\nSk2ePDlk9Tr6WBhsbBEREZo1a5Z+/etfB459oXwuO/pY29Y5f1GdA3Cuy5d2hI8++kh33323pk2b\npm9961v66U9/GlgXqkucnu7555+Xw+FQSUmJ9u/fr+zsbB0/frzT+tCrVy/Fx8fL7XZr4MCB6tat\nm44ePdqpz8H69es1btw4LViwQEePHtX06dOb3B2yM/ogqd2Xtz0zfLTmEtdn+tOf/qTHH39cv/rV\nr9S7d+9Or99R0tPTlZ6eHnS75vrcs2dPhYWFNZn31dXVio6ODmzfp0+fwLjbcmnxRqE4voT6ddTR\nx6zWvG7y8/NVWVmp9PR01dbWhqReKI6F5xpbXFycYmNjA//v1auX3n777ZDVC8Wxti1z/qJ6B+Bc\nly9tr08++USzZs3S4sWLdfPNN0uSrrrqqk69xOmGDRtUUFCggoICXXnllXrwwQc1bty4TutDUlKS\n/vKXv0iSjh49qpqaGo0ePVo7d+7stOeg8ZrzkhQVFaX6+noNGTKkU/sgtf/yth6PR+Hh4Tp8+LCM\nMdq+fbuSkpJaXf/FF1/UU089pYKCAvXv31+SdPXVV3da/a7QUp9HjBih7du3yxijDz/8UMYY9erV\nS4mJidq2bZukUz+j5ORkXXHFFU0uY7xr1y5dc801raofiuNLKF9HoThmnaveiy++qF/96leSpG7d\nusnpdOqrX/1qu+ZmS/VCcSw819iee+455efnSzp17KuurtaYMWNCMjYpNMfatsz5i+pKgKaFS5F2\nhFWrVumll17SFVdcIWOMHA6HfvKTn2jlypVdconTGTNmaMWKFXI4HJ16mdWf/exn+tvf/iZjjBYt\nWqT+/fvrvvvu67T6J0+e1L333quKigrV19fr9ttv19ChQzulD0eOHNGiRYu0cePGDrm87d///net\nWrVKfr9fY8aM0Y9+dO4bzjTW/+1vf6trr71W/fr1k8fjkcPh0KhRo3T33XeHtH5X2LlzpwoLCwPf\nAmipz2vXrtW2bdtkjFFOTo4SExNVWVmp7OxsnTx5Ur1799aaNWsUERHR7GWMW6Ojji+d9ToK1TGr\npXo1NTXKycnRJ598ovr6es2dO1dXXHFFu+dmsNdpRx4LW6rl8/mUk5OjDz/8UE6nU4sXL1avXr1C\nOrZQHGvPd85fVAEAAAB0jIvqIwAAANAxCAAAAFiIAAAAgIUIAAAAWIgAAACAhQgAAABYiAAAAICF\nCAAAAFjo/wOY67O5O3hY0wAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x43a236a0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# NOTE(review): sns is not referenced in this cell; presumably seaborn is\n",
    "# imported only for the plot styling it applies on import -- TODO confirm,\n",
    "# and consider moving the import to the notebook's top import cell.\n",
    "import seaborn as sns\n",
    "# Draw one normalised histogram per column of the cleaned frame.\n",
    "# NOTE(review): matplotlib >= 3.1 no longer accepts `normed`; switch to\n",
    "# `density=True` when this notebook is moved to a newer plotting stack.\n",
    "df_cln2.hist(normed=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Single pipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Combine the transform, format and clean stages, in that order, into a\n",
    "# single Pipeline object.\n",
    "all_pipeline = Pipeline([\n",
    "    ('transform', mimic_transform),\n",
    "    ('format', standard_pipeline),\n",
    "    ('clean', cleaning_pipeline),\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>382993.000000</td>\n",
       "      <td>393608</td>\n",
       "      <td>393608</td>\n",
       "      <td>393608</td>\n",
       "      <td>393608.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>187323</td>\n",
       "      <td>616</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2140-07-14 03:59:00</td>\n",
       "      <td>1.2</td>\n",
       "      <td>mmol/L</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>8</td>\n",
       "      <td>20704</td>\n",
       "      <td>393592</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2096-08-25 16:32:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>last</th>\n",
       "      <td>NaN</td>\n",
       "      <td>2210-08-19 04:56:00</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>150112.612539</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>64364.242439</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>28874.716612</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>77900.842790</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>100001.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>818.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>125190.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1531.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>149789.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>50813.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>175567.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>50813.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>199999.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>225668.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   id             datetime   value   units         itemid\n",
       "count   382993.000000               393608  393608  393608  393608.000000\n",
       "unique            NaN               187323     616       2            NaN\n",
       "top               NaN  2140-07-14 03:59:00     1.2  mmol/L            NaN\n",
       "freq              NaN                    8   20704  393592            NaN\n",
       "first             NaN  2096-08-25 16:32:00     NaN     NaN            NaN\n",
       "last              NaN  2210-08-19 04:56:00     NaN     NaN            NaN\n",
       "mean    150112.612539                  NaN     NaN     NaN   64364.242439\n",
       "std      28874.716612                  NaN     NaN     NaN   77900.842790\n",
       "min     100001.000000                  NaN     NaN     NaN     818.000000\n",
       "25%     125190.000000                  NaN     NaN     NaN    1531.000000\n",
       "50%     149789.000000                  NaN     NaN     NaN   50813.000000\n",
       "75%     175567.000000                  NaN     NaN     NaN   50813.000000\n",
       "max     199999.000000                  NaN     NaN     NaN  225668.000000"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "label = 'lactate'\n",
    "df = utils.open_df(hdf5_fname,'extract/{}'.format(label))\n",
    "df.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th>mmol/L</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>100001</th>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100003</th>\n",
       "      <th>2150-04-17 19:12:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100006</th>\n",
       "      <th>2108-04-08 10:58:00</th>\n",
       "      <td>4.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100007</th>\n",
       "      <th>2145-03-31 00:44:00</th>\n",
       "      <td>3.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2145-04-02 14:10:00</th>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100009</th>\n",
       "      <th>2162-05-17 13:19:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2162-05-17 17:14:00</th>\n",
       "      <td>1.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">100010</th>\n",
       "      <th>2109-12-10 10:25:00</th>\n",
       "      <td>0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 12:11:00</th>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:05:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:58:00</th>\n",
       "      <td>0.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100011</th>\n",
       "      <th>2177-08-29 04:44:00</th>\n",
       "      <td>3.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-08-29 06:55:00</th>\n",
       "      <td>2.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100012</th>\n",
       "      <th>2177-03-14 07:38:00</th>\n",
       "      <td>2.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-14 11:42:00</th>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 08:05:00</th>\n",
       "      <td>2.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 14:01:00</th>\n",
       "      <td>2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 21:42:00</th>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100016</th>\n",
       "      <th>2188-05-24 12:00:00</th>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100017</th>\n",
       "      <th>2103-03-11 05:10:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100018</th>\n",
       "      <th>2176-08-29 15:29:00</th>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 09:23:00</th>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 10:19:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 11:29:00</th>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 12:40:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100020</th>\n",
       "      <th>2142-11-30 21:54:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2142-12-03 00:17:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100024</th>\n",
       "      <th>2170-09-19 10:25:00</th>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-19 16:33:00</th>\n",
       "      <td>2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-20 02:04:00</th>\n",
       "      <td>3.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199976</th>\n",
       "      <th>2182-02-14 11:15:00</th>\n",
       "      <td>0.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-16 03:57:00</th>\n",
       "      <td>0.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-19 03:59:00</th>\n",
       "      <td>0.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-20 03:31:00</th>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-21 04:55:00</th>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199979</th>\n",
       "      <th>2182-02-06 09:17:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-06 14:16:00</th>\n",
       "      <td>3.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">199981</th>\n",
       "      <th>2110-09-24 16:34:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-24 20:09:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-25 06:10:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199987</th>\n",
       "      <th>2175-05-19 16:30:00</th>\n",
       "      <td>2.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">199988</th>\n",
       "      <th>2169-01-24 12:48:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 01:35:00</th>\n",
       "      <td>1.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 11:18:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 16:43:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 22:35:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-10 05:33:00</th>\n",
       "      <td>1.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199993</th>\n",
       "      <th>2161-11-12 23:14:00</th>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161-11-13 03:46:00</th>\n",
       "      <td>0.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">199994</th>\n",
       "      <th>2188-07-07 21:23:00</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 03:09:00</th>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 04:13:00</th>\n",
       "      <td>0.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 06:20:00</th>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199998</th>\n",
       "      <th>2119-02-20 10:52:00</th>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 12:36:00</th>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:33:00</th>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:59:00</th>\n",
       "      <td>2.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 20:43:00</th>\n",
       "      <td>1.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-04 20:55:00</th>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-06 15:29:00</th>\n",
       "      <td>1.8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>177439 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      lactate\n",
       "status                       known\n",
       "variable_type                   qn\n",
       "units                       mmol/L\n",
       "id     datetime                   \n",
       "100001 2117-09-11 09:32:00     1.9\n",
       "100003 2150-04-17 19:12:00     1.1\n",
       "100006 2108-04-08 10:58:00     4.5\n",
       "100007 2145-03-31 00:44:00     3.1\n",
       "       2145-04-02 14:10:00     1.9\n",
       "100009 2162-05-17 13:19:00     1.1\n",
       "       2162-05-17 17:14:00     1.5\n",
       "100010 2109-12-10 10:25:00     0.6\n",
       "       2109-12-10 12:11:00     0.9\n",
       "       2109-12-10 13:05:00     1.0\n",
       "       2109-12-10 13:58:00     0.8\n",
       "100011 2177-08-29 04:44:00     3.8\n",
       "       2177-08-29 06:55:00     2.3\n",
       "100012 2177-03-14 07:38:00     2.3\n",
       "       2177-03-14 11:42:00     2.5\n",
       "       2177-03-15 08:05:00     2.1\n",
       "       2177-03-15 14:01:00     2.6\n",
       "       2177-03-15 21:42:00     1.8\n",
       "100016 2188-05-24 12:00:00     2.0\n",
       "100017 2103-03-11 05:10:00     1.1\n",
       "100018 2176-08-29 15:29:00     1.3\n",
       "       2176-08-30 09:23:00     0.9\n",
       "       2176-08-30 10:19:00     1.0\n",
       "       2176-08-30 11:29:00     0.9\n",
       "       2176-08-30 12:40:00     1.1\n",
       "100020 2142-11-30 21:54:00     1.1\n",
       "       2142-12-03 00:17:00     1.0\n",
       "100024 2170-09-19 10:25:00     1.4\n",
       "       2170-09-19 16:33:00     2.6\n",
       "       2170-09-20 02:04:00     3.2\n",
       "...                            ...\n",
       "199976 2182-02-14 11:15:00     0.8\n",
       "       2182-02-16 03:57:00     0.8\n",
       "       2182-02-19 03:59:00     0.8\n",
       "       2182-02-20 03:31:00     0.7\n",
       "       2182-02-21 04:55:00     0.9\n",
       "199979 2182-02-06 09:17:00     1.0\n",
       "       2182-02-06 14:16:00     3.8\n",
       "199981 2110-09-24 16:34:00     1.1\n",
       "       2110-09-24 20:09:00     1.0\n",
       "       2110-09-25 06:10:00     1.0\n",
       "199987 2175-05-19 16:30:00     2.3\n",
       "199988 2169-01-24 12:48:00     1.0\n",
       "       2169-02-07 01:35:00     1.6\n",
       "       2169-02-07 11:18:00     1.0\n",
       "       2169-02-07 16:43:00     1.0\n",
       "       2169-02-07 22:35:00     1.1\n",
       "       2169-02-10 05:33:00     1.4\n",
       "199993 2161-11-12 23:14:00     0.9\n",
       "       2161-11-13 03:46:00     0.9\n",
       "199994 2188-07-07 21:23:00     1.0\n",
       "       2188-07-08 03:09:00     0.7\n",
       "       2188-07-08 04:13:00     0.6\n",
       "       2188-07-08 06:20:00     0.7\n",
       "199998 2119-02-20 10:52:00     1.1\n",
       "       2119-02-20 12:36:00     1.9\n",
       "       2119-02-20 13:33:00     2.0\n",
       "       2119-02-20 13:59:00     2.6\n",
       "       2119-02-20 20:43:00     1.3\n",
       "199999 2136-04-04 20:55:00     1.9\n",
       "       2136-04-06 15:29:00     1.8\n",
       "\n",
       "[177439 rows x 1 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "all_pipeline.set_params(transform__add_level__level_val=label)\n",
    "all_pipeline.transform(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Loop over everything: run the pipeline for each label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import logger"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:18:23) heart rate\n",
      "(2017-06-03 07:18:23)>> Open Extract\n",
      "(2017-06-03 07:18:26)<< DONE (3.0s)\n",
      "(2017-06-03 07:18:26)>> Run Pipeline\n",
      "(2017-06-03 07:21:19)<< DONE (173.0s)\n",
      "(2017-06-03 07:21:19)>> Analyze...\n",
      "((7952939, 1), (7922961, 1), 29978L, 171, '0.3015% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>heart rate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>beats/min</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>7.922961e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.026618e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.552932e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-8.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>7.700000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>9.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.190000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>9.999999e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label            heart rate\n",
       "status                known\n",
       "variable_type            qn\n",
       "units             beats/min\n",
       "count          7.922961e+06\n",
       "mean           1.026618e+02\n",
       "std            3.552932e+03\n",
       "min           -8.800000e+01\n",
       "25%            7.700000e+01\n",
       "50%            9.200000e+01\n",
       "75%            1.190000e+02\n",
       "max            9.999999e+06"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:21:20)<< DONE (1.0s)\n",
      "(2017-06-03 07:21:20)>> Joining!\n",
      "(2017-06-03 07:21:20)<< DONE (0.0s)\n",
      "(2017-06-03 07:21:20) DONE (177.0s)\n",
      "(2017-06-03 07:21:20) blood pressure systolic\n",
      "(2017-06-03 07:21:20)>> Open Extract\n",
      "(2017-06-03 07:21:23)<< DONE (3.0s)\n",
      "(2017-06-03 07:21:23)>> Run Pipeline\n",
      "(2017-06-03 07:25:39)<< DONE (256.0s)\n",
      "(2017-06-03 07:25:39)>> Analyze...\n",
      "((6374824, 1), (5974186, 1), 548979L, 174, '0.307% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>blood pressure systolic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mmHg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>5.825845e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.214682e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.311542e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-6.900000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.040000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.190000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.370000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.411460e+05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         blood pressure systolic\n",
       "status                          known\n",
       "variable_type                      qn\n",
       "units                            mmHg\n",
       "count                    5.825845e+06\n",
       "mean                     1.214682e+02\n",
       "std                      1.311542e+02\n",
       "min                     -6.900000e+01\n",
       "25%                      1.040000e+02\n",
       "50%                      1.190000e+02\n",
       "75%                      1.370000e+02\n",
       "max                      1.411460e+05"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:25:40)<< DONE (1.0s)\n",
      "(2017-06-03 07:25:40)>> Joining!\n",
      "(2017-06-03 07:26:28)<< DONE (48.0s)\n",
      "(2017-06-03 07:26:28) DONE (308.0s)\n",
      "(2017-06-03 07:26:28) blood pressure diastolic\n",
      "(2017-06-03 07:26:28)>> Open Extract\n",
      "(2017-06-03 07:26:31)<< DONE (3.0s)\n",
      "(2017-06-03 07:26:31)>> Run Pipeline\n",
      "(2017-06-03 07:30:32)<< DONE (241.0s)\n",
      "(2017-06-03 07:30:32)>> Analyze...\n",
      "((6371249, 1), (5976313, 1), 543269L, 170, '0.2999% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>blood pressure diastolic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mmHg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>5.827980e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>6.115824e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.350768e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-1.600000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>5.000000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>5.900000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>6.900000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.141090e+05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         blood pressure diastolic\n",
       "status                           known\n",
       "variable_type                       qn\n",
       "units                             mmHg\n",
       "count                     5.827980e+06\n",
       "mean                      6.115824e+01\n",
       "std                       2.350768e+02\n",
       "min                      -1.600000e+01\n",
       "25%                       5.000000e+01\n",
       "50%                       5.900000e+01\n",
       "75%                       6.900000e+01\n",
       "max                       1.141090e+05"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:30:34)<< DONE (2.0s)\n",
      "(2017-06-03 07:30:34)>> Joining!\n",
      "(2017-06-03 07:31:25)<< DONE (51.0s)\n",
      "(2017-06-03 07:31:25) DONE (297.0s)\n",
      "(2017-06-03 07:31:25) blood pressure mean\n",
      "(2017-06-03 07:31:25)>> Open Extract\n",
      "(2017-06-03 07:31:26)<< DONE (1.0s)\n",
      "(2017-06-03 07:31:26)>> Run Pipeline\n",
      "(2017-06-03 07:32:39)<< DONE (73.0s)\n",
      "(2017-06-03 07:32:39)>> Analyze...\n",
      "((2536271, 1), (2415995, 1), 120276L, 0, '0.0% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>blood pressure mean</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mmHg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>2.415995e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>7.879668e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.413279e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-1.350000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>6.700000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>7.700000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>8.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.201300e+05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         blood pressure mean\n",
       "status                      known\n",
       "variable_type                  qn\n",
       "units                        mmHg\n",
       "count                2.415995e+06\n",
       "mean                 7.879668e+01\n",
       "std                  1.413279e+02\n",
       "min                 -1.350000e+02\n",
       "25%                  6.700000e+01\n",
       "50%                  7.700000e+01\n",
       "75%                  8.800000e+01\n",
       "max                  1.201300e+05"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:32:40)<< DONE (1.0s)\n",
      "(2017-06-03 07:32:40)>> Joining!\n",
      "(2017-06-03 07:33:20)<< DONE (40.0s)\n",
      "(2017-06-03 07:33:20) DONE (115.0s)\n",
      "(2017-06-03 07:33:20) respiratory rate\n",
      "(2017-06-03 07:33:20)>> Open Extract\n",
      "(2017-06-03 07:33:24)<< DONE (4.0s)\n",
      "(2017-06-03 07:33:24)>> Run Pipeline\n",
      "(2017-06-03 07:37:33)<< DONE (249.0s)\n",
      "(2017-06-03 07:37:33)>> Analyze...\n",
      "((7810019, 1), (7780015, 1), 5072936L, 172, '0.3035% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>respiratory rate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>insp/min</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>2.737083e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.092347e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.423811e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.600000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.000000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2.400000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2.355555e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         respiratory rate\n",
       "status                   known\n",
       "variable_type               qn\n",
       "units                 insp/min\n",
       "count             2.737083e+06\n",
       "mean              2.092347e+01\n",
       "std               1.423811e+03\n",
       "min               0.000000e+00\n",
       "25%               1.600000e+01\n",
       "50%               2.000000e+01\n",
       "75%               2.400000e+01\n",
       "max               2.355555e+06"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:37:35)<< DONE (2.0s)\n",
      "(2017-06-03 07:37:35)>> Joining!\n",
      "(2017-06-03 07:38:29)<< DONE (54.0s)\n",
      "(2017-06-03 07:38:29) DONE (309.0s)\n",
      "(2017-06-03 07:38:29) temperature body\n",
      "(2017-06-03 07:38:29)>> Open Extract\n",
      "(2017-06-03 07:38:31)<< DONE (2.0s)\n",
      "(2017-06-03 07:38:31)>> Run Pipeline\n",
      "(2017-06-03 07:39:35)<< DONE (64.0s)\n",
      "(2017-06-03 07:39:35)>> Analyze...\n",
      "((1751447, 1), (1731794, 1), 461152L, 156, '0.3189% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>degF</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1.290295e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>9.850479e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>9.563484e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-9.990000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>9.760000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>9.850000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>9.950000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>9.637000e+03</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         temperature body\n",
       "status                   known\n",
       "variable_type               qn\n",
       "units                     degF\n",
       "count             1.290295e+06\n",
       "mean              9.850479e+01\n",
       "std               9.563484e+00\n",
       "min              -9.990000e+01\n",
       "25%               9.760000e+01\n",
       "50%               9.850000e+01\n",
       "75%               9.950000e+01\n",
       "max               9.637000e+03"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:39:36)<< DONE (1.0s)\n",
      "(2017-06-03 07:39:36)>> Joining!\n",
      "(2017-06-03 07:40:14)<< DONE (38.0s)\n",
      "(2017-06-03 07:40:14) DONE (105.0s)\n",
      "(2017-06-03 07:40:14) oxygen saturation pulse oximetry\n",
      "(2017-06-03 07:40:14)>> Open Extract\n",
      "(2017-06-03 07:40:17)<< DONE (3.0s)\n",
      "(2017-06-03 07:40:17)>> Run Pipeline\n",
      "(2017-06-03 07:42:42)<< DONE (145.0s)\n",
      "(2017-06-03 07:42:42)>> Analyze...\n",
      "((6099827, 1), (6073019, 1), 26808L, 163, '0.3326% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>oxygen saturation pulse oximetry</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>6.073019e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>9.885942e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.942035e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>9.600000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>9.800000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>9.900000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>6.363333e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         oxygen saturation pulse oximetry\n",
       "status                                   known\n",
       "variable_type                               qn\n",
       "units                                  percent\n",
       "count                             6.073019e+06\n",
       "mean                              9.885942e+01\n",
       "std                               2.942035e+03\n",
       "min                               0.000000e+00\n",
       "25%                               9.600000e+01\n",
       "50%                               9.800000e+01\n",
       "75%                               9.900000e+01\n",
       "max                               6.363333e+06"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:42:43)<< DONE (1.0s)\n",
      "(2017-06-03 07:42:43)>> Joining!\n",
      "(2017-06-03 07:43:35)<< DONE (52.0s)\n",
      "(2017-06-03 07:43:35) DONE (201.0s)\n",
      "(2017-06-03 07:43:35) weight body\n",
      "(2017-06-03 07:43:35)>> Open Extract\n",
      "(2017-06-03 07:43:36)<< DONE (1.0s)\n",
      "(2017-06-03 07:43:36)>> Run Pipeline\n",
      "(2017-06-03 07:43:40)<< DONE (4.0s)\n",
      "(2017-06-03 07:43:40)>> Analyze...\n",
      "((95425, 1), (94457, 1), 1956L, 158, '0.4958% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>weight body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>kg</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>9.346900e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.676828e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>3.908673e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>6.980000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>8.340000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>9.890000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>8.654765e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label           weight body\n",
       "status                known\n",
       "variable_type            qn\n",
       "units                    kg\n",
       "count          9.346900e+04\n",
       "mean           2.676828e+02\n",
       "std            3.908673e+04\n",
       "min            0.000000e+00\n",
       "25%            6.980000e+01\n",
       "50%            8.340000e+01\n",
       "75%            9.890000e+01\n",
       "max            8.654765e+06"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:43:40)<< DONE (0.0s)\n",
      "(2017-06-03 07:43:40)>> Joining!\n",
      "(2017-06-03 07:44:15)<< DONE (35.0s)\n",
      "(2017-06-03 07:44:15) DONE (40.0s)\n",
      "(2017-06-03 07:44:15) output urine\n",
      "(2017-06-03 07:44:15)>> Open Extract\n",
      "(2017-06-03 07:44:16)<< DONE (1.0s)\n",
      "(2017-06-03 07:44:16)>> Run Pipeline\n",
      "(2017-06-03 07:47:57)<< DONE (221.0s)\n",
      "(2017-06-03 07:47:57)>> Analyze...\n",
      "((3644639, 1), (3624029, 1), 417242L, 92, '0.1758% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>output urine</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>3.216363e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.265574e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.577291e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-1.500000e+03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>4.200000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>8.000000e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.600000e+02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.555555e+06</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label          output urine\n",
       "status                known\n",
       "variable_type            qn\n",
       "units                    mL\n",
       "count          3.216363e+06\n",
       "mean           1.265574e+02\n",
       "std            2.577291e+03\n",
       "min           -1.500000e+03\n",
       "25%            4.200000e+01\n",
       "50%            8.000000e+01\n",
       "75%            1.600000e+02\n",
       "max            4.555555e+06"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:47:58)<< DONE (1.0s)\n",
      "(2017-06-03 07:47:58)>> Joining!\n",
      "(2017-06-03 07:48:43)<< DONE (45.0s)\n",
      "(2017-06-03 07:48:43) DONE (268.0s)\n",
      "(2017-06-03 07:48:43) glasgow coma scale motor\n",
      "(2017-06-03 07:48:43)>> Open Extract\n",
      "(2017-06-03 07:48:43)<< DONE (0.0s)\n",
      "(2017-06-03 07:48:43)>> Run Pipeline\n",
      "(2017-06-03 07:49:29)<< DONE (46.0s)\n",
      "(2017-06-03 07:49:29)>> Analyze...\n",
      "((952565, 1), (949198, 1), 3367L, 1, '0.0037% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>glasgow coma scale motor</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>ord</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>949198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>Obeys commands</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>642879</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         glasgow coma scale motor\n",
       "status                           known\n",
       "variable_type                      ord\n",
       "units                         no_units\n",
       "count                           949198\n",
       "unique                               6\n",
       "top                     Obeys commands\n",
       "freq                            642879"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:49:29)<< DONE (0.0s)\n",
      "(2017-06-03 07:49:29)>> Joining!\n",
      "(2017-06-03 07:50:06)<< DONE (37.0s)\n",
      "(2017-06-03 07:50:06) DONE (83.0s)\n",
      "(2017-06-03 07:50:06) glasgow coma scale eye opening\n",
      "(2017-06-03 07:50:06)>> Open Extract\n",
      "(2017-06-03 07:50:07)<< DONE (1.0s)\n",
      "(2017-06-03 07:50:07)>> Run Pipeline\n",
      "(2017-06-03 07:50:50)<< DONE (43.0s)\n",
      "(2017-06-03 07:50:50)>> Analyze...\n",
      "((956672, 1), (953595, 1), 3077L, 1, '0.0037% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>glasgow coma scale eye opening</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>ord</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>953595</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>Eyes open spontaneously</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>588057</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         glasgow coma scale eye opening\n",
       "status                                 known\n",
       "variable_type                            ord\n",
       "units                               no_units\n",
       "count                                 953595\n",
       "unique                                     4\n",
       "top                  Eyes open spontaneously\n",
       "freq                                  588057"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:50:50)<< DONE (0.0s)\n",
      "(2017-06-03 07:50:50)>> Joining!\n",
      "(2017-06-03 07:51:28)<< DONE (38.0s)\n",
      "(2017-06-03 07:51:28) DONE (82.0s)\n",
      "(2017-06-03 07:51:28) glasgow coma scale verbal\n",
      "(2017-06-03 07:51:28)>> Open Extract\n",
      "(2017-06-03 07:51:29)<< DONE (1.0s)\n",
      "(2017-06-03 07:51:29)>> Run Pipeline\n",
      "(2017-06-03 07:52:14)<< DONE (45.0s)\n",
      "(2017-06-03 07:52:14)>> Analyze...\n",
      "((954700, 1), (950913, 1), 3787L, 2, '0.0074% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>glasgow coma scale verbal</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>ord</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>no_units</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>950913</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>No verbal response (&gt;2 yrs); no vocal response...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>465193</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                                  glasgow coma scale verbal\n",
       "status                                                     known\n",
       "variable_type                                                ord\n",
       "units                                                   no_units\n",
       "count                                                     950913\n",
       "unique                                                         5\n",
       "top            No verbal response (>2 yrs); no vocal response...\n",
       "freq                                                      465193"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 07:52:14)<< DONE (0.0s)\n",
      "(2017-06-03 07:52:14)>> Joining!\n",
      "(2017-06-03 07:52:51)<< DONE (37.0s)\n",
      "(2017-06-03 07:52:51) DONE (83.0s)\n",
      "(2017-06-03 07:52:51) normal saline\n",
      "(2017-06-03 07:52:51)>> Open Extract\n",
      "(2017-06-03 07:52:51)<< DONE (0.0s)\n",
      "(2017-06-03 07:52:51)>> Run Pipeline\n",
      "(2017-06-03 08:01:38)<< DONE (527.0s)\n",
      "(2017-06-03 08:01:38)>> Analyze...\n",
      "((817373, 1), (504874, 2), 265615L, 8, '0.0405% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"2\" halign=\"left\">normal saline</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>49173.000000</td>\n",
       "      <td>456484.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>389.563178</td>\n",
       "      <td>75.626948</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>380.287765</td>\n",
       "      <td>217.512467</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>-906.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>250.000000</td>\n",
       "      <td>15.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>500.000000</td>\n",
       "      <td>62.204886</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>11000.000000</td>\n",
       "      <td>51947.999400</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         normal saline               \n",
       "status                known               \n",
       "variable_type            qn               \n",
       "units                    mL          mL/hr\n",
       "count          49173.000000  456484.000000\n",
       "mean             389.563178      75.626948\n",
       "std              380.287765     217.512467\n",
       "min                0.000000    -906.000000\n",
       "25%              100.000000       6.000000\n",
       "50%              250.000000      15.000000\n",
       "75%              500.000000      62.204886\n",
       "max            11000.000000   51947.999400"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 08:01:38)<< DONE (0.0s)\n",
      "(2017-06-03 08:01:38)>> Joining!\n",
      "(2017-06-03 08:02:15)<< DONE (37.0s)\n",
      "(2017-06-03 08:02:15) DONE (564.0s)\n",
      "(2017-06-03 08:02:15) lactated ringers\n",
      "(2017-06-03 08:02:15)>> Open Extract\n",
      "(2017-06-03 08:02:15)<< DONE (0.0s)\n",
      "(2017-06-03 08:02:15)>> Run Pipeline\n",
      "(2017-06-03 08:02:57)<< DONE (42.0s)\n",
      "(2017-06-03 08:02:57)>> Analyze...\n",
      "((504306, 1), (254175, 2), 19193L, 18, '0.1085% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"2\" halign=\"left\">lactated ringers</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>248510.000000</td>\n",
       "      <td>2161.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>204.109607</td>\n",
       "      <td>289.877760</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>338.277593</td>\n",
       "      <td>418.153805</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>15.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>100.000000</td>\n",
       "      <td>99.994818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>200.000000</td>\n",
       "      <td>499.999980</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>60000.000000</td>\n",
       "      <td>3923.333176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         lactated ringers             \n",
       "status                   known             \n",
       "variable_type               qn             \n",
       "units                       mL        mL/hr\n",
       "count            248510.000000  2161.000000\n",
       "mean                204.109607   289.877760\n",
       "std                 338.277593   418.153805\n",
       "min                   0.000000     0.000000\n",
       "25%                  15.000000     0.000000\n",
       "50%                 100.000000    99.994818\n",
       "75%                 200.000000   499.999980\n",
       "max               60000.000000  3923.333176"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 08:02:57)<< DONE (0.0s)\n",
      "(2017-06-03 08:02:57)>> Joining!\n",
      "(2017-06-03 08:03:34)<< DONE (37.0s)\n",
      "(2017-06-03 08:03:34) DONE (79.0s)\n",
      "(2017-06-03 08:03:34) norepinephrine\n",
      "(2017-06-03 08:03:34)>> Open Extract\n",
      "(2017-06-03 08:03:34)<< DONE (0.0s)\n",
      "(2017-06-03 08:03:34)>> Run Pipeline\n",
      "(2017-06-03 08:05:19)<< DONE (105.0s)\n",
      "(2017-06-03 08:05:19)>> Analyze...\n",
      "((1136938, 1), (389986, 2), 331666L, 17, '0.231% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"2\" halign=\"left\">norepinephrine</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mcg/kg/min</th>\n",
       "      <th>mcg/min</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>286781.000000</td>\n",
       "      <td>14226.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.141207</td>\n",
       "      <td>9.687762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.343212</td>\n",
       "      <td>14.889317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.035000</td>\n",
       "      <td>2.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>0.080000</td>\n",
       "      <td>6.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>0.180000</td>\n",
       "      <td>12.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>50.000000</td>\n",
       "      <td>250.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         norepinephrine              \n",
       "status                 known              \n",
       "variable_type             qn              \n",
       "units             mcg/kg/min       mcg/min\n",
       "count          286781.000000  14226.000000\n",
       "mean                0.141207      9.687762\n",
       "std                 0.343212     14.889317\n",
       "min                 0.000000      0.000000\n",
       "25%                 0.035000      2.666667\n",
       "50%                 0.080000      6.000000\n",
       "75%                 0.180000     12.000000\n",
       "max                50.000000    250.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 08:05:20)<< DONE (1.0s)\n",
      "(2017-06-03 08:05:20)>> Joining!\n",
      "(2017-06-03 08:05:58)<< DONE (38.0s)\n",
      "(2017-06-03 08:05:58) DONE (144.0s)\n",
      "(2017-06-03 08:05:58) vasopressin\n",
      "(2017-06-03 08:05:58)>> Open Extract\n",
      "(2017-06-03 08:05:58)<< DONE (0.0s)\n",
      "(2017-06-03 08:05:58)>> Run Pipeline\n",
      "(2017-06-03 08:06:20)<< DONE (22.0s)\n",
      "(2017-06-03 08:06:20)>> Analyze...\n",
      "((339184, 1), (110333, 2), 16608L, 8, '0.3406% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"2\" halign=\"left\">vasopressin</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>units</th>\n",
       "      <th>units/min</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>57371.000000</td>\n",
       "      <td>98872.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.517366</td>\n",
       "      <td>0.992521</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>4.842854</td>\n",
       "      <td>1.291858</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.800000</td>\n",
       "      <td>0.040000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.400000</td>\n",
       "      <td>0.044000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2.400000</td>\n",
       "      <td>2.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>722.400000</td>\n",
       "      <td>50.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label           vasopressin              \n",
       "status                known              \n",
       "variable_type            qn              \n",
       "units                 units     units/min\n",
       "count          57371.000000  98872.000000\n",
       "mean               2.517366      0.992521\n",
       "std                4.842854      1.291858\n",
       "min                0.000000      0.000000\n",
       "25%                1.800000      0.040000\n",
       "50%                2.400000      0.044000\n",
       "75%                2.400000      2.400000\n",
       "max              722.400000     50.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 08:06:20)<< DONE (0.0s)\n",
      "(2017-06-03 08:06:20)>> Joining!\n",
      "(2017-06-03 08:06:56)<< DONE (36.0s)\n",
      "(2017-06-03 08:06:56) DONE (58.0s)\n",
      "(2017-06-03 08:06:56) hemoglobin\n",
      "(2017-06-03 08:06:56)>> Open Extract\n",
      "(2017-06-03 08:06:56)<< DONE (0.0s)\n",
      "(2017-06-03 08:06:56)>> Run Pipeline\n",
      "(2017-06-03 08:07:52)<< DONE (56.0s)\n",
      "(2017-06-03 08:07:52)>> Analyze...\n",
      "((1167921, 1), (671087, 1), 641393L, 7, '0.0123% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>hemoglobin</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>g/dL</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>526528.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>10.351784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>1.980592</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>9.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>10.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>11.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>130.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label             hemoglobin\n",
       "status                 known\n",
       "variable_type             qn\n",
       "units                   g/dL\n",
       "count          526528.000000\n",
       "mean               10.351784\n",
       "std                 1.980592\n",
       "min                 0.000000\n",
       "25%                 9.000000\n",
       "50%                10.100000\n",
       "75%                11.400000\n",
       "max               130.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 08:07:52)<< DONE (0.0s)\n",
      "(2017-06-03 08:07:52)>> Joining!\n",
      "(2017-06-03 08:08:36)<< DONE (44.0s)\n",
      "(2017-06-03 08:08:36) DONE (100.0s)\n",
      "(2017-06-03 08:08:36) lactate\n",
      "(2017-06-03 08:08:36)>> Open Extract\n",
      "(2017-06-03 08:08:37)<< DONE (1.0s)\n",
      "(2017-06-03 08:08:37)>> Run Pipeline\n",
      "(2017-06-03 08:08:52)<< DONE (15.0s)\n",
      "(2017-06-03 08:08:52)>> Analyze...\n",
      "((393608, 1), (177439, 1), 216244L, 33, '0.0962% records')\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th>lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>mmol/L</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>177364.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>8.286749</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2374.466306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>999999.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                lactate\n",
       "status                 known\n",
       "variable_type             qn\n",
       "units                 mmol/L\n",
       "count          177364.000000\n",
       "mean                8.286749\n",
       "std              2374.466306\n",
       "min                 0.000000\n",
       "25%                 1.200000\n",
       "50%                 1.800000\n",
       "75%                 2.900000\n",
       "max            999999.000000"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-03 08:08:52)<< DONE (0.0s)\n",
      "(2017-06-03 08:08:52)>> Joining!\n",
      "(2017-06-03 08:09:35)<< DONE (43.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>heart rate</th>\n",
       "      <th>blood pressure systolic</th>\n",
       "      <th>blood pressure diastolic</th>\n",
       "      <th>blood pressure mean</th>\n",
       "      <th>respiratory rate</th>\n",
       "      <th>temperature body</th>\n",
       "      <th>oxygen saturation pulse oximetry</th>\n",
       "      <th>weight body</th>\n",
       "      <th>output urine</th>\n",
       "      <th>glasgow coma scale motor</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">normal saline</th>\n",
       "      <th colspan=\"2\" halign=\"left\">lactated ringers</th>\n",
       "      <th colspan=\"2\" halign=\"left\">norepinephrine</th>\n",
       "      <th colspan=\"2\" halign=\"left\">vasopressin</th>\n",
       "      <th>hemoglobin</th>\n",
       "      <th>lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>ord</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th>beats/min</th>\n",
       "      <th>mmHg</th>\n",
       "      <th>mmHg</th>\n",
       "      <th>mmHg</th>\n",
       "      <th>insp/min</th>\n",
       "      <th>degF</th>\n",
       "      <th>percent</th>\n",
       "      <th>kg</th>\n",
       "      <th>mL</th>\n",
       "      <th>no_units</th>\n",
       "      <th>...</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "      <th>mcg/kg/min</th>\n",
       "      <th>mcg/min</th>\n",
       "      <th>units</th>\n",
       "      <th>units/min</th>\n",
       "      <th>g/dL</th>\n",
       "      <th>mmol/L</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 09:22:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 12:50:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 12:55:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.996487</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 12:57:00</th>\n",
       "      <td>122.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:00:00</th>\n",
       "      <td>118.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:01:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>192.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>122.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>499.999980</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:49:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>300.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:50:00</th>\n",
       "      <td>118.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      heart rate blood pressure systolic  \\\n",
       "status                          known                   known   \n",
       "variable_type                      qn                      qn   \n",
       "units                       beats/min                    mmHg   \n",
       "id     datetime                                                 \n",
       "100001 2117-09-11 09:22:00        NaN                     NaN   \n",
       "       2117-09-11 09:32:00        NaN                     NaN   \n",
       "       2117-09-11 12:50:00        NaN                     NaN   \n",
       "       2117-09-11 12:55:00        NaN                     NaN   \n",
       "       2117-09-11 12:57:00      122.0                     NaN   \n",
       "       2117-09-11 13:00:00      118.0                     NaN   \n",
       "       2117-09-11 13:01:00        NaN                   192.0   \n",
       "       2117-09-11 13:48:00        NaN                     NaN   \n",
       "       2117-09-11 13:49:00        NaN                     NaN   \n",
       "       2117-09-11 13:50:00      118.0                     NaN   \n",
       "\n",
       "label                      blood pressure diastolic blood pressure mean  \\\n",
       "status                                        known               known   \n",
       "variable_type                                    qn                  qn   \n",
       "units                                          mmHg                mmHg   \n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:22:00                      NaN                 NaN   \n",
       "       2117-09-11 09:32:00                      NaN                 NaN   \n",
       "       2117-09-11 12:50:00                      NaN                 NaN   \n",
       "       2117-09-11 12:55:00                      NaN                 NaN   \n",
       "       2117-09-11 12:57:00                      NaN                 NaN   \n",
       "       2117-09-11 13:00:00                      NaN                 NaN   \n",
       "       2117-09-11 13:01:00                    100.0               122.0   \n",
       "       2117-09-11 13:48:00                      NaN                 NaN   \n",
       "       2117-09-11 13:49:00                      NaN                 NaN   \n",
       "       2117-09-11 13:50:00                      NaN                 NaN   \n",
       "\n",
       "label                      respiratory rate temperature body  \\\n",
       "status                                known            known   \n",
       "variable_type                            qn               qn   \n",
       "units                              insp/min             degF   \n",
       "id     datetime                                                \n",
       "100001 2117-09-11 09:22:00              NaN              NaN   \n",
       "       2117-09-11 09:32:00              NaN              NaN   \n",
       "       2117-09-11 12:50:00              NaN              NaN   \n",
       "       2117-09-11 12:55:00              NaN              NaN   \n",
       "       2117-09-11 12:57:00             14.0              NaN   \n",
       "       2117-09-11 13:00:00             22.0              NaN   \n",
       "       2117-09-11 13:01:00              NaN              NaN   \n",
       "       2117-09-11 13:48:00              NaN             98.0   \n",
       "       2117-09-11 13:49:00              NaN              NaN   \n",
       "       2117-09-11 13:50:00             22.0              NaN   \n",
       "\n",
       "label                      oxygen saturation pulse oximetry weight body  \\\n",
       "status                                                known       known   \n",
       "variable_type                                            qn          qn   \n",
       "units                                               percent          kg   \n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:22:00                              NaN         NaN   \n",
       "       2117-09-11 09:32:00                              NaN         NaN   \n",
       "       2117-09-11 12:50:00                              NaN         NaN   \n",
       "       2117-09-11 12:55:00                              NaN         NaN   \n",
       "       2117-09-11 12:57:00                              NaN         NaN   \n",
       "       2117-09-11 13:00:00                              NaN         NaN   \n",
       "       2117-09-11 13:01:00                              NaN         NaN   \n",
       "       2117-09-11 13:48:00                              NaN         NaN   \n",
       "       2117-09-11 13:49:00                              NaN         NaN   \n",
       "       2117-09-11 13:50:00                              NaN         NaN   \n",
       "\n",
       "label                      output urine glasgow coma scale motor   ...    \\\n",
       "status                            known                    known   ...     \n",
       "variable_type                        qn                      ord   ...     \n",
       "units                                mL                 no_units   ...     \n",
       "id     datetime                                                    ...     \n",
       "100001 2117-09-11 09:22:00          NaN                      NaN   ...     \n",
       "       2117-09-11 09:32:00          NaN                      NaN   ...     \n",
       "       2117-09-11 12:50:00          NaN                      NaN   ...     \n",
       "       2117-09-11 12:55:00          NaN                      NaN   ...     \n",
       "       2117-09-11 12:57:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:01:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:48:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:49:00        300.0                      NaN   ...     \n",
       "       2117-09-11 13:50:00          NaN                      NaN   ...     \n",
       "\n",
       "label                      normal saline             lactated ringers        \\\n",
       "status                             known                        known         \n",
       "variable_type                         qn                           qn         \n",
       "units                                 mL       mL/hr               mL mL/hr   \n",
       "id     datetime                                                               \n",
       "100001 2117-09-11 09:22:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 09:32:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 12:50:00           NaN    5.000000              NaN   NaN   \n",
       "       2117-09-11 12:55:00           NaN    6.996487              NaN   NaN   \n",
       "       2117-09-11 12:57:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:01:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:48:00           NaN  499.999980              NaN   NaN   \n",
       "       2117-09-11 13:49:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:50:00           NaN         NaN              NaN   NaN   \n",
       "\n",
       "label                      norepinephrine         vasopressin            \\\n",
       "status                              known               known             \n",
       "variable_type                          qn                  qn             \n",
       "units                          mcg/kg/min mcg/min       units units/min   \n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:22:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 09:32:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 12:50:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 12:55:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 12:57:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:01:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:48:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:49:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:50:00            NaN     NaN         NaN       NaN   \n",
       "\n",
       "label                      hemoglobin lactate  \n",
       "status                          known   known  \n",
       "variable_type                      qn      qn  \n",
       "units                            g/dL  mmol/L  \n",
       "id     datetime                                \n",
       "100001 2117-09-11 09:22:00       13.0     NaN  \n",
       "       2117-09-11 09:32:00        NaN     1.9  \n",
       "       2117-09-11 12:50:00        NaN     NaN  \n",
       "       2117-09-11 12:55:00        NaN     NaN  \n",
       "       2117-09-11 12:57:00        NaN     NaN  \n",
       "       2117-09-11 13:00:00        NaN     NaN  \n",
       "       2117-09-11 13:01:00        NaN     NaN  \n",
       "       2117-09-11 13:48:00        NaN     NaN  \n",
       "       2117-09-11 13:49:00        NaN     NaN  \n",
       "       2117-09-11 13:50:00        NaN     NaN  \n",
       "\n",
       "[10 rows x 22 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "reload(logger)\n",
    "simple_data = data_dict.get_panel_defintions(12) #12 is \"simple data\"\n",
    "labels = simple_data.label.unique().tolist()\n",
    "\n",
    "df_final = None\n",
    "for label in labels:\n",
    "    logger.log(label,new_level=True)\n",
    "    logger.log('Open Extract')\n",
    "    df_extract = utils.open_df(hdf5_fname,'extract/{}'.format(label))\n",
    "\n",
    "    logger.log('Run Pipeline')\n",
    "    all_pipeline.set_params(transform__add_level__level_val=label)\n",
    "    df = all_pipeline.transform(df_extract)\n",
    "    \n",
    "    logger.log('Analyze...')\n",
    "    print utils.data_loss(df_extract.set_index('id').value.to_frame(),df)\n",
    "    display(df.describe())\n",
    "\n",
    "    logger.log('Joining!')\n",
    "\n",
    "    if df_final is None: df_final = df\n",
    "    else: \n",
    "        df_final = df_final.join(df,how='outer')\n",
    "        del df\n",
    "    logger.end_log_level()\n",
    "\n",
    "df_final.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>heart rate</th>\n",
       "      <th>blood pressure systolic</th>\n",
       "      <th>blood pressure diastolic</th>\n",
       "      <th>blood pressure mean</th>\n",
       "      <th>respiratory rate</th>\n",
       "      <th>temperature body</th>\n",
       "      <th>oxygen saturation pulse oximetry</th>\n",
       "      <th>weight body</th>\n",
       "      <th>output urine</th>\n",
       "      <th>glasgow coma scale motor</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">normal saline</th>\n",
       "      <th colspan=\"2\" halign=\"left\">lactated ringers</th>\n",
       "      <th colspan=\"2\" halign=\"left\">norepinephrine</th>\n",
       "      <th colspan=\"2\" halign=\"left\">vasopressin</th>\n",
       "      <th>hemoglobin</th>\n",
       "      <th>lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th colspan=\"2\" halign=\"left\">known</th>\n",
       "      <th>known</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>ord</th>\n",
       "      <th>...</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"2\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th>beats/min</th>\n",
       "      <th>mmHg</th>\n",
       "      <th>mmHg</th>\n",
       "      <th>mmHg</th>\n",
       "      <th>insp/min</th>\n",
       "      <th>degF</th>\n",
       "      <th>percent</th>\n",
       "      <th>kg</th>\n",
       "      <th>mL</th>\n",
       "      <th>no_units</th>\n",
       "      <th>...</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "      <th>mL</th>\n",
       "      <th>mL/hr</th>\n",
       "      <th>mcg/kg/min</th>\n",
       "      <th>mcg/min</th>\n",
       "      <th>units</th>\n",
       "      <th>units/min</th>\n",
       "      <th>g/dL</th>\n",
       "      <th>mmol/L</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"30\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 09:22:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>13.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 12:50:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 12:55:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.996487</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 12:57:00</th>\n",
       "      <td>122.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:00:00</th>\n",
       "      <td>118.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:01:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>192.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>122.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>499.999980</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:49:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>300.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 13:50:00</th>\n",
       "      <td>118.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 14:00:00</th>\n",
       "      <td>118.0</td>\n",
       "      <td>165.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>103.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.990600</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 15:00:00</th>\n",
       "      <td>110.0</td>\n",
       "      <td>119.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 15:48:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>999.999960</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 15:59:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>199.999998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:00:00</th>\n",
       "      <td>104.0</td>\n",
       "      <td>169.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>105.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>97.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:02:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:11:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:12:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.992476</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 17:00:00</th>\n",
       "      <td>101.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 18:00:00</th>\n",
       "      <td>112.0</td>\n",
       "      <td>170.0</td>\n",
       "      <td>86.0</td>\n",
       "      <td>106.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 18:34:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>400.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:00:00</th>\n",
       "      <td>108.0</td>\n",
       "      <td>179.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:31:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>11.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 20:00:00</th>\n",
       "      <td>116.0</td>\n",
       "      <td>183.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>114.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 21:00:00</th>\n",
       "      <td>117.0</td>\n",
       "      <td>189.0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>121.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 21:12:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.396080</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 21:16:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.012718</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <td>124.0</td>\n",
       "      <td>180.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>200.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:10:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:25:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>500.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"30\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-09 09:00:00</th>\n",
       "      <td>79.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>350.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 10:00:00</th>\n",
       "      <td>81.0</td>\n",
       "      <td>123.0</td>\n",
       "      <td>64.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 11:00:00</th>\n",
       "      <td>89.0</td>\n",
       "      <td>115.0</td>\n",
       "      <td>57.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>16.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 12:00:00</th>\n",
       "      <td>72.0</td>\n",
       "      <td>119.0</td>\n",
       "      <td>45.0</td>\n",
       "      <td>65.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>98.3</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 13:00:00</th>\n",
       "      <td>82.0</td>\n",
       "      <td>156.0</td>\n",
       "      <td>58.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 14:00:00</th>\n",
       "      <td>74.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>28.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 15:00:00</th>\n",
       "      <td>69.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 16:00:00</th>\n",
       "      <td>71.0</td>\n",
       "      <td>133.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>400.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 17:00:00</th>\n",
       "      <td>68.0</td>\n",
       "      <td>150.0</td>\n",
       "      <td>55.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 18:00:00</th>\n",
       "      <td>90.0</td>\n",
       "      <td>146.0</td>\n",
       "      <td>53.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 19:00:00</th>\n",
       "      <td>78.0</td>\n",
       "      <td>134.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>72.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 20:00:00</th>\n",
       "      <td>82.0</td>\n",
       "      <td>142.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>23.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>95.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>480.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 21:00:00</th>\n",
       "      <td>81.0</td>\n",
       "      <td>132.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>29.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 22:00:00</th>\n",
       "      <td>72.0</td>\n",
       "      <td>112.0</td>\n",
       "      <td>48.0</td>\n",
       "      <td>62.0</td>\n",
       "      <td>25.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 23:00:00</th>\n",
       "      <td>65.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>42.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 00:00:00</th>\n",
       "      <td>80.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>25.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>94.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 00:01:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>128.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 01:00:00</th>\n",
       "      <td>80.0</td>\n",
       "      <td>124.0</td>\n",
       "      <td>59.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 02:00:00</th>\n",
       "      <td>76.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>49.0</td>\n",
       "      <td>68.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>350.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 02:28:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 03:00:00</th>\n",
       "      <td>72.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>91.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 04:00:00</th>\n",
       "      <td>66.0</td>\n",
       "      <td>126.0</td>\n",
       "      <td>31.0</td>\n",
       "      <td>52.0</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 05:00:00</th>\n",
       "      <td>66.0</td>\n",
       "      <td>129.0</td>\n",
       "      <td>39.0</td>\n",
       "      <td>63.0</td>\n",
       "      <td>27.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>330.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 06:00:00</th>\n",
       "      <td>65.0</td>\n",
       "      <td>143.0</td>\n",
       "      <td>47.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 07:00:00</th>\n",
       "      <td>79.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>92.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 08:00:00</th>\n",
       "      <td>69.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.0</td>\n",
       "      <td>98.1</td>\n",
       "      <td>96.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>200.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 09:00:00</th>\n",
       "      <td>79.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>93.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 09:10:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>128.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>89.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 10:00:00</th>\n",
       "      <td>72.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>26.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>200.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 11:00:00</th>\n",
       "      <td>72.0</td>\n",
       "      <td>144.0</td>\n",
       "      <td>123.0</td>\n",
       "      <td>128.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10071464 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      heart rate blood pressure systolic  \\\n",
       "status                          known                   known   \n",
       "variable_type                      qn                      qn   \n",
       "units                       beats/min                    mmHg   \n",
       "id     datetime                                                 \n",
       "100001 2117-09-11 09:22:00        NaN                     NaN   \n",
       "       2117-09-11 09:32:00        NaN                     NaN   \n",
       "       2117-09-11 12:50:00        NaN                     NaN   \n",
       "       2117-09-11 12:55:00        NaN                     NaN   \n",
       "       2117-09-11 12:57:00      122.0                     NaN   \n",
       "       2117-09-11 13:00:00      118.0                     NaN   \n",
       "       2117-09-11 13:01:00        NaN                   192.0   \n",
       "       2117-09-11 13:48:00        NaN                     NaN   \n",
       "       2117-09-11 13:49:00        NaN                     NaN   \n",
       "       2117-09-11 13:50:00      118.0                     NaN   \n",
       "       2117-09-11 14:00:00      118.0                   165.0   \n",
       "       2117-09-11 15:00:00      110.0                   119.0   \n",
       "       2117-09-11 15:48:00        NaN                     NaN   \n",
       "       2117-09-11 15:59:00        NaN                     NaN   \n",
       "       2117-09-11 16:00:00      104.0                   169.0   \n",
       "       2117-09-11 16:02:00        NaN                     NaN   \n",
       "       2117-09-11 16:11:00        NaN                     NaN   \n",
       "       2117-09-11 16:12:00        NaN                     NaN   \n",
       "       2117-09-11 17:00:00      101.0                   110.0   \n",
       "       2117-09-11 18:00:00      112.0                   170.0   \n",
       "       2117-09-11 18:34:00        NaN                     NaN   \n",
       "       2117-09-11 19:00:00      108.0                   179.0   \n",
       "       2117-09-11 19:31:00        NaN                     NaN   \n",
       "       2117-09-11 20:00:00      116.0                   183.0   \n",
       "       2117-09-11 21:00:00      117.0                   189.0   \n",
       "       2117-09-11 21:12:00        NaN                     NaN   \n",
       "       2117-09-11 21:16:00        NaN                     NaN   \n",
       "       2117-09-11 22:00:00      124.0                   180.0   \n",
       "       2117-09-11 22:10:00        NaN                     NaN   \n",
       "       2117-09-11 22:25:00        NaN                     NaN   \n",
       "...                               ...                     ...   \n",
       "199999 2136-04-09 09:00:00       79.0                   130.0   \n",
       "       2136-04-09 10:00:00       81.0                   123.0   \n",
       "       2136-04-09 11:00:00       89.0                   115.0   \n",
       "       2136-04-09 12:00:00       72.0                   119.0   \n",
       "       2136-04-09 13:00:00       82.0                   156.0   \n",
       "       2136-04-09 14:00:00       74.0                   128.0   \n",
       "       2136-04-09 15:00:00       69.0                   129.0   \n",
       "       2136-04-09 16:00:00       71.0                   133.0   \n",
       "       2136-04-09 17:00:00       68.0                   150.0   \n",
       "       2136-04-09 18:00:00       90.0                   146.0   \n",
       "       2136-04-09 19:00:00       78.0                   134.0   \n",
       "       2136-04-09 20:00:00       82.0                   142.0   \n",
       "       2136-04-09 21:00:00       81.0                   132.0   \n",
       "       2136-04-09 22:00:00       72.0                   112.0   \n",
       "       2136-04-09 23:00:00       65.0                   129.0   \n",
       "       2136-04-10 00:00:00       80.0                     NaN   \n",
       "       2136-04-10 00:01:00        NaN                   128.0   \n",
       "       2136-04-10 01:00:00       80.0                   124.0   \n",
       "       2136-04-10 02:00:00       76.0                   130.0   \n",
       "       2136-04-10 02:28:00        NaN                     NaN   \n",
       "       2136-04-10 03:00:00       72.0                   126.0   \n",
       "       2136-04-10 04:00:00       66.0                   126.0   \n",
       "       2136-04-10 05:00:00       66.0                   129.0   \n",
       "       2136-04-10 06:00:00       65.0                   143.0   \n",
       "       2136-04-10 07:00:00       79.0                     NaN   \n",
       "       2136-04-10 08:00:00       69.0                     NaN   \n",
       "       2136-04-10 09:00:00       79.0                     NaN   \n",
       "       2136-04-10 09:10:00        NaN                   128.0   \n",
       "       2136-04-10 10:00:00       72.0                     NaN   \n",
       "       2136-04-10 11:00:00       72.0                   144.0   \n",
       "\n",
       "label                      blood pressure diastolic blood pressure mean  \\\n",
       "status                                        known               known   \n",
       "variable_type                                    qn                  qn   \n",
       "units                                          mmHg                mmHg   \n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:22:00                      NaN                 NaN   \n",
       "       2117-09-11 09:32:00                      NaN                 NaN   \n",
       "       2117-09-11 12:50:00                      NaN                 NaN   \n",
       "       2117-09-11 12:55:00                      NaN                 NaN   \n",
       "       2117-09-11 12:57:00                      NaN                 NaN   \n",
       "       2117-09-11 13:00:00                      NaN                 NaN   \n",
       "       2117-09-11 13:01:00                    100.0               122.0   \n",
       "       2117-09-11 13:48:00                      NaN                 NaN   \n",
       "       2117-09-11 13:49:00                      NaN                 NaN   \n",
       "       2117-09-11 13:50:00                      NaN                 NaN   \n",
       "       2117-09-11 14:00:00                     85.0               103.0   \n",
       "       2117-09-11 15:00:00                     64.0                76.0   \n",
       "       2117-09-11 15:48:00                      NaN                 NaN   \n",
       "       2117-09-11 15:59:00                      NaN                 NaN   \n",
       "       2117-09-11 16:00:00                     84.0               105.0   \n",
       "       2117-09-11 16:02:00                      NaN                 NaN   \n",
       "       2117-09-11 16:11:00                      NaN                 NaN   \n",
       "       2117-09-11 16:12:00                      NaN                 NaN   \n",
       "       2117-09-11 17:00:00                     62.0                75.0   \n",
       "       2117-09-11 18:00:00                     86.0               106.0   \n",
       "       2117-09-11 18:34:00                      NaN                 NaN   \n",
       "       2117-09-11 19:00:00                     88.0               110.0   \n",
       "       2117-09-11 19:31:00                      NaN                 NaN   \n",
       "       2117-09-11 20:00:00                     91.0               114.0   \n",
       "       2117-09-11 21:00:00                     98.0               121.0   \n",
       "       2117-09-11 21:12:00                      NaN                 NaN   \n",
       "       2117-09-11 21:16:00                      NaN                 NaN   \n",
       "       2117-09-11 22:00:00                     88.0               110.0   \n",
       "       2117-09-11 22:10:00                      NaN                 NaN   \n",
       "       2117-09-11 22:25:00                      NaN                 NaN   \n",
       "...                                             ...                 ...   \n",
       "199999 2136-04-09 09:00:00                     48.0                69.0   \n",
       "       2136-04-09 10:00:00                     64.0                80.0   \n",
       "       2136-04-09 11:00:00                     57.0                70.0   \n",
       "       2136-04-09 12:00:00                     45.0                65.0   \n",
       "       2136-04-09 13:00:00                     58.0                78.0   \n",
       "       2136-04-09 14:00:00                     53.0                72.0   \n",
       "       2136-04-09 15:00:00                     49.0                69.0   \n",
       "       2136-04-09 16:00:00                     46.0                70.0   \n",
       "       2136-04-09 17:00:00                     55.0                79.0   \n",
       "       2136-04-09 18:00:00                     53.0                79.0   \n",
       "       2136-04-09 19:00:00                     50.0                72.0   \n",
       "       2136-04-09 20:00:00                     52.0                75.0   \n",
       "       2136-04-09 21:00:00                     50.0                70.0   \n",
       "       2136-04-09 22:00:00                     48.0                62.0   \n",
       "       2136-04-09 23:00:00                     42.0                66.0   \n",
       "       2136-04-10 00:00:00                      NaN                 NaN   \n",
       "       2136-04-10 00:01:00                     59.0                75.0   \n",
       "       2136-04-10 01:00:00                     59.0                75.0   \n",
       "       2136-04-10 02:00:00                     49.0                68.0   \n",
       "       2136-04-10 02:28:00                      NaN                 NaN   \n",
       "       2136-04-10 03:00:00                     37.0                61.0   \n",
       "       2136-04-10 04:00:00                     31.0                52.0   \n",
       "       2136-04-10 05:00:00                     39.0                63.0   \n",
       "       2136-04-10 06:00:00                     47.0                71.0   \n",
       "       2136-04-10 07:00:00                      NaN                 NaN   \n",
       "       2136-04-10 08:00:00                      NaN                 NaN   \n",
       "       2136-04-10 09:00:00                      NaN                 NaN   \n",
       "       2136-04-10 09:10:00                     81.0                89.0   \n",
       "       2136-04-10 10:00:00                      NaN                 NaN   \n",
       "       2136-04-10 11:00:00                    123.0               128.0   \n",
       "\n",
       "label                      respiratory rate temperature body  \\\n",
       "status                                known            known   \n",
       "variable_type                            qn               qn   \n",
       "units                              insp/min             degF   \n",
       "id     datetime                                                \n",
       "100001 2117-09-11 09:22:00              NaN              NaN   \n",
       "       2117-09-11 09:32:00              NaN              NaN   \n",
       "       2117-09-11 12:50:00              NaN              NaN   \n",
       "       2117-09-11 12:55:00              NaN              NaN   \n",
       "       2117-09-11 12:57:00             14.0              NaN   \n",
       "       2117-09-11 13:00:00             22.0              NaN   \n",
       "       2117-09-11 13:01:00              NaN              NaN   \n",
       "       2117-09-11 13:48:00              NaN             98.0   \n",
       "       2117-09-11 13:49:00              NaN              NaN   \n",
       "       2117-09-11 13:50:00             22.0              NaN   \n",
       "       2117-09-11 14:00:00             15.0              NaN   \n",
       "       2117-09-11 15:00:00             15.0              NaN   \n",
       "       2117-09-11 15:48:00              NaN              NaN   \n",
       "       2117-09-11 15:59:00              NaN              NaN   \n",
       "       2117-09-11 16:00:00             16.0             97.9   \n",
       "       2117-09-11 16:02:00              NaN              NaN   \n",
       "       2117-09-11 16:11:00              NaN              NaN   \n",
       "       2117-09-11 16:12:00              NaN              NaN   \n",
       "       2117-09-11 17:00:00             15.0              NaN   \n",
       "       2117-09-11 18:00:00             16.0              NaN   \n",
       "       2117-09-11 18:34:00              NaN              NaN   \n",
       "       2117-09-11 19:00:00              NaN             99.5   \n",
       "       2117-09-11 19:31:00              NaN              NaN   \n",
       "       2117-09-11 20:00:00              NaN              NaN   \n",
       "       2117-09-11 21:00:00              NaN              NaN   \n",
       "       2117-09-11 21:12:00              NaN              NaN   \n",
       "       2117-09-11 21:16:00              NaN              NaN   \n",
       "       2117-09-11 22:00:00              NaN            100.0   \n",
       "       2117-09-11 22:10:00              NaN              NaN   \n",
       "       2117-09-11 22:25:00              NaN              NaN   \n",
       "...                                     ...              ...   \n",
       "199999 2136-04-09 09:00:00             25.0              NaN   \n",
       "       2136-04-09 10:00:00             24.0              NaN   \n",
       "       2136-04-09 11:00:00             16.0              NaN   \n",
       "       2136-04-09 12:00:00             24.0             98.3   \n",
       "       2136-04-09 13:00:00             22.0              NaN   \n",
       "       2136-04-09 14:00:00             28.0              NaN   \n",
       "       2136-04-09 15:00:00             26.0              NaN   \n",
       "       2136-04-09 16:00:00             27.0             98.0   \n",
       "       2136-04-09 17:00:00             24.0              NaN   \n",
       "       2136-04-09 18:00:00             24.0              NaN   \n",
       "       2136-04-09 19:00:00             26.0              NaN   \n",
       "       2136-04-09 20:00:00             23.0             97.4   \n",
       "       2136-04-09 21:00:00             29.0              NaN   \n",
       "       2136-04-09 22:00:00             25.0              NaN   \n",
       "       2136-04-09 23:00:00             24.0              NaN   \n",
       "       2136-04-10 00:00:00             25.0             97.4   \n",
       "       2136-04-10 00:01:00              NaN              NaN   \n",
       "       2136-04-10 01:00:00             26.0              NaN   \n",
       "       2136-04-10 02:00:00             26.0              NaN   \n",
       "       2136-04-10 02:28:00              NaN              NaN   \n",
       "       2136-04-10 03:00:00             27.0             97.4   \n",
       "       2136-04-10 04:00:00             26.0              NaN   \n",
       "       2136-04-10 05:00:00             27.0              NaN   \n",
       "       2136-04-10 06:00:00             19.0              NaN   \n",
       "       2136-04-10 07:00:00             28.0              NaN   \n",
       "       2136-04-10 08:00:00             24.0             98.1   \n",
       "       2136-04-10 09:00:00             24.0              NaN   \n",
       "       2136-04-10 09:10:00              NaN              NaN   \n",
       "       2136-04-10 10:00:00             26.0              NaN   \n",
       "       2136-04-10 11:00:00             24.0              NaN   \n",
       "\n",
       "label                      oxygen saturation pulse oximetry weight body  \\\n",
       "status                                                known       known   \n",
       "variable_type                                            qn          qn   \n",
       "units                                               percent          kg   \n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:22:00                              NaN         NaN   \n",
       "       2117-09-11 09:32:00                              NaN         NaN   \n",
       "       2117-09-11 12:50:00                              NaN         NaN   \n",
       "       2117-09-11 12:55:00                              NaN         NaN   \n",
       "       2117-09-11 12:57:00                              NaN         NaN   \n",
       "       2117-09-11 13:00:00                              NaN         NaN   \n",
       "       2117-09-11 13:01:00                              NaN         NaN   \n",
       "       2117-09-11 13:48:00                              NaN         NaN   \n",
       "       2117-09-11 13:49:00                              NaN         NaN   \n",
       "       2117-09-11 13:50:00                              NaN         NaN   \n",
       "       2117-09-11 14:00:00                              NaN         NaN   \n",
       "       2117-09-11 15:00:00                              NaN         NaN   \n",
       "       2117-09-11 15:48:00                              NaN         NaN   \n",
       "       2117-09-11 15:59:00                              NaN         NaN   \n",
       "       2117-09-11 16:00:00                              NaN         NaN   \n",
       "       2117-09-11 16:02:00                            100.0         NaN   \n",
       "       2117-09-11 16:11:00                              NaN         NaN   \n",
       "       2117-09-11 16:12:00                              NaN         NaN   \n",
       "       2117-09-11 17:00:00                             97.0         NaN   \n",
       "       2117-09-11 18:00:00                             98.0         NaN   \n",
       "       2117-09-11 18:34:00                              NaN         NaN   \n",
       "       2117-09-11 19:00:00                              NaN         NaN   \n",
       "       2117-09-11 19:31:00                              NaN         NaN   \n",
       "       2117-09-11 20:00:00                             97.0         NaN   \n",
       "       2117-09-11 21:00:00                              NaN         NaN   \n",
       "       2117-09-11 21:12:00                              NaN         NaN   \n",
       "       2117-09-11 21:16:00                              NaN         NaN   \n",
       "       2117-09-11 22:00:00                              NaN         NaN   \n",
       "       2117-09-11 22:10:00                             97.0         NaN   \n",
       "       2117-09-11 22:25:00                              NaN         NaN   \n",
       "...                                                     ...         ...   \n",
       "199999 2136-04-09 09:00:00                             95.0         NaN   \n",
       "       2136-04-09 10:00:00                             94.0         NaN   \n",
       "       2136-04-09 11:00:00                             94.0         NaN   \n",
       "       2136-04-09 12:00:00                             93.0         NaN   \n",
       "       2136-04-09 13:00:00                             94.0         NaN   \n",
       "       2136-04-09 14:00:00                             93.0         NaN   \n",
       "       2136-04-09 15:00:00                             93.0         NaN   \n",
       "       2136-04-09 16:00:00                             94.0         NaN   \n",
       "       2136-04-09 17:00:00                             94.0         NaN   \n",
       "       2136-04-09 18:00:00                             93.0         NaN   \n",
       "       2136-04-09 19:00:00                             93.0         NaN   \n",
       "       2136-04-09 20:00:00                             95.0         NaN   \n",
       "       2136-04-09 21:00:00                             93.0         NaN   \n",
       "       2136-04-09 22:00:00                             97.0         NaN   \n",
       "       2136-04-09 23:00:00                             96.0         NaN   \n",
       "       2136-04-10 00:00:00                             94.0         NaN   \n",
       "       2136-04-10 00:01:00                              NaN         NaN   \n",
       "       2136-04-10 01:00:00                             95.0         NaN   \n",
       "       2136-04-10 02:00:00                             93.0         NaN   \n",
       "       2136-04-10 02:28:00                              NaN         NaN   \n",
       "       2136-04-10 03:00:00                             91.0         NaN   \n",
       "       2136-04-10 04:00:00                             95.0         NaN   \n",
       "       2136-04-10 05:00:00                             96.0         NaN   \n",
       "       2136-04-10 06:00:00                             92.0         NaN   \n",
       "       2136-04-10 07:00:00                             92.0         NaN   \n",
       "       2136-04-10 08:00:00                             96.0         NaN   \n",
       "       2136-04-10 09:00:00                             93.0         NaN   \n",
       "       2136-04-10 09:10:00                              NaN         NaN   \n",
       "       2136-04-10 10:00:00                             97.0         NaN   \n",
       "       2136-04-10 11:00:00                             96.0         NaN   \n",
       "\n",
       "label                      output urine glasgow coma scale motor   ...    \\\n",
       "status                            known                    known   ...     \n",
       "variable_type                        qn                      ord   ...     \n",
       "units                                mL                 no_units   ...     \n",
       "id     datetime                                                    ...     \n",
       "100001 2117-09-11 09:22:00          NaN                      NaN   ...     \n",
       "       2117-09-11 09:32:00          NaN                      NaN   ...     \n",
       "       2117-09-11 12:50:00          NaN                      NaN   ...     \n",
       "       2117-09-11 12:55:00          NaN                      NaN   ...     \n",
       "       2117-09-11 12:57:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:01:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:48:00          NaN                      NaN   ...     \n",
       "       2117-09-11 13:49:00        300.0                      NaN   ...     \n",
       "       2117-09-11 13:50:00          NaN                      NaN   ...     \n",
       "       2117-09-11 14:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 15:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 15:48:00          NaN                      NaN   ...     \n",
       "       2117-09-11 15:59:00          NaN                      NaN   ...     \n",
       "       2117-09-11 16:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 16:02:00          NaN                      NaN   ...     \n",
       "       2117-09-11 16:11:00          NaN                      NaN   ...     \n",
       "       2117-09-11 16:12:00          NaN                      NaN   ...     \n",
       "       2117-09-11 17:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 18:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 18:34:00        400.0                      NaN   ...     \n",
       "       2117-09-11 19:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 19:31:00          NaN                      NaN   ...     \n",
       "       2117-09-11 20:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 21:00:00          NaN                      NaN   ...     \n",
       "       2117-09-11 21:12:00          NaN                      NaN   ...     \n",
       "       2117-09-11 21:16:00          NaN                      NaN   ...     \n",
       "       2117-09-11 22:00:00        200.0                      NaN   ...     \n",
       "       2117-09-11 22:10:00          NaN                      NaN   ...     \n",
       "       2117-09-11 22:25:00          NaN                      NaN   ...     \n",
       "...                                 ...                      ...   ...     \n",
       "199999 2136-04-09 09:00:00        350.0                      NaN   ...     \n",
       "       2136-04-09 10:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 11:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 12:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 13:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 14:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 15:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 16:00:00        400.0                      NaN   ...     \n",
       "       2136-04-09 17:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 18:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 19:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 20:00:00        480.0                      NaN   ...     \n",
       "       2136-04-09 21:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 22:00:00          NaN                      NaN   ...     \n",
       "       2136-04-09 23:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 00:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 00:01:00          NaN                      NaN   ...     \n",
       "       2136-04-10 01:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 02:00:00        350.0                      NaN   ...     \n",
       "       2136-04-10 02:28:00          NaN                      NaN   ...     \n",
       "       2136-04-10 03:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 04:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 05:00:00        330.0                      NaN   ...     \n",
       "       2136-04-10 06:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 07:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 08:00:00        200.0                      NaN   ...     \n",
       "       2136-04-10 09:00:00          NaN                      NaN   ...     \n",
       "       2136-04-10 09:10:00          NaN                      NaN   ...     \n",
       "       2136-04-10 10:00:00        200.0                      NaN   ...     \n",
       "       2136-04-10 11:00:00          NaN                      NaN   ...     \n",
       "\n",
       "label                      normal saline             lactated ringers        \\\n",
       "status                             known                        known         \n",
       "variable_type                         qn                           qn         \n",
       "units                                 mL       mL/hr               mL mL/hr   \n",
       "id     datetime                                                               \n",
       "100001 2117-09-11 09:22:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 09:32:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 12:50:00           NaN    5.000000              NaN   NaN   \n",
       "       2117-09-11 12:55:00           NaN    6.996487              NaN   NaN   \n",
       "       2117-09-11 12:57:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:01:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:48:00           NaN  499.999980              NaN   NaN   \n",
       "       2117-09-11 13:49:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 13:50:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 14:00:00           NaN    8.990600              NaN   NaN   \n",
       "       2117-09-11 15:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 15:48:00           NaN  999.999960              NaN   NaN   \n",
       "       2117-09-11 15:59:00           NaN  199.999998              NaN   NaN   \n",
       "       2117-09-11 16:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 16:02:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 16:11:00           NaN   40.000000              NaN   NaN   \n",
       "       2117-09-11 16:12:00           NaN    4.992476              NaN   NaN   \n",
       "       2117-09-11 17:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 18:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 18:34:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 19:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 19:31:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 20:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 21:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 21:12:00           NaN    3.396080              NaN   NaN   \n",
       "       2117-09-11 21:16:00           NaN    5.012718              NaN   NaN   \n",
       "       2117-09-11 22:00:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 22:10:00           NaN         NaN              NaN   NaN   \n",
       "       2117-09-11 22:25:00         500.0         NaN              NaN   NaN   \n",
       "...                                  ...         ...              ...   ...   \n",
       "199999 2136-04-09 09:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 10:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 11:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 12:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 13:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 14:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 15:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 16:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 17:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 18:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 19:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 20:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 21:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 22:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-09 23:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 00:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 00:01:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 01:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 02:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 02:28:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 03:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 04:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 05:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 06:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 07:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 08:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 09:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 09:10:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 10:00:00           NaN         NaN              NaN   NaN   \n",
       "       2136-04-10 11:00:00           NaN         NaN              NaN   NaN   \n",
       "\n",
       "label                      norepinephrine         vasopressin            \\\n",
       "status                              known               known             \n",
       "variable_type                          qn                  qn             \n",
       "units                          mcg/kg/min mcg/min       units units/min   \n",
       "id     datetime                                                           \n",
       "100001 2117-09-11 09:22:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 09:32:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 12:50:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 12:55:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 12:57:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:01:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:48:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:49:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 13:50:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 14:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 15:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 15:48:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 15:59:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 16:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 16:02:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 16:11:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 16:12:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 17:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 18:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 18:34:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 19:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 19:31:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 20:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 21:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 21:12:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 21:16:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 22:00:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 22:10:00            NaN     NaN         NaN       NaN   \n",
       "       2117-09-11 22:25:00            NaN     NaN         NaN       NaN   \n",
       "...                                   ...     ...         ...       ...   \n",
       "199999 2136-04-09 09:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 10:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 11:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 12:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 13:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 14:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 15:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 16:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 17:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 18:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 19:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 20:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 21:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 22:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-09 23:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 00:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 00:01:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 01:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 02:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 02:28:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 03:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 04:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 05:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 06:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 07:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 08:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 09:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 09:10:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 10:00:00            NaN     NaN         NaN       NaN   \n",
       "       2136-04-10 11:00:00            NaN     NaN         NaN       NaN   \n",
       "\n",
       "label                      hemoglobin lactate  \n",
       "status                          known   known  \n",
       "variable_type                      qn      qn  \n",
       "units                            g/dL  mmol/L  \n",
       "id     datetime                                \n",
       "100001 2117-09-11 09:22:00       13.0     NaN  \n",
       "       2117-09-11 09:32:00        NaN     1.9  \n",
       "       2117-09-11 12:50:00        NaN     NaN  \n",
       "       2117-09-11 12:55:00        NaN     NaN  \n",
       "       2117-09-11 12:57:00        NaN     NaN  \n",
       "       2117-09-11 13:00:00        NaN     NaN  \n",
       "       2117-09-11 13:01:00        NaN     NaN  \n",
       "       2117-09-11 13:48:00        NaN     NaN  \n",
       "       2117-09-11 13:49:00        NaN     NaN  \n",
       "       2117-09-11 13:50:00        NaN     NaN  \n",
       "       2117-09-11 14:00:00        NaN     NaN  \n",
       "       2117-09-11 15:00:00        NaN     NaN  \n",
       "       2117-09-11 15:48:00        NaN     NaN  \n",
       "       2117-09-11 15:59:00        NaN     NaN  \n",
       "       2117-09-11 16:00:00        NaN     NaN  \n",
       "       2117-09-11 16:02:00        NaN     NaN  \n",
       "       2117-09-11 16:11:00        NaN     NaN  \n",
       "       2117-09-11 16:12:00        NaN     NaN  \n",
       "       2117-09-11 17:00:00        NaN     NaN  \n",
       "       2117-09-11 18:00:00        NaN     NaN  \n",
       "       2117-09-11 18:34:00        NaN     NaN  \n",
       "       2117-09-11 19:00:00        NaN     NaN  \n",
       "       2117-09-11 19:31:00       11.0     NaN  \n",
       "       2117-09-11 20:00:00        NaN     NaN  \n",
       "       2117-09-11 21:00:00        NaN     NaN  \n",
       "       2117-09-11 21:12:00        NaN     NaN  \n",
       "       2117-09-11 21:16:00        NaN     NaN  \n",
       "       2117-09-11 22:00:00        NaN     NaN  \n",
       "       2117-09-11 22:10:00        NaN     NaN  \n",
       "       2117-09-11 22:25:00        NaN     NaN  \n",
       "...                               ...     ...  \n",
       "199999 2136-04-09 09:00:00        NaN     NaN  \n",
       "       2136-04-09 10:00:00        NaN     NaN  \n",
       "       2136-04-09 11:00:00        NaN     NaN  \n",
       "       2136-04-09 12:00:00        NaN     NaN  \n",
       "       2136-04-09 13:00:00        NaN     NaN  \n",
       "       2136-04-09 14:00:00        NaN     NaN  \n",
       "       2136-04-09 15:00:00        NaN     NaN  \n",
       "       2136-04-09 16:00:00        NaN     NaN  \n",
       "       2136-04-09 17:00:00        NaN     NaN  \n",
       "       2136-04-09 18:00:00        NaN     NaN  \n",
       "       2136-04-09 19:00:00        NaN     NaN  \n",
       "       2136-04-09 20:00:00        NaN     NaN  \n",
       "       2136-04-09 21:00:00        NaN     NaN  \n",
       "       2136-04-09 22:00:00        NaN     NaN  \n",
       "       2136-04-09 23:00:00        NaN     NaN  \n",
       "       2136-04-10 00:00:00        NaN     NaN  \n",
       "       2136-04-10 00:01:00        NaN     NaN  \n",
       "       2136-04-10 01:00:00        NaN     NaN  \n",
       "       2136-04-10 02:00:00        NaN     NaN  \n",
       "       2136-04-10 02:28:00       12.6     NaN  \n",
       "       2136-04-10 03:00:00        NaN     NaN  \n",
       "       2136-04-10 04:00:00        NaN     NaN  \n",
       "       2136-04-10 05:00:00        NaN     NaN  \n",
       "       2136-04-10 06:00:00        NaN     NaN  \n",
       "       2136-04-10 07:00:00        NaN     NaN  \n",
       "       2136-04-10 08:00:00        NaN     NaN  \n",
       "       2136-04-10 09:00:00        NaN     NaN  \n",
       "       2136-04-10 09:10:00        NaN     NaN  \n",
       "       2136-04-10 10:00:00        NaN     NaN  \n",
       "       2136-04-10 11:00:00        NaN     NaN  \n",
       "\n",
       "[10071464 rows x 22 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Persist the final frame via the project helper; presumably this writes to\n",
    "# the HDF5 file under key 'cleaned/test1' -- confirm against utils.save_df.\n",
    "utils.save_df(df_final, hdf5_fname, 'cleaned/test1')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prepare for ML\n",
    "\n",
    "\n",
    "1. Convert all categorical variables to numeric representations\n",
    "2. Segment the data and add the segment to the index\n",
    "3. Transform into features (FeatureUnion)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Chain the MIMIC transform step with the standard formatting pipeline so a\n",
    "# raw extract can be processed with a single call.\n",
    "intro_pipeline = Pipeline(\n",
    "    [\n",
    "        ('transform', mimic_transform),\n",
    "        ('format', standard_pipeline),\n",
    "    ]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"8\" halign=\"left\">lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "      <th colspan=\"4\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th colspan=\"3\" halign=\"left\">nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">mmol/L</th>\n",
       "      <th>no_units</th>\n",
       "      <th colspan=\"3\" halign=\"left\">no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>50813</th>\n",
       "      <th>225668</th>\n",
       "      <th>818</th>\n",
       "      <th>1531</th>\n",
       "      <th>225668</th>\n",
       "      <th>50813(mmol/L)</th>\n",
       "      <th>818(mmol/L)</th>\n",
       "      <th>1531(mmol/L)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>100001</th>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100003</th>\n",
       "      <th>2150-04-17 19:12:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100006</th>\n",
       "      <th>2108-04-08 10:58:00</th>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100007</th>\n",
       "      <th>2145-03-31 00:44:00</th>\n",
       "      <td>3.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2145-04-02 14:10:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      lactate                                          \\\n",
       "status                       known                   unknown                 \n",
       "variable_type                   qn                        qn           nom   \n",
       "units                       mmol/L                  no_units      no_units   \n",
       "description                  50813 225668  818 1531   225668 50813(mmol/L)   \n",
       "id     datetime                                                              \n",
       "100001 2117-09-11 09:32:00     1.9    NaN  NaN  NaN      NaN           NaN   \n",
       "100003 2150-04-17 19:12:00     1.1    1.1  NaN  NaN      NaN           NaN   \n",
       "100006 2108-04-08 10:58:00     4.5    NaN  4.5  NaN      NaN           NaN   \n",
       "100007 2145-03-31 00:44:00     3.1    NaN  NaN  NaN      NaN           NaN   \n",
       "       2145-04-02 14:10:00     1.9    NaN  NaN  NaN      NaN           NaN   \n",
       "\n",
       "label                                                \n",
       "status                                               \n",
       "variable_type                                        \n",
       "units                                                \n",
       "description                818(mmol/L) 1531(mmol/L)  \n",
       "id     datetime                                      \n",
       "100001 2117-09-11 09:32:00         NaN          NaN  \n",
       "100003 2150-04-17 19:12:00         NaN          NaN  \n",
       "100006 2108-04-08 10:58:00         NaN          NaN  \n",
       "100007 2145-03-31 00:44:00         NaN          NaN  \n",
       "       2145-04-02 14:10:00         NaN          NaN  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Open the stored extract for one label, run it through intro_pipeline, and\n",
    "# preview the first rows of the result.\n",
    "label = 'lactate'\n",
    "df_extract = utils.open_df(hdf5_fname, 'extract/' + label)\n",
    "df_cleaned = intro_pipeline.transform(df_extract)\n",
    "df_cleaned.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"8\" halign=\"left\">lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "      <th colspan=\"4\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th colspan=\"3\" halign=\"left\">nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">mmol/L</th>\n",
       "      <th>no_units</th>\n",
       "      <th colspan=\"3\" halign=\"left\">no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>50813</th>\n",
       "      <th>225668</th>\n",
       "      <th>818</th>\n",
       "      <th>1531</th>\n",
       "      <th>225668</th>\n",
       "      <th>50813(mmol/L)</th>\n",
       "      <th>818(mmol/L)</th>\n",
       "      <th>1531(mmol/L)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>176767.000000</td>\n",
       "      <td>69402.000000</td>\n",
       "      <td>72916.000000</td>\n",
       "      <td>63134.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>57</td>\n",
       "      <td>57</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34</td>\n",
       "      <td>14</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>ERROR</td>\n",
       "      <td>no data</td>\n",
       "      <td>no data</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18</td>\n",
       "      <td>37</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.635120</td>\n",
       "      <td>16.943607</td>\n",
       "      <td>2.849300</td>\n",
       "      <td>2.780338</td>\n",
       "      <td>52.835714</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.590268</td>\n",
       "      <td>3795.881233</td>\n",
       "      <td>3.152031</td>\n",
       "      <td>3.045527</td>\n",
       "      <td>82.567153</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.050000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.200000</td>\n",
       "      <td>1.200000</td>\n",
       "      <td>1.200000</td>\n",
       "      <td>1.200000</td>\n",
       "      <td>1.175000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.800000</td>\n",
       "      <td>1.800000</td>\n",
       "      <td>1.800000</td>\n",
       "      <td>1.800000</td>\n",
       "      <td>3.400000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>2.900000</td>\n",
       "      <td>2.900000</td>\n",
       "      <td>3.100000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>115.400000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>36.000000</td>\n",
       "      <td>999999.000000</td>\n",
       "      <td>153.000000</td>\n",
       "      <td>153.000000</td>\n",
       "      <td>203.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                lactate                                             \\\n",
       "status                 known                                              \n",
       "variable_type             qn                                              \n",
       "units                 mmol/L                                              \n",
       "description            50813         225668           818          1531   \n",
       "count          176767.000000   69402.000000  72916.000000  63134.000000   \n",
       "unique                   NaN            NaN           NaN           NaN   \n",
       "top                      NaN            NaN           NaN           NaN   \n",
       "freq                     NaN            NaN           NaN           NaN   \n",
       "mean                2.635120      16.943607      2.849300      2.780338   \n",
       "std                 2.590268    3795.881233      3.152031      3.045527   \n",
       "min                 0.000000       0.050000      0.000000      0.000000   \n",
       "25%                 1.200000       1.200000      1.200000      1.200000   \n",
       "50%                 1.800000       1.800000      1.800000      1.800000   \n",
       "75%                 2.900000       2.900000      3.100000      3.000000   \n",
       "max                36.000000  999999.000000    153.000000    153.000000   \n",
       "\n",
       "label                                                             \n",
       "status            unknown                                         \n",
       "variable_type          qn           nom                           \n",
       "units            no_units      no_units                           \n",
       "description        225668 50813(mmol/L) 818(mmol/L) 1531(mmol/L)  \n",
       "count           14.000000            57          57           23  \n",
       "unique                NaN            34          14            8  \n",
       "top                   NaN         ERROR     no data      no data  \n",
       "freq                  NaN            18          37            9  \n",
       "mean            52.835714           NaN         NaN          NaN  \n",
       "std             82.567153           NaN         NaN          NaN  \n",
       "min              0.000000           NaN         NaN          NaN  \n",
       "25%              1.175000           NaN         NaN          NaN  \n",
       "50%              3.400000           NaN         NaN          NaN  \n",
       "75%            115.400000           NaN         NaN          NaN  \n",
       "max            203.000000           NaN         NaN          NaN  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# include='all' summarises every column: numeric stats plus unique/top/freq for non-numeric ones.\n",
    "df_cleaned.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"21\" halign=\"left\">lactate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "      <th colspan=\"17\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "      <th>qn</th>\n",
       "      <th colspan=\"16\" halign=\"left\">nom</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">mmol/L</th>\n",
       "      <th>no_units</th>\n",
       "      <th colspan=\"16\" halign=\"left\">no_units</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>50813</th>\n",
       "      <th>225668</th>\n",
       "      <th>818</th>\n",
       "      <th>1531</th>\n",
       "      <th>225668</th>\n",
       "      <th>50813(mmol/L)_.</th>\n",
       "      <th>50813(mmol/L)_5,0</th>\n",
       "      <th>50813(mmol/L)_&gt;30</th>\n",
       "      <th>50813(mmol/L)_&gt;30.0</th>\n",
       "      <th>50813(mmol/L)_CLOTTED</th>\n",
       "      <th>...</th>\n",
       "      <th>818(mmol/L)_VOIDED</th>\n",
       "      <th>818(mmol/L)_no data</th>\n",
       "      <th>1531(mmol/L)_.</th>\n",
       "      <th>1531(mmol/L)_5,0</th>\n",
       "      <th>1531(mmol/L)_&gt;30</th>\n",
       "      <th>1531(mmol/L)_&gt;30.0</th>\n",
       "      <th>1531(mmol/L)_CLOTTED</th>\n",
       "      <th>1531(mmol/L)_ERROR</th>\n",
       "      <th>1531(mmol/L)_VOIDED</th>\n",
       "      <th>1531(mmol/L)_no data</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>100001</th>\n",
       "      <th>2117-09-11 09:32:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100003</th>\n",
       "      <th>2150-04-17 19:12:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100006</th>\n",
       "      <th>2108-04-08 10:58:00</th>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100007</th>\n",
       "      <th>2145-03-31 00:44:00</th>\n",
       "      <td>3.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2145-04-02 14:10:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100009</th>\n",
       "      <th>2162-05-17 13:19:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2162-05-17 17:14:00</th>\n",
       "      <td>1.5</td>\n",
       "      <td>1.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">100010</th>\n",
       "      <th>2109-12-10 10:25:00</th>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 12:11:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:05:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-10 13:58:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100011</th>\n",
       "      <th>2177-08-29 04:44:00</th>\n",
       "      <td>3.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-08-29 06:55:00</th>\n",
       "      <td>2.3</td>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100012</th>\n",
       "      <th>2177-03-14 07:38:00</th>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-14 11:42:00</th>\n",
       "      <td>2.5</td>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 08:05:00</th>\n",
       "      <td>2.1</td>\n",
       "      <td>2.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 14:01:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-03-15 21:42:00</th>\n",
       "      <td>1.8</td>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100016</th>\n",
       "      <th>2188-05-24 12:00:00</th>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100017</th>\n",
       "      <th>2103-03-11 05:10:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">100018</th>\n",
       "      <th>2176-08-29 15:29:00</th>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 09:23:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 10:19:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 11:29:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2176-08-30 12:40:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100020</th>\n",
       "      <th>2142-11-30 21:54:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2142-12-03 00:17:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100024</th>\n",
       "      <th>2170-09-19 10:25:00</th>\n",
       "      <td>1.4</td>\n",
       "      <td>1.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-19 16:33:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2170-09-20 02:04:00</th>\n",
       "      <td>3.2</td>\n",
       "      <td>3.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199976</th>\n",
       "      <th>2182-02-14 11:15:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-16 03:57:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-19 03:59:00</th>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-20 03:31:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-21 04:55:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199979</th>\n",
       "      <th>2182-02-06 09:17:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2182-02-06 14:16:00</th>\n",
       "      <td>3.8</td>\n",
       "      <td>3.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">199981</th>\n",
       "      <th>2110-09-24 16:34:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-24 20:09:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2110-09-25 06:10:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>199987</th>\n",
       "      <th>2175-05-19 16:30:00</th>\n",
       "      <td>2.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">199988</th>\n",
       "      <th>2169-01-24 12:48:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 01:35:00</th>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 11:18:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 16:43:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-07 22:35:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2169-02-10 05:33:00</th>\n",
       "      <td>1.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199993</th>\n",
       "      <th>2161-11-12 23:14:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161-11-13 03:46:00</th>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">199994</th>\n",
       "      <th>2188-07-07 21:23:00</th>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 03:09:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 04:13:00</th>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2188-07-08 06:20:00</th>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"5\" valign=\"top\">199998</th>\n",
       "      <th>2119-02-20 10:52:00</th>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 12:36:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:33:00</th>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 13:59:00</th>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>2.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 20:43:00</th>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-04 20:55:00</th>\n",
       "      <td>1.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-06 15:29:00</th>\n",
       "      <td>1.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>177450 rows × 29 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      lactate                                            \\\n",
       "status                       known                   unknown                   \n",
       "variable_type                   qn                        qn             nom   \n",
       "units                       mmol/L                  no_units        no_units   \n",
       "description                  50813 225668  818 1531   225668 50813(mmol/L)_.   \n",
       "id     datetime                                                                \n",
       "100001 2117-09-11 09:32:00     1.9    NaN  NaN  NaN      NaN               0   \n",
       "100003 2150-04-17 19:12:00     1.1    1.1  NaN  NaN      NaN               0   \n",
       "100006 2108-04-08 10:58:00     4.5    NaN  4.5  NaN      NaN               0   \n",
       "100007 2145-03-31 00:44:00     3.1    NaN  NaN  NaN      NaN               0   \n",
       "       2145-04-02 14:10:00     1.9    NaN  NaN  NaN      NaN               0   \n",
       "100009 2162-05-17 13:19:00     1.1    1.1  NaN  NaN      NaN               0   \n",
       "       2162-05-17 17:14:00     1.5    1.5  NaN  NaN      NaN               0   \n",
       "100010 2109-12-10 10:25:00     0.6    NaN  NaN  NaN      NaN               0   \n",
       "       2109-12-10 12:11:00     0.9    NaN  NaN  NaN      NaN               0   \n",
       "       2109-12-10 13:05:00     1.0    NaN  NaN  NaN      NaN               0   \n",
       "       2109-12-10 13:58:00     0.8    NaN  NaN  NaN      NaN               0   \n",
       "100011 2177-08-29 04:44:00     3.8    NaN  NaN  NaN      NaN               0   \n",
       "       2177-08-29 06:55:00     2.3    2.3  NaN  NaN      NaN               0   \n",
       "100012 2177-03-14 07:38:00     2.3    NaN  NaN  NaN      NaN               0   \n",
       "       2177-03-14 11:42:00     2.5    2.5  NaN  NaN      NaN               0   \n",
       "       2177-03-15 08:05:00     2.1    2.1  NaN  NaN      NaN               0   \n",
       "       2177-03-15 14:01:00     2.6    2.6  NaN  NaN      NaN               0   \n",
       "       2177-03-15 21:42:00     1.8    1.8  NaN  NaN      NaN               0   \n",
       "100016 2188-05-24 12:00:00     2.0    NaN  NaN  NaN      NaN               0   \n",
       "100017 2103-03-11 05:10:00     1.1    NaN  1.1  NaN      NaN               0   \n",
       "100018 2176-08-29 15:29:00     1.3    NaN  NaN  NaN      NaN               0   \n",
       "       2176-08-30 09:23:00     0.9    0.9  NaN  NaN      NaN               0   \n",
       "       2176-08-30 10:19:00     1.0    1.0  NaN  NaN      NaN               0   \n",
       "       2176-08-30 11:29:00     0.9    0.9  NaN  NaN      NaN               0   \n",
       "       2176-08-30 12:40:00     1.1    1.1  NaN  NaN      NaN               0   \n",
       "100020 2142-11-30 21:54:00     1.1    NaN  NaN  NaN      NaN               0   \n",
       "       2142-12-03 00:17:00     1.0    NaN  NaN  NaN      NaN               0   \n",
       "100024 2170-09-19 10:25:00     1.4    1.4  NaN  NaN      NaN               0   \n",
       "       2170-09-19 16:33:00     2.6    2.6  NaN  NaN      NaN               0   \n",
       "       2170-09-20 02:04:00     3.2    3.2  NaN  NaN      NaN               0   \n",
       "...                            ...    ...  ...  ...      ...             ...   \n",
       "199976 2182-02-14 11:15:00     0.8    NaN  0.8  NaN      NaN               0   \n",
       "       2182-02-16 03:57:00     0.8    NaN  0.8  NaN      NaN               0   \n",
       "       2182-02-19 03:59:00     0.8    NaN  0.8  NaN      NaN               0   \n",
       "       2182-02-20 03:31:00     0.7    NaN  0.7  NaN      NaN               0   \n",
       "       2182-02-21 04:55:00     0.9    NaN  0.9  NaN      NaN               0   \n",
       "199979 2182-02-06 09:17:00     1.0    NaN  NaN  NaN      NaN               0   \n",
       "       2182-02-06 14:16:00     3.8    3.8  NaN  NaN      NaN               0   \n",
       "199981 2110-09-24 16:34:00     1.1    NaN  1.1  1.1      NaN               0   \n",
       "       2110-09-24 20:09:00     1.0    NaN  1.0  1.0      NaN               0   \n",
       "       2110-09-25 06:10:00     1.0    NaN  1.0  1.0      NaN               0   \n",
       "199987 2175-05-19 16:30:00     2.3    NaN  NaN  NaN      NaN               0   \n",
       "199988 2169-01-24 12:48:00     1.0    NaN  NaN  NaN      NaN               0   \n",
       "       2169-02-07 01:35:00     1.6    NaN  1.6  NaN      NaN               0   \n",
       "       2169-02-07 11:18:00     1.0    NaN  1.0  NaN      NaN               0   \n",
       "       2169-02-07 16:43:00     1.0    NaN  1.0  NaN      NaN               0   \n",
       "       2169-02-07 22:35:00     1.1    NaN  1.1  NaN      NaN               0   \n",
       "       2169-02-10 05:33:00     1.4    NaN  NaN  NaN      NaN               0   \n",
       "199993 2161-11-12 23:14:00     0.9    NaN  0.9  NaN      NaN               0   \n",
       "       2161-11-13 03:46:00     0.9    NaN  0.9  NaN      NaN               0   \n",
       "199994 2188-07-07 21:23:00     1.0    NaN  1.0  NaN      NaN               0   \n",
       "       2188-07-08 03:09:00     0.7    NaN  0.7  NaN      NaN               0   \n",
       "       2188-07-08 04:13:00     0.6    NaN  0.6  NaN      NaN               0   \n",
       "       2188-07-08 06:20:00     0.7    NaN  0.7  NaN      NaN               0   \n",
       "199998 2119-02-20 10:52:00     1.1    1.1  1.1  1.1      NaN               0   \n",
       "       2119-02-20 12:36:00     1.9    1.9  1.9  1.9      NaN               0   \n",
       "       2119-02-20 13:33:00     2.0    2.0  2.0  2.0      NaN               0   \n",
       "       2119-02-20 13:59:00     2.6    2.6  2.6  2.6      NaN               0   \n",
       "       2119-02-20 20:43:00     1.3    1.3  1.3  1.3      NaN               0   \n",
       "199999 2136-04-04 20:55:00     1.9    NaN  NaN  NaN      NaN               0   \n",
       "       2136-04-06 15:29:00     1.8    NaN  NaN  NaN      NaN               0   \n",
       "\n",
       "label                                                           \\\n",
       "status                                                           \n",
       "variable_type                                                    \n",
       "units                                                            \n",
       "description                50813(mmol/L)_5,0 50813(mmol/L)_>30   \n",
       "id     datetime                                                  \n",
       "100001 2117-09-11 09:32:00                 0                 0   \n",
       "100003 2150-04-17 19:12:00                 0                 0   \n",
       "100006 2108-04-08 10:58:00                 0                 0   \n",
       "100007 2145-03-31 00:44:00                 0                 0   \n",
       "       2145-04-02 14:10:00                 0                 0   \n",
       "100009 2162-05-17 13:19:00                 0                 0   \n",
       "       2162-05-17 17:14:00                 0                 0   \n",
       "100010 2109-12-10 10:25:00                 0                 0   \n",
       "       2109-12-10 12:11:00                 0                 0   \n",
       "       2109-12-10 13:05:00                 0                 0   \n",
       "       2109-12-10 13:58:00                 0                 0   \n",
       "100011 2177-08-29 04:44:00                 0                 0   \n",
       "       2177-08-29 06:55:00                 0                 0   \n",
       "100012 2177-03-14 07:38:00                 0                 0   \n",
       "       2177-03-14 11:42:00                 0                 0   \n",
       "       2177-03-15 08:05:00                 0                 0   \n",
       "       2177-03-15 14:01:00                 0                 0   \n",
       "       2177-03-15 21:42:00                 0                 0   \n",
       "100016 2188-05-24 12:00:00                 0                 0   \n",
       "100017 2103-03-11 05:10:00                 0                 0   \n",
       "100018 2176-08-29 15:29:00                 0                 0   \n",
       "       2176-08-30 09:23:00                 0                 0   \n",
       "       2176-08-30 10:19:00                 0                 0   \n",
       "       2176-08-30 11:29:00                 0                 0   \n",
       "       2176-08-30 12:40:00                 0                 0   \n",
       "100020 2142-11-30 21:54:00                 0                 0   \n",
       "       2142-12-03 00:17:00                 0                 0   \n",
       "100024 2170-09-19 10:25:00                 0                 0   \n",
       "       2170-09-19 16:33:00                 0                 0   \n",
       "       2170-09-20 02:04:00                 0                 0   \n",
       "...                                      ...               ...   \n",
       "199976 2182-02-14 11:15:00                 0                 0   \n",
       "       2182-02-16 03:57:00                 0                 0   \n",
       "       2182-02-19 03:59:00                 0                 0   \n",
       "       2182-02-20 03:31:00                 0                 0   \n",
       "       2182-02-21 04:55:00                 0                 0   \n",
       "199979 2182-02-06 09:17:00                 0                 0   \n",
       "       2182-02-06 14:16:00                 0                 0   \n",
       "199981 2110-09-24 16:34:00                 0                 0   \n",
       "       2110-09-24 20:09:00                 0                 0   \n",
       "       2110-09-25 06:10:00                 0                 0   \n",
       "199987 2175-05-19 16:30:00                 0                 0   \n",
       "199988 2169-01-24 12:48:00                 0                 0   \n",
       "       2169-02-07 01:35:00                 0                 0   \n",
       "       2169-02-07 11:18:00                 0                 0   \n",
       "       2169-02-07 16:43:00                 0                 0   \n",
       "       2169-02-07 22:35:00                 0                 0   \n",
       "       2169-02-10 05:33:00                 0                 0   \n",
       "199993 2161-11-12 23:14:00                 0                 0   \n",
       "       2161-11-13 03:46:00                 0                 0   \n",
       "199994 2188-07-07 21:23:00                 0                 0   \n",
       "       2188-07-08 03:09:00                 0                 0   \n",
       "       2188-07-08 04:13:00                 0                 0   \n",
       "       2188-07-08 06:20:00                 0                 0   \n",
       "199998 2119-02-20 10:52:00                 0                 0   \n",
       "       2119-02-20 12:36:00                 0                 0   \n",
       "       2119-02-20 13:33:00                 0                 0   \n",
       "       2119-02-20 13:59:00                 0                 0   \n",
       "       2119-02-20 20:43:00                 0                 0   \n",
       "199999 2136-04-04 20:55:00                 0                 0   \n",
       "       2136-04-06 15:29:00                 0                 0   \n",
       "\n",
       "label                                                                 \\\n",
       "status                                                                 \n",
       "variable_type                                                          \n",
       "units                                                                  \n",
       "description                50813(mmol/L)_>30.0 50813(mmol/L)_CLOTTED   \n",
       "id     datetime                                                        \n",
       "100001 2117-09-11 09:32:00                   0                     0   \n",
       "100003 2150-04-17 19:12:00                   0                     0   \n",
       "100006 2108-04-08 10:58:00                   0                     0   \n",
       "100007 2145-03-31 00:44:00                   0                     0   \n",
       "       2145-04-02 14:10:00                   0                     0   \n",
       "100009 2162-05-17 13:19:00                   0                     0   \n",
       "       2162-05-17 17:14:00                   0                     0   \n",
       "100010 2109-12-10 10:25:00                   0                     0   \n",
       "       2109-12-10 12:11:00                   0                     0   \n",
       "       2109-12-10 13:05:00                   0                     0   \n",
       "       2109-12-10 13:58:00                   0                     0   \n",
       "100011 2177-08-29 04:44:00                   0                     0   \n",
       "       2177-08-29 06:55:00                   0                     0   \n",
       "100012 2177-03-14 07:38:00                   0                     0   \n",
       "       2177-03-14 11:42:00                   0                     0   \n",
       "       2177-03-15 08:05:00                   0                     0   \n",
       "       2177-03-15 14:01:00                   0                     0   \n",
       "       2177-03-15 21:42:00                   0                     0   \n",
       "100016 2188-05-24 12:00:00                   0                     0   \n",
       "100017 2103-03-11 05:10:00                   0                     0   \n",
       "100018 2176-08-29 15:29:00                   0                     0   \n",
       "       2176-08-30 09:23:00                   0                     0   \n",
       "       2176-08-30 10:19:00                   0                     0   \n",
       "       2176-08-30 11:29:00                   0                     0   \n",
       "       2176-08-30 12:40:00                   0                     0   \n",
       "100020 2142-11-30 21:54:00                   0                     0   \n",
       "       2142-12-03 00:17:00                   0                     0   \n",
       "100024 2170-09-19 10:25:00                   0                     0   \n",
       "       2170-09-19 16:33:00                   0                     0   \n",
       "       2170-09-20 02:04:00                   0                     0   \n",
       "...                                        ...                   ...   \n",
       "199976 2182-02-14 11:15:00                   0                     0   \n",
       "       2182-02-16 03:57:00                   0                     0   \n",
       "       2182-02-19 03:59:00                   0                     0   \n",
       "       2182-02-20 03:31:00                   0                     0   \n",
       "       2182-02-21 04:55:00                   0                     0   \n",
       "199979 2182-02-06 09:17:00                   0                     0   \n",
       "       2182-02-06 14:16:00                   0                     0   \n",
       "199981 2110-09-24 16:34:00                   0                     0   \n",
       "       2110-09-24 20:09:00                   0                     0   \n",
       "       2110-09-25 06:10:00                   0                     0   \n",
       "199987 2175-05-19 16:30:00                   0                     0   \n",
       "199988 2169-01-24 12:48:00                   0                     0   \n",
       "       2169-02-07 01:35:00                   0                     0   \n",
       "       2169-02-07 11:18:00                   0                     0   \n",
       "       2169-02-07 16:43:00                   0                     0   \n",
       "       2169-02-07 22:35:00                   0                     0   \n",
       "       2169-02-10 05:33:00                   0                     0   \n",
       "199993 2161-11-12 23:14:00                   0                     0   \n",
       "       2161-11-13 03:46:00                   0                     0   \n",
       "199994 2188-07-07 21:23:00                   0                     0   \n",
       "       2188-07-08 03:09:00                   0                     0   \n",
       "       2188-07-08 04:13:00                   0                     0   \n",
       "       2188-07-08 06:20:00                   0                     0   \n",
       "199998 2119-02-20 10:52:00                   0                     0   \n",
       "       2119-02-20 12:36:00                   0                     0   \n",
       "       2119-02-20 13:33:00                   0                     0   \n",
       "       2119-02-20 13:59:00                   0                     0   \n",
       "       2119-02-20 20:43:00                   0                     0   \n",
       "199999 2136-04-04 20:55:00                   0                     0   \n",
       "       2136-04-06 15:29:00                   0                     0   \n",
       "\n",
       "label                              ...                              \\\n",
       "status                             ...                               \n",
       "variable_type                      ...                               \n",
       "units                              ...                               \n",
       "description                        ...          818(mmol/L)_VOIDED   \n",
       "id     datetime                    ...                               \n",
       "100001 2117-09-11 09:32:00         ...                           0   \n",
       "100003 2150-04-17 19:12:00         ...                           0   \n",
       "100006 2108-04-08 10:58:00         ...                           0   \n",
       "100007 2145-03-31 00:44:00         ...                           0   \n",
       "       2145-04-02 14:10:00         ...                           0   \n",
       "100009 2162-05-17 13:19:00         ...                           0   \n",
       "       2162-05-17 17:14:00         ...                           0   \n",
       "100010 2109-12-10 10:25:00         ...                           0   \n",
       "       2109-12-10 12:11:00         ...                           0   \n",
       "       2109-12-10 13:05:00         ...                           0   \n",
       "       2109-12-10 13:58:00         ...                           0   \n",
       "100011 2177-08-29 04:44:00         ...                           0   \n",
       "       2177-08-29 06:55:00         ...                           0   \n",
       "100012 2177-03-14 07:38:00         ...                           0   \n",
       "       2177-03-14 11:42:00         ...                           0   \n",
       "       2177-03-15 08:05:00         ...                           0   \n",
       "       2177-03-15 14:01:00         ...                           0   \n",
       "       2177-03-15 21:42:00         ...                           0   \n",
       "100016 2188-05-24 12:00:00         ...                           0   \n",
       "100017 2103-03-11 05:10:00         ...                           0   \n",
       "100018 2176-08-29 15:29:00         ...                           0   \n",
       "       2176-08-30 09:23:00         ...                           0   \n",
       "       2176-08-30 10:19:00         ...                           0   \n",
       "       2176-08-30 11:29:00         ...                           0   \n",
       "       2176-08-30 12:40:00         ...                           0   \n",
       "100020 2142-11-30 21:54:00         ...                           0   \n",
       "       2142-12-03 00:17:00         ...                           0   \n",
       "100024 2170-09-19 10:25:00         ...                           0   \n",
       "       2170-09-19 16:33:00         ...                           0   \n",
       "       2170-09-20 02:04:00         ...                           0   \n",
       "...                                ...                         ...   \n",
       "199976 2182-02-14 11:15:00         ...                           0   \n",
       "       2182-02-16 03:57:00         ...                           0   \n",
       "       2182-02-19 03:59:00         ...                           0   \n",
       "       2182-02-20 03:31:00         ...                           0   \n",
       "       2182-02-21 04:55:00         ...                           0   \n",
       "199979 2182-02-06 09:17:00         ...                           0   \n",
       "       2182-02-06 14:16:00         ...                           0   \n",
       "199981 2110-09-24 16:34:00         ...                           0   \n",
       "       2110-09-24 20:09:00         ...                           0   \n",
       "       2110-09-25 06:10:00         ...                           0   \n",
       "199987 2175-05-19 16:30:00         ...                           0   \n",
       "199988 2169-01-24 12:48:00         ...                           0   \n",
       "       2169-02-07 01:35:00         ...                           0   \n",
       "       2169-02-07 11:18:00         ...                           0   \n",
       "       2169-02-07 16:43:00         ...                           0   \n",
       "       2169-02-07 22:35:00         ...                           0   \n",
       "       2169-02-10 05:33:00         ...                           0   \n",
       "199993 2161-11-12 23:14:00         ...                           0   \n",
       "       2161-11-13 03:46:00         ...                           0   \n",
       "199994 2188-07-07 21:23:00         ...                           0   \n",
       "       2188-07-08 03:09:00         ...                           0   \n",
       "       2188-07-08 04:13:00         ...                           0   \n",
       "       2188-07-08 06:20:00         ...                           0   \n",
       "199998 2119-02-20 10:52:00         ...                           0   \n",
       "       2119-02-20 12:36:00         ...                           0   \n",
       "       2119-02-20 13:33:00         ...                           0   \n",
       "       2119-02-20 13:59:00         ...                           0   \n",
       "       2119-02-20 20:43:00         ...                           0   \n",
       "199999 2136-04-04 20:55:00         ...                           0   \n",
       "       2136-04-06 15:29:00         ...                           0   \n",
       "\n",
       "label                                                          \\\n",
       "status                                                          \n",
       "variable_type                                                   \n",
       "units                                                           \n",
       "description                818(mmol/L)_no data 1531(mmol/L)_.   \n",
       "id     datetime                                                 \n",
       "100001 2117-09-11 09:32:00                   0              0   \n",
       "100003 2150-04-17 19:12:00                   0              0   \n",
       "100006 2108-04-08 10:58:00                   0              0   \n",
       "100007 2145-03-31 00:44:00                   0              0   \n",
       "       2145-04-02 14:10:00                   0              0   \n",
       "100009 2162-05-17 13:19:00                   0              0   \n",
       "       2162-05-17 17:14:00                   0              0   \n",
       "100010 2109-12-10 10:25:00                   0              0   \n",
       "       2109-12-10 12:11:00                   0              0   \n",
       "       2109-12-10 13:05:00                   0              0   \n",
       "       2109-12-10 13:58:00                   0              0   \n",
       "100011 2177-08-29 04:44:00                   0              0   \n",
       "       2177-08-29 06:55:00                   0              0   \n",
       "100012 2177-03-14 07:38:00                   0              0   \n",
       "       2177-03-14 11:42:00                   0              0   \n",
       "       2177-03-15 08:05:00                   0              0   \n",
       "       2177-03-15 14:01:00                   0              0   \n",
       "       2177-03-15 21:42:00                   0              0   \n",
       "100016 2188-05-24 12:00:00                   0              0   \n",
       "100017 2103-03-11 05:10:00                   0              0   \n",
       "100018 2176-08-29 15:29:00                   0              0   \n",
       "       2176-08-30 09:23:00                   0              0   \n",
       "       2176-08-30 10:19:00                   0              0   \n",
       "       2176-08-30 11:29:00                   0              0   \n",
       "       2176-08-30 12:40:00                   0              0   \n",
       "100020 2142-11-30 21:54:00                   0              0   \n",
       "       2142-12-03 00:17:00                   0              0   \n",
       "100024 2170-09-19 10:25:00                   0              0   \n",
       "       2170-09-19 16:33:00                   0              0   \n",
       "       2170-09-20 02:04:00                   0              0   \n",
       "...                                        ...            ...   \n",
       "199976 2182-02-14 11:15:00                   0              0   \n",
       "       2182-02-16 03:57:00                   0              0   \n",
       "       2182-02-19 03:59:00                   0              0   \n",
       "       2182-02-20 03:31:00                   0              0   \n",
       "       2182-02-21 04:55:00                   0              0   \n",
       "199979 2182-02-06 09:17:00                   0              0   \n",
       "       2182-02-06 14:16:00                   0              0   \n",
       "199981 2110-09-24 16:34:00                   0              0   \n",
       "       2110-09-24 20:09:00                   0              0   \n",
       "       2110-09-25 06:10:00                   0              0   \n",
       "199987 2175-05-19 16:30:00                   0              0   \n",
       "199988 2169-01-24 12:48:00                   0              0   \n",
       "       2169-02-07 01:35:00                   0              0   \n",
       "       2169-02-07 11:18:00                   0              0   \n",
       "       2169-02-07 16:43:00                   0              0   \n",
       "       2169-02-07 22:35:00                   0              0   \n",
       "       2169-02-10 05:33:00                   0              0   \n",
       "199993 2161-11-12 23:14:00                   0              0   \n",
       "       2161-11-13 03:46:00                   0              0   \n",
       "199994 2188-07-07 21:23:00                   0              0   \n",
       "       2188-07-08 03:09:00                   0              0   \n",
       "       2188-07-08 04:13:00                   0              0   \n",
       "       2188-07-08 06:20:00                   0              0   \n",
       "199998 2119-02-20 10:52:00                   0              0   \n",
       "       2119-02-20 12:36:00                   0              0   \n",
       "       2119-02-20 13:33:00                   0              0   \n",
       "       2119-02-20 13:59:00                   0              0   \n",
       "       2119-02-20 20:43:00                   0              0   \n",
       "199999 2136-04-04 20:55:00                   0              0   \n",
       "       2136-04-06 15:29:00                   0              0   \n",
       "\n",
       "label                                                         \\\n",
       "status                                                         \n",
       "variable_type                                                  \n",
       "units                                                          \n",
       "description                1531(mmol/L)_5,0 1531(mmol/L)_>30   \n",
       "id     datetime                                                \n",
       "100001 2117-09-11 09:32:00                0                0   \n",
       "100003 2150-04-17 19:12:00                0                0   \n",
       "100006 2108-04-08 10:58:00                0                0   \n",
       "100007 2145-03-31 00:44:00                0                0   \n",
       "       2145-04-02 14:10:00                0                0   \n",
       "100009 2162-05-17 13:19:00                0                0   \n",
       "       2162-05-17 17:14:00                0                0   \n",
       "100010 2109-12-10 10:25:00                0                0   \n",
       "       2109-12-10 12:11:00                0                0   \n",
       "       2109-12-10 13:05:00                0                0   \n",
       "       2109-12-10 13:58:00                0                0   \n",
       "100011 2177-08-29 04:44:00                0                0   \n",
       "       2177-08-29 06:55:00                0                0   \n",
       "100012 2177-03-14 07:38:00                0                0   \n",
       "       2177-03-14 11:42:00                0                0   \n",
       "       2177-03-15 08:05:00                0                0   \n",
       "       2177-03-15 14:01:00                0                0   \n",
       "       2177-03-15 21:42:00                0                0   \n",
       "100016 2188-05-24 12:00:00                0                0   \n",
       "100017 2103-03-11 05:10:00                0                0   \n",
       "100018 2176-08-29 15:29:00                0                0   \n",
       "       2176-08-30 09:23:00                0                0   \n",
       "       2176-08-30 10:19:00                0                0   \n",
       "       2176-08-30 11:29:00                0                0   \n",
       "       2176-08-30 12:40:00                0                0   \n",
       "100020 2142-11-30 21:54:00                0                0   \n",
       "       2142-12-03 00:17:00                0                0   \n",
       "100024 2170-09-19 10:25:00                0                0   \n",
       "       2170-09-19 16:33:00                0                0   \n",
       "       2170-09-20 02:04:00                0                0   \n",
       "...                                     ...              ...   \n",
       "199976 2182-02-14 11:15:00                0                0   \n",
       "       2182-02-16 03:57:00                0                0   \n",
       "       2182-02-19 03:59:00                0                0   \n",
       "       2182-02-20 03:31:00                0                0   \n",
       "       2182-02-21 04:55:00                0                0   \n",
       "199979 2182-02-06 09:17:00                0                0   \n",
       "       2182-02-06 14:16:00                0                0   \n",
       "199981 2110-09-24 16:34:00                0                0   \n",
       "       2110-09-24 20:09:00                0                0   \n",
       "       2110-09-25 06:10:00                0                0   \n",
       "199987 2175-05-19 16:30:00                0                0   \n",
       "199988 2169-01-24 12:48:00                0                0   \n",
       "       2169-02-07 01:35:00                0                0   \n",
       "       2169-02-07 11:18:00                0                0   \n",
       "       2169-02-07 16:43:00                0                0   \n",
       "       2169-02-07 22:35:00                0                0   \n",
       "       2169-02-10 05:33:00                0                0   \n",
       "199993 2161-11-12 23:14:00                0                0   \n",
       "       2161-11-13 03:46:00                0                0   \n",
       "199994 2188-07-07 21:23:00                0                0   \n",
       "       2188-07-08 03:09:00                0                0   \n",
       "       2188-07-08 04:13:00                0                0   \n",
       "       2188-07-08 06:20:00                0                0   \n",
       "199998 2119-02-20 10:52:00                0                0   \n",
       "       2119-02-20 12:36:00                0                0   \n",
       "       2119-02-20 13:33:00                0                0   \n",
       "       2119-02-20 13:59:00                0                0   \n",
       "       2119-02-20 20:43:00                0                0   \n",
       "199999 2136-04-04 20:55:00                0                0   \n",
       "       2136-04-06 15:29:00                0                0   \n",
       "\n",
       "label                                                               \\\n",
       "status                                                               \n",
       "variable_type                                                        \n",
       "units                                                                \n",
       "description                1531(mmol/L)_>30.0 1531(mmol/L)_CLOTTED   \n",
       "id     datetime                                                      \n",
       "100001 2117-09-11 09:32:00                  0                    0   \n",
       "100003 2150-04-17 19:12:00                  0                    0   \n",
       "100006 2108-04-08 10:58:00                  0                    0   \n",
       "100007 2145-03-31 00:44:00                  0                    0   \n",
       "       2145-04-02 14:10:00                  0                    0   \n",
       "100009 2162-05-17 13:19:00                  0                    0   \n",
       "       2162-05-17 17:14:00                  0                    0   \n",
       "100010 2109-12-10 10:25:00                  0                    0   \n",
       "       2109-12-10 12:11:00                  0                    0   \n",
       "       2109-12-10 13:05:00                  0                    0   \n",
       "       2109-12-10 13:58:00                  0                    0   \n",
       "100011 2177-08-29 04:44:00                  0                    0   \n",
       "       2177-08-29 06:55:00                  0                    0   \n",
       "100012 2177-03-14 07:38:00                  0                    0   \n",
       "       2177-03-14 11:42:00                  0                    0   \n",
       "       2177-03-15 08:05:00                  0                    0   \n",
       "       2177-03-15 14:01:00                  0                    0   \n",
       "       2177-03-15 21:42:00                  0                    0   \n",
       "100016 2188-05-24 12:00:00                  0                    0   \n",
       "100017 2103-03-11 05:10:00                  0                    0   \n",
       "100018 2176-08-29 15:29:00                  0                    0   \n",
       "       2176-08-30 09:23:00                  0                    0   \n",
       "       2176-08-30 10:19:00                  0                    0   \n",
       "       2176-08-30 11:29:00                  0                    0   \n",
       "       2176-08-30 12:40:00                  0                    0   \n",
       "100020 2142-11-30 21:54:00                  0                    0   \n",
       "       2142-12-03 00:17:00                  0                    0   \n",
       "100024 2170-09-19 10:25:00                  0                    0   \n",
       "       2170-09-19 16:33:00                  0                    0   \n",
       "       2170-09-20 02:04:00                  0                    0   \n",
       "...                                       ...                  ...   \n",
       "199976 2182-02-14 11:15:00                  0                    0   \n",
       "       2182-02-16 03:57:00                  0                    0   \n",
       "       2182-02-19 03:59:00                  0                    0   \n",
       "       2182-02-20 03:31:00                  0                    0   \n",
       "       2182-02-21 04:55:00                  0                    0   \n",
       "199979 2182-02-06 09:17:00                  0                    0   \n",
       "       2182-02-06 14:16:00                  0                    0   \n",
       "199981 2110-09-24 16:34:00                  0                    0   \n",
       "       2110-09-24 20:09:00                  0                    0   \n",
       "       2110-09-25 06:10:00                  0                    0   \n",
       "199987 2175-05-19 16:30:00                  0                    0   \n",
       "199988 2169-01-24 12:48:00                  0                    0   \n",
       "       2169-02-07 01:35:00                  0                    0   \n",
       "       2169-02-07 11:18:00                  0                    0   \n",
       "       2169-02-07 16:43:00                  0                    0   \n",
       "       2169-02-07 22:35:00                  0                    0   \n",
       "       2169-02-10 05:33:00                  0                    0   \n",
       "199993 2161-11-12 23:14:00                  0                    0   \n",
       "       2161-11-13 03:46:00                  0                    0   \n",
       "199994 2188-07-07 21:23:00                  0                    0   \n",
       "       2188-07-08 03:09:00                  0                    0   \n",
       "       2188-07-08 04:13:00                  0                    0   \n",
       "       2188-07-08 06:20:00                  0                    0   \n",
       "199998 2119-02-20 10:52:00                  0                    0   \n",
       "       2119-02-20 12:36:00                  0                    0   \n",
       "       2119-02-20 13:33:00                  0                    0   \n",
       "       2119-02-20 13:59:00                  0                    0   \n",
       "       2119-02-20 20:43:00                  0                    0   \n",
       "199999 2136-04-04 20:55:00                  0                    0   \n",
       "       2136-04-06 15:29:00                  0                    0   \n",
       "\n",
       "label                                                              \\\n",
       "status                                                              \n",
       "variable_type                                                       \n",
       "units                                                               \n",
       "description                1531(mmol/L)_ERROR 1531(mmol/L)_VOIDED   \n",
       "id     datetime                                                     \n",
       "100001 2117-09-11 09:32:00                  0                   0   \n",
       "100003 2150-04-17 19:12:00                  0                   0   \n",
       "100006 2108-04-08 10:58:00                  0                   0   \n",
       "100007 2145-03-31 00:44:00                  0                   0   \n",
       "       2145-04-02 14:10:00                  0                   0   \n",
       "100009 2162-05-17 13:19:00                  0                   0   \n",
       "       2162-05-17 17:14:00                  0                   0   \n",
       "100010 2109-12-10 10:25:00                  0                   0   \n",
       "       2109-12-10 12:11:00                  0                   0   \n",
       "       2109-12-10 13:05:00                  0                   0   \n",
       "       2109-12-10 13:58:00                  0                   0   \n",
       "100011 2177-08-29 04:44:00                  0                   0   \n",
       "       2177-08-29 06:55:00                  0                   0   \n",
       "100012 2177-03-14 07:38:00                  0                   0   \n",
       "       2177-03-14 11:42:00                  0                   0   \n",
       "       2177-03-15 08:05:00                  0                   0   \n",
       "       2177-03-15 14:01:00                  0                   0   \n",
       "       2177-03-15 21:42:00                  0                   0   \n",
       "100016 2188-05-24 12:00:00                  0                   0   \n",
       "100017 2103-03-11 05:10:00                  0                   0   \n",
       "100018 2176-08-29 15:29:00                  0                   0   \n",
       "       2176-08-30 09:23:00                  0                   0   \n",
       "       2176-08-30 10:19:00                  0                   0   \n",
       "       2176-08-30 11:29:00                  0                   0   \n",
       "       2176-08-30 12:40:00                  0                   0   \n",
       "100020 2142-11-30 21:54:00                  0                   0   \n",
       "       2142-12-03 00:17:00                  0                   0   \n",
       "100024 2170-09-19 10:25:00                  0                   0   \n",
       "       2170-09-19 16:33:00                  0                   0   \n",
       "       2170-09-20 02:04:00                  0                   0   \n",
       "...                                       ...                 ...   \n",
       "199976 2182-02-14 11:15:00                  0                   0   \n",
       "       2182-02-16 03:57:00                  0                   0   \n",
       "       2182-02-19 03:59:00                  0                   0   \n",
       "       2182-02-20 03:31:00                  0                   0   \n",
       "       2182-02-21 04:55:00                  0                   0   \n",
       "199979 2182-02-06 09:17:00                  0                   0   \n",
       "       2182-02-06 14:16:00                  0                   0   \n",
       "199981 2110-09-24 16:34:00                  0                   0   \n",
       "       2110-09-24 20:09:00                  0                   0   \n",
       "       2110-09-25 06:10:00                  0                   0   \n",
       "199987 2175-05-19 16:30:00                  0                   0   \n",
       "199988 2169-01-24 12:48:00                  0                   0   \n",
       "       2169-02-07 01:35:00                  0                   0   \n",
       "       2169-02-07 11:18:00                  0                   0   \n",
       "       2169-02-07 16:43:00                  0                   0   \n",
       "       2169-02-07 22:35:00                  0                   0   \n",
       "       2169-02-10 05:33:00                  0                   0   \n",
       "199993 2161-11-12 23:14:00                  0                   0   \n",
       "       2161-11-13 03:46:00                  0                   0   \n",
       "199994 2188-07-07 21:23:00                  0                   0   \n",
       "       2188-07-08 03:09:00                  0                   0   \n",
       "       2188-07-08 04:13:00                  0                   0   \n",
       "       2188-07-08 06:20:00                  0                   0   \n",
       "199998 2119-02-20 10:52:00                  0                   0   \n",
       "       2119-02-20 12:36:00                  0                   0   \n",
       "       2119-02-20 13:33:00                  0                   0   \n",
       "       2119-02-20 13:59:00                  0                   0   \n",
       "       2119-02-20 20:43:00                  0                   0   \n",
       "199999 2136-04-04 20:55:00                  0                   0   \n",
       "       2136-04-06 15:29:00                  0                   0   \n",
       "\n",
       "label                                            \n",
       "status                                           \n",
       "variable_type                                    \n",
       "units                                            \n",
       "description                1531(mmol/L)_no data  \n",
       "id     datetime                                  \n",
       "100001 2117-09-11 09:32:00                    0  \n",
       "100003 2150-04-17 19:12:00                    0  \n",
       "100006 2108-04-08 10:58:00                    0  \n",
       "100007 2145-03-31 00:44:00                    0  \n",
       "       2145-04-02 14:10:00                    0  \n",
       "100009 2162-05-17 13:19:00                    0  \n",
       "       2162-05-17 17:14:00                    0  \n",
       "100010 2109-12-10 10:25:00                    0  \n",
       "       2109-12-10 12:11:00                    0  \n",
       "       2109-12-10 13:05:00                    0  \n",
       "       2109-12-10 13:58:00                    0  \n",
       "100011 2177-08-29 04:44:00                    0  \n",
       "       2177-08-29 06:55:00                    0  \n",
       "100012 2177-03-14 07:38:00                    0  \n",
       "       2177-03-14 11:42:00                    0  \n",
       "       2177-03-15 08:05:00                    0  \n",
       "       2177-03-15 14:01:00                    0  \n",
       "       2177-03-15 21:42:00                    0  \n",
       "100016 2188-05-24 12:00:00                    0  \n",
       "100017 2103-03-11 05:10:00                    0  \n",
       "100018 2176-08-29 15:29:00                    0  \n",
       "       2176-08-30 09:23:00                    0  \n",
       "       2176-08-30 10:19:00                    0  \n",
       "       2176-08-30 11:29:00                    0  \n",
       "       2176-08-30 12:40:00                    0  \n",
       "100020 2142-11-30 21:54:00                    0  \n",
       "       2142-12-03 00:17:00                    0  \n",
       "100024 2170-09-19 10:25:00                    0  \n",
       "       2170-09-19 16:33:00                    0  \n",
       "       2170-09-20 02:04:00                    0  \n",
       "...                                         ...  \n",
       "199976 2182-02-14 11:15:00                    0  \n",
       "       2182-02-16 03:57:00                    0  \n",
       "       2182-02-19 03:59:00                    0  \n",
       "       2182-02-20 03:31:00                    0  \n",
       "       2182-02-21 04:55:00                    0  \n",
       "199979 2182-02-06 09:17:00                    0  \n",
       "       2182-02-06 14:16:00                    0  \n",
       "199981 2110-09-24 16:34:00                    0  \n",
       "       2110-09-24 20:09:00                    0  \n",
       "       2110-09-25 06:10:00                    0  \n",
       "199987 2175-05-19 16:30:00                    0  \n",
       "199988 2169-01-24 12:48:00                    0  \n",
       "       2169-02-07 01:35:00                    0  \n",
       "       2169-02-07 11:18:00                    0  \n",
       "       2169-02-07 16:43:00                    0  \n",
       "       2169-02-07 22:35:00                    0  \n",
       "       2169-02-10 05:33:00                    0  \n",
       "199993 2161-11-12 23:14:00                    0  \n",
       "       2161-11-13 03:46:00                    0  \n",
       "199994 2188-07-07 21:23:00                    0  \n",
       "       2188-07-08 03:09:00                    0  \n",
       "       2188-07-08 04:13:00                    0  \n",
       "       2188-07-08 06:20:00                    0  \n",
       "199998 2119-02-20 10:52:00                    0  \n",
       "       2119-02-20 12:36:00                    0  \n",
       "       2119-02-20 13:33:00                    0  \n",
       "       2119-02-20 13:59:00                    0  \n",
       "       2119-02-20 20:43:00                    0  \n",
       "199999 2136-04-04 20:55:00                    0  \n",
       "       2136-04-06 15:29:00                    0  \n",
       "\n",
       "[177450 rows x 29 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One-hot encode every NOMINAL column of df_cleaned.\n",
    "# Each nominal column is replaced by one indicator column per category; the new\n",
    "# column keys keep all MultiIndex levels and append the category text to the last\n",
    "# level as '<last level>_<category>'.\n",
    "nominal_cols = df_cleaned.columns.get_level_values('variable_type') == variable_type.NOMINAL\n",
    "\n",
    "# Iterate over a snapshot of the nominal column keys so that rebinding df_cleaned\n",
    "# inside the loop does not disturb the iteration.\n",
    "for col_name in df_cleaned.loc[:,nominal_cols]:\n",
    "    # Use the loop variable here: a misspelled name either raises NameError on a\n",
    "    # fresh kernel or silently encodes a stale column left over in the kernel.\n",
    "    column = df_cleaned[col_name]\n",
    "    df_dummies = pd.get_dummies(column)\n",
    "    dummy_col_names = [col_name[:-1] + ('{}_{}'.format(col_name[-1],text),) for text in df_dummies.columns]\n",
    "    df_dummies.columns = pd.MultiIndex.from_tuples(dummy_col_names,names=df_cleaned.columns.names)\n",
    "    \n",
    "    # Swap the original column for its indicator columns without mutating in place.\n",
    "    df_cleaned = df_cleaned.drop(col_name,axis=1).join(df_dummies,how='outer')\n",
    "\n",
    "df_cleaned"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Possible additional cleaning:\n",
    "\n",
    "1. Infer units of measure (UOM)\n",
    "2. Remove extreme values [DONE]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import utils\n",
    "import mimic\n",
    "import transformers\n",
    "from sklearn.pipeline import Pipeline\n",
    "import icu_data_defs\n",
    "import units"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_temp = utils.open_df('data/mimic_data','extract/temperature body')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(mimic)\n",
    "pipeline = mimic.transform_pipeline('temperature body')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-25 00:00:00</td>\n",
       "      <td>98.400001525878906</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>188670.0</td>\n",
       "      <td>2183-08-23 20:00:00</td>\n",
       "      <td>99.300003051757812</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-12 08:00:00</td>\n",
       "      <td>98.699996948242188</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-13 17:00:00</td>\n",
       "      <td>99.900001525878906</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>166707.0</td>\n",
       "      <td>2122-02-11 20:00:00</td>\n",
       "      <td>37.900001525878906</td>\n",
       "      <td>degC</td>\n",
       "      <td>676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>166707.0</td>\n",
       "      <td>2122-02-13 00:00:00</td>\n",
       "      <td>97.800003051757812</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-11 21:00:00</td>\n",
       "      <td>99.099998474121094</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-24 04:00:00</td>\n",
       "      <td>98.900001525878906</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>176176.0</td>\n",
       "      <td>2116-12-24 16:00:00</td>\n",
       "      <td>99.099998474121094</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-13 03:00:00</td>\n",
       "      <td>99.199996948242188</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-30 18:00:00</td>\n",
       "      <td>98.800003051757812</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>181750.0</td>\n",
       "      <td>2192-11-24 08:00:00</td>\n",
       "      <td>98.5</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-14 12:00:00</td>\n",
       "      <td>100.40000152587891</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-14 13:00:00</td>\n",
       "      <td>100.69999694824219</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-15 22:00:00</td>\n",
       "      <td>99.800003051757812</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-16 21:00:00</td>\n",
       "      <td>100.59999847412109</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>185910.0</td>\n",
       "      <td>2166-08-23 08:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>190797.0</td>\n",
       "      <td>2191-01-10 08:00:00</td>\n",
       "      <td>97.900001525878906</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>157907.0</td>\n",
       "      <td>2129-06-10 23:00:00</td>\n",
       "      <td>100.09999847412109</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>157907.0</td>\n",
       "      <td>2129-06-11 04:30:00</td>\n",
       "      <td>98.800003051757812</td>\n",
       "      <td>degF</td>\n",
       "      <td>678</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          id            datetime               value units  itemid\n",
       "0   185910.0 2166-08-25 00:00:00  98.400001525878906  degF     678\n",
       "1   188670.0 2183-08-23 20:00:00  99.300003051757812  degF     678\n",
       "2   185910.0 2166-08-12 08:00:00  98.699996948242188  degF     678\n",
       "3   185910.0 2166-08-13 17:00:00  99.900001525878906  degF     678\n",
       "4   166707.0 2122-02-11 20:00:00  37.900001525878906  degC     676\n",
       "5   166707.0 2122-02-13 00:00:00  97.800003051757812  degF     678\n",
       "6   185910.0 2166-08-11 21:00:00  99.099998474121094  degF     678\n",
       "7   185910.0 2166-08-24 04:00:00  98.900001525878906  degF     678\n",
       "8   176176.0 2116-12-24 16:00:00  99.099998474121094  degF     678\n",
       "9   185910.0 2166-08-13 03:00:00  99.199996948242188  degF     678\n",
       "10  185910.0 2166-08-30 18:00:00  98.800003051757812  degF     678\n",
       "11  181750.0 2192-11-24 08:00:00                98.5  degF     678\n",
       "12  185910.0 2166-08-14 12:00:00  100.40000152587891  degF     678\n",
       "13  185910.0 2166-08-14 13:00:00  100.69999694824219  degF     678\n",
       "14  185910.0 2166-08-15 22:00:00  99.800003051757812  degF     678\n",
       "15  185910.0 2166-08-16 21:00:00  100.59999847412109  degF     678\n",
       "16  185910.0 2166-08-23 08:00:00                  98  degF     678\n",
       "17  190797.0 2191-01-10 08:00:00  97.900001525878906  degF     678\n",
       "18  157907.0 2129-06-10 23:00:00  100.09999847412109  degF     678\n",
       "19  157907.0 2129-06-11 04:30:00  98.800003051757812  degF     678"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_temp_tr = pipeline.transform(df_temp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">degF</th>\n",
       "      <th colspan=\"2\" halign=\"left\">degC</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762</th>\n",
       "      <th>676</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"20\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <td>98</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:00:00</th>\n",
       "      <td>97.9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:00:00</th>\n",
       "      <td>99.5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <td>100</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 01:00:00</th>\n",
       "      <td>99.9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 04:00:00</th>\n",
       "      <td>97.7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 08:00:00</th>\n",
       "      <td>97.8</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 12:00:00</th>\n",
       "      <td>97.5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 19:00:00</th>\n",
       "      <td>99.8</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 00:00:00</th>\n",
       "      <td>99.7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 04:00:00</th>\n",
       "      <td>99.3</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 08:00:00</th>\n",
       "      <td>99.4</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 12:00:00</th>\n",
       "      <td>98.8</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 16:00:00</th>\n",
       "      <td>99.3</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 22:00:00</th>\n",
       "      <td>99.9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 00:00:00</th>\n",
       "      <td>99.4</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 04:00:00</th>\n",
       "      <td>99.6</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 08:00:00</th>\n",
       "      <td>99</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 13:00:00</th>\n",
       "      <td>98</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 19:00:00</th>\n",
       "      <td>99.5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      temperature body                     \n",
       "units                                  degF          degC       \n",
       "description                          223761 678    223762 676   \n",
       "id     datetime                                                 \n",
       "100001 2117-09-11 13:48:00               98   None   None   None\n",
       "       2117-09-11 16:00:00             97.9   None   None   None\n",
       "       2117-09-11 19:00:00             99.5   None   None   None\n",
       "       2117-09-11 22:00:00              100   None   None   None\n",
       "       2117-09-12 01:00:00             99.9   None   None   None\n",
       "       2117-09-12 04:00:00             97.7   None   None   None\n",
       "       2117-09-12 08:00:00             97.8   None   None   None\n",
       "       2117-09-12 12:00:00             97.5   None   None   None\n",
       "       2117-09-12 19:00:00             99.8   None   None   None\n",
       "       2117-09-13 00:00:00             99.7   None   None   None\n",
       "       2117-09-13 04:00:00             99.3   None   None   None\n",
       "       2117-09-13 08:00:00             99.4   None   None   None\n",
       "       2117-09-13 12:00:00             98.8   None   None   None\n",
       "       2117-09-13 16:00:00             99.3   None   None   None\n",
       "       2117-09-13 22:00:00             99.9   None   None   None\n",
       "       2117-09-14 00:00:00             99.4   None   None   None\n",
       "       2117-09-14 04:00:00             99.6   None   None   None\n",
       "       2117-09-14 08:00:00               99   None   None   None\n",
       "       2117-09-14 13:00:00               98   None   None   None\n",
       "       2117-09-14 19:00:00             99.5   None   None   None"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_tr.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(transformers)\n",
    "reload(units)\n",
    "reload(icu_data_defs)\n",
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')\n",
    "agg_func = lambda x:x.iloc[0]\n",
    "ureg = units.MedicalUreg()\n",
    "standard_pipeline = Pipeline([\n",
    "        ('drop_small_columns',transformers.remove_small_columns(threshold=5)),\n",
    "        ('aggregate_same_datetime',transformers.same_index_aggregator(agg_func)),\n",
    "        ('split_dtype',transformers.split_dtype())\n",
    "    ])\n",
    "\n",
    "stnd_cols = transformers.column_standardizer(data_dict,ureg)\n",
    "drop_oob = transformers.oob_value_remover(data_dict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_temp_cl = standard_pipeline.transform(df_temp_tr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">degF</th>\n",
       "      <th colspan=\"2\" halign=\"left\">degC</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762</th>\n",
       "      <th>676</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"20\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:00:00</th>\n",
       "      <td>97.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:00:00</th>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 01:00:00</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 04:00:00</th>\n",
       "      <td>97.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 08:00:00</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 12:00:00</th>\n",
       "      <td>97.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 19:00:00</th>\n",
       "      <td>99.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 00:00:00</th>\n",
       "      <td>99.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 04:00:00</th>\n",
       "      <td>99.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 08:00:00</th>\n",
       "      <td>99.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 12:00:00</th>\n",
       "      <td>98.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 16:00:00</th>\n",
       "      <td>99.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 22:00:00</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 00:00:00</th>\n",
       "      <td>99.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 04:00:00</th>\n",
       "      <td>99.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 08:00:00</th>\n",
       "      <td>99.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 13:00:00</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 19:00:00</th>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      temperature body                     \n",
       "units                                  degF          degC       \n",
       "description                          223761 678    223762 676   \n",
       "id     datetime                                                 \n",
       "100001 2117-09-11 13:48:00             98.0    NaN    NaN    NaN\n",
       "       2117-09-11 16:00:00             97.9    NaN    NaN    NaN\n",
       "       2117-09-11 19:00:00             99.5    NaN    NaN    NaN\n",
       "       2117-09-11 22:00:00            100.0    NaN    NaN    NaN\n",
       "       2117-09-12 01:00:00             99.9    NaN    NaN    NaN\n",
       "       2117-09-12 04:00:00             97.7    NaN    NaN    NaN\n",
       "       2117-09-12 08:00:00             97.8    NaN    NaN    NaN\n",
       "       2117-09-12 12:00:00             97.5    NaN    NaN    NaN\n",
       "       2117-09-12 19:00:00             99.8    NaN    NaN    NaN\n",
       "       2117-09-13 00:00:00             99.7    NaN    NaN    NaN\n",
       "       2117-09-13 04:00:00             99.3    NaN    NaN    NaN\n",
       "       2117-09-13 08:00:00             99.4    NaN    NaN    NaN\n",
       "       2117-09-13 12:00:00             98.8    NaN    NaN    NaN\n",
       "       2117-09-13 16:00:00             99.3    NaN    NaN    NaN\n",
       "       2117-09-13 22:00:00             99.9    NaN    NaN    NaN\n",
       "       2117-09-14 00:00:00             99.4    NaN    NaN    NaN\n",
       "       2117-09-14 04:00:00             99.6    NaN    NaN    NaN\n",
       "       2117-09-14 08:00:00             99.0    NaN    NaN    NaN\n",
       "       2117-09-14 13:00:00             98.0    NaN    NaN    NaN\n",
       "       2117-09-14 19:00:00             99.5    NaN    NaN    NaN"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_cl.head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df_temp_conv = stnd_cols.transform(df_temp_cl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">degF</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762(degC)</th>\n",
       "      <th>676(degC)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>522143.000000</td>\n",
       "      <td>768158.000000</td>\n",
       "      <td>74144.000000</td>\n",
       "      <td>370309.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>98.407970</td>\n",
       "      <td>98.570606</td>\n",
       "      <td>99.193821</td>\n",
       "      <td>98.765740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>14.681127</td>\n",
       "      <td>2.666614</td>\n",
       "      <td>9.735224</td>\n",
       "      <td>2.608901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>-99.900000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>26.600000</td>\n",
       "      <td>32.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>97.500000</td>\n",
       "      <td>97.599998</td>\n",
       "      <td>97.520000</td>\n",
       "      <td>97.879998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>98.300000</td>\n",
       "      <td>98.599998</td>\n",
       "      <td>98.600000</td>\n",
       "      <td>98.960002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>99.300000</td>\n",
       "      <td>99.599998</td>\n",
       "      <td>99.680000</td>\n",
       "      <td>99.860002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>9637.000000</td>\n",
       "      <td>109.000000</td>\n",
       "      <td>709.700000</td>\n",
       "      <td>115.700000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         temperature body                                            \n",
       "status                   known                                            \n",
       "variable_type               qn                                            \n",
       "units                     degF                                            \n",
       "description             223761            678  223762(degC)      676(degC)\n",
       "count            522143.000000  768158.000000  74144.000000  370309.000000\n",
       "mean                 98.407970      98.570606     99.193821      98.765740\n",
       "std                  14.681127       2.666614      9.735224       2.608901\n",
       "min                 -99.900000       0.000000     26.600000      32.000000\n",
       "25%                  97.500000      97.599998     97.520000      97.879998\n",
       "50%                  98.300000      98.599998     98.600000      98.960002\n",
       "75%                  99.300000      99.599998     99.680000      99.860002\n",
       "max                9637.000000     109.000000    709.700000     115.700000"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_conv.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df_temp_no_oob = drop_oob.transform(df_temp_conv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">degF</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762(degC)</th>\n",
       "      <th>676(degC)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>522116.000000</td>\n",
       "      <td>768158.000000</td>\n",
       "      <td>73638.000000</td>\n",
       "      <td>370309.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>98.352642</td>\n",
       "      <td>98.570606</td>\n",
       "      <td>98.430646</td>\n",
       "      <td>98.765740</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.825922</td>\n",
       "      <td>2.666614</td>\n",
       "      <td>2.467125</td>\n",
       "      <td>2.608901</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>26.600000</td>\n",
       "      <td>32.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>97.500000</td>\n",
       "      <td>97.599998</td>\n",
       "      <td>97.340000</td>\n",
       "      <td>97.879998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>98.300000</td>\n",
       "      <td>98.599998</td>\n",
       "      <td>98.600000</td>\n",
       "      <td>98.960002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>99.300000</td>\n",
       "      <td>99.599998</td>\n",
       "      <td>99.680000</td>\n",
       "      <td>99.860002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>129.000000</td>\n",
       "      <td>109.000000</td>\n",
       "      <td>113.000000</td>\n",
       "      <td>115.700000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label         temperature body                                            \n",
       "status                   known                                            \n",
       "variable_type               qn                                            \n",
       "units                     degF                                            \n",
       "description             223761            678  223762(degC)      676(degC)\n",
       "count            522116.000000  768158.000000  73638.000000  370309.000000\n",
       "mean                 98.352642      98.570606     98.430646      98.765740\n",
       "std                   2.825922       2.666614      2.467125       2.608901\n",
       "min                   0.000000       0.000000     26.600000      32.000000\n",
       "25%                  97.500000      97.599998     97.340000      97.879998\n",
       "50%                  98.300000      98.599998     98.600000      98.960002\n",
       "75%                  99.300000      99.599998     99.680000      99.860002\n",
       "max                 129.000000     109.000000    113.000000     115.700000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_no_oob.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "combine_like = transformers.combine_like_cols()\n",
    "df_temp_combined = combine_like.transform(df_temp_no_oob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th>known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th>qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th>degF</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>all</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"24\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:00:00</th>\n",
       "      <td>97.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:00:00</th>\n",
       "      <td>99.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <td>100.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 01:00:00</th>\n",
       "      <td>99.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 04:00:00</th>\n",
       "      <td>97.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 08:00:00</th>\n",
       "      <td>97.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 12:00:00</th>\n",
       "      <td>97.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 19:00:00</th>\n",
       "      <td>99.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 00:00:00</th>\n",
       "      <td>99.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 04:00:00</th>\n",
       "      <td>99.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 08:00:00</th>\n",
       "      <td>99.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 12:00:00</th>\n",
       "      <td>98.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 16:00:00</th>\n",
       "      <td>99.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 22:00:00</th>\n",
       "      <td>99.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 00:00:00</th>\n",
       "      <td>99.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 04:00:00</th>\n",
       "      <td>99.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 08:00:00</th>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 13:00:00</th>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 19:00:00</th>\n",
       "      <td>99.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 00:00:00</th>\n",
       "      <td>99.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 08:00:00</th>\n",
       "      <td>98.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 12:00:00</th>\n",
       "      <td>97.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 16:00:00</th>\n",
       "      <td>98.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">100003</th>\n",
       "      <th>2150-04-17 20:31:00</th>\n",
       "      <td>95.900000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-17 22:00:00</th>\n",
       "      <td>98.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 00:00:00</th>\n",
       "      <td>97.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 04:00:00</th>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 08:00:00</th>\n",
       "      <td>97.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 11:18:00</th>\n",
       "      <td>96.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"7\" valign=\"top\">199998</th>\n",
       "      <th>2119-02-21 02:00:00</th>\n",
       "      <td>100.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-21 03:00:00</th>\n",
       "      <td>100.220003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-21 04:00:00</th>\n",
       "      <td>99.860002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-21 05:00:00</th>\n",
       "      <td>100.220003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-21 06:00:00</th>\n",
       "      <td>100.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-21 07:00:00</th>\n",
       "      <td>100.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-21 08:00:00</th>\n",
       "      <td>100.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"23\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-06 16:07:00</th>\n",
       "      <td>97.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-06 20:00:00</th>\n",
       "      <td>97.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 00:00:00</th>\n",
       "      <td>97.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 02:00:00</th>\n",
       "      <td>101.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 05:00:00</th>\n",
       "      <td>100.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 08:00:00</th>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 12:00:00</th>\n",
       "      <td>99.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 15:00:00</th>\n",
       "      <td>100.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 18:00:00</th>\n",
       "      <td>97.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 22:00:00</th>\n",
       "      <td>98.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 04:00:00</th>\n",
       "      <td>98.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 09:00:00</th>\n",
       "      <td>97.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 16:00:00</th>\n",
       "      <td>97.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 20:00:00</th>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 00:00:00</th>\n",
       "      <td>99.600000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 05:00:00</th>\n",
       "      <td>99.100000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 08:00:00</th>\n",
       "      <td>97.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 12:00:00</th>\n",
       "      <td>98.300000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 16:00:00</th>\n",
       "      <td>98.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 20:00:00</th>\n",
       "      <td>97.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 00:00:00</th>\n",
       "      <td>97.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 03:00:00</th>\n",
       "      <td>97.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-10 08:00:00</th>\n",
       "      <td>98.100000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1731503 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                      temperature body\n",
       "status                                known\n",
       "variable_type                            qn\n",
       "units                                  degF\n",
       "description                             all\n",
       "id     datetime                            \n",
       "100001 2117-09-11 13:48:00        98.000000\n",
       "       2117-09-11 16:00:00        97.900000\n",
       "       2117-09-11 19:00:00        99.500000\n",
       "       2117-09-11 22:00:00       100.000000\n",
       "       2117-09-12 01:00:00        99.900000\n",
       "       2117-09-12 04:00:00        97.700000\n",
       "       2117-09-12 08:00:00        97.800000\n",
       "       2117-09-12 12:00:00        97.500000\n",
       "       2117-09-12 19:00:00        99.800000\n",
       "       2117-09-13 00:00:00        99.700000\n",
       "       2117-09-13 04:00:00        99.300000\n",
       "       2117-09-13 08:00:00        99.400000\n",
       "       2117-09-13 12:00:00        98.800000\n",
       "       2117-09-13 16:00:00        99.300000\n",
       "       2117-09-13 22:00:00        99.900000\n",
       "       2117-09-14 00:00:00        99.400000\n",
       "       2117-09-14 04:00:00        99.600000\n",
       "       2117-09-14 08:00:00        99.000000\n",
       "       2117-09-14 13:00:00        98.000000\n",
       "       2117-09-14 19:00:00        99.500000\n",
       "       2117-09-15 00:00:00        99.600000\n",
       "       2117-09-15 08:00:00        98.300000\n",
       "       2117-09-15 12:00:00        97.800000\n",
       "       2117-09-15 16:00:00        98.600000\n",
       "100003 2150-04-17 20:31:00        95.900000\n",
       "       2150-04-17 22:00:00        98.200000\n",
       "       2150-04-18 00:00:00        97.400000\n",
       "       2150-04-18 04:00:00        98.000000\n",
       "       2150-04-18 08:00:00        97.400000\n",
       "       2150-04-18 11:18:00        96.300000\n",
       "...                                     ...\n",
       "199998 2119-02-21 02:00:00       100.400000\n",
       "       2119-02-21 03:00:00       100.220003\n",
       "       2119-02-21 04:00:00        99.860002\n",
       "       2119-02-21 05:00:00       100.220003\n",
       "       2119-02-21 06:00:00       100.400000\n",
       "       2119-02-21 07:00:00       100.400000\n",
       "       2119-02-21 08:00:00       100.400000\n",
       "199999 2136-04-06 16:07:00        97.800000\n",
       "       2136-04-06 20:00:00        97.500000\n",
       "       2136-04-07 00:00:00        97.400000\n",
       "       2136-04-07 02:00:00       101.000000\n",
       "       2136-04-07 05:00:00       100.500000\n",
       "       2136-04-07 08:00:00        99.000000\n",
       "       2136-04-07 12:00:00        99.800000\n",
       "       2136-04-07 15:00:00       100.700000\n",
       "       2136-04-07 18:00:00        97.600000\n",
       "       2136-04-07 22:00:00        98.600000\n",
       "       2136-04-08 04:00:00        98.500000\n",
       "       2136-04-08 09:00:00        97.800000\n",
       "       2136-04-08 16:00:00        97.700000\n",
       "       2136-04-08 20:00:00        99.000000\n",
       "       2136-04-09 00:00:00        99.600000\n",
       "       2136-04-09 05:00:00        99.100000\n",
       "       2136-04-09 08:00:00        97.800000\n",
       "       2136-04-09 12:00:00        98.300000\n",
       "       2136-04-09 16:00:00        98.000000\n",
       "       2136-04-09 20:00:00        97.400000\n",
       "       2136-04-10 00:00:00        97.400000\n",
       "       2136-04-10 03:00:00        97.400000\n",
       "       2136-04-10 08:00:00        98.100000\n",
       "\n",
       "[1731503 rows x 1 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_combined"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Segmenting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import segmenting"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_context = utils.open_df('data/mimic_data','context')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pt_id</th>\n",
       "      <th>id</th>\n",
       "      <th>start_dt</th>\n",
       "      <th>end_dt</th>\n",
       "      <th>lang</th>\n",
       "      <th>religion</th>\n",
       "      <th>marital_status</th>\n",
       "      <th>ethnicity</th>\n",
       "      <th>dx_info</th>\n",
       "      <th>admission_location</th>\n",
       "      <th>...</th>\n",
       "      <th>dod</th>\n",
       "      <th>icd_codes</th>\n",
       "      <th>age</th>\n",
       "      <th>icustay_id</th>\n",
       "      <th>dbsource</th>\n",
       "      <th>first_icu</th>\n",
       "      <th>last_icu</th>\n",
       "      <th>intime</th>\n",
       "      <th>outtime</th>\n",
       "      <th>los</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>87</td>\n",
       "      <td>190659</td>\n",
       "      <td>2191-02-25 20:30:00</td>\n",
       "      <td>2191-04-25 15:18:00</td>\n",
       "      <td></td>\n",
       "      <td>UNOBTAINABLE</td>\n",
       "      <td></td>\n",
       "      <td>UNKNOWN/NOT SPECIFIED</td>\n",
       "      <td>NEWBORN</td>\n",
       "      <td>PHYS REFERRAL/NORMAL DELI</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>[V3101, 76515, 76524, 769, 7707, 7775, 7742, 7...</td>\n",
       "      <td>0 days 20:30:00</td>\n",
       "      <td>277633.0</td>\n",
       "      <td>carevue</td>\n",
       "      <td>NICU</td>\n",
       "      <td>NICU</td>\n",
       "      <td>2191-02-25 21:09:05</td>\n",
       "      <td>2191-04-25 15:38:56</td>\n",
       "      <td>58.7707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>88</td>\n",
       "      <td>123010</td>\n",
       "      <td>2111-08-29 03:03:00</td>\n",
       "      <td>2111-09-03 14:24:00</td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>BLACK/AFRICAN AMERICAN</td>\n",
       "      <td>S/P MOTOR VEHICLE ACCIDENT-STABBING</td>\n",
       "      <td>EMERGENCY ROOM ADMIT</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>[86113, 5119, 4239, E966]</td>\n",
       "      <td>10467 days 03:03:00</td>\n",
       "      <td>297289.0</td>\n",
       "      <td>carevue</td>\n",
       "      <td>TSICU</td>\n",
       "      <td>TSICU</td>\n",
       "      <td>2111-08-29 03:04:42</td>\n",
       "      <td>2111-08-30 21:08:09</td>\n",
       "      <td>1.7524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>89</td>\n",
       "      <td>188646</td>\n",
       "      <td>2185-06-17 05:22:00</td>\n",
       "      <td>2185-06-21 11:15:00</td>\n",
       "      <td></td>\n",
       "      <td>UNOBTAINABLE</td>\n",
       "      <td></td>\n",
       "      <td>UNKNOWN/NOT SPECIFIED</td>\n",
       "      <td>NEWBORN</td>\n",
       "      <td>PHYS REFERRAL/NORMAL DELI</td>\n",
       "      <td>...</td>\n",
       "      <td>None</td>\n",
       "      <td>[V3101, 7742, 76516, 76527, 22801, V290]</td>\n",
       "      <td>0 days 05:22:00</td>\n",
       "      <td>249786.0</td>\n",
       "      <td>carevue</td>\n",
       "      <td>NICU</td>\n",
       "      <td>NICU</td>\n",
       "      <td>2185-06-17 05:46:00</td>\n",
       "      <td>2185-06-21 12:00:41</td>\n",
       "      <td>4.2602</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>91</td>\n",
       "      <td>121205</td>\n",
       "      <td>2177-04-23 00:08:00</td>\n",
       "      <td>2177-05-10 15:16:00</td>\n",
       "      <td></td>\n",
       "      <td>JEWISH</td>\n",
       "      <td>MARRIED</td>\n",
       "      <td>WHITE</td>\n",
       "      <td>FEVER</td>\n",
       "      <td>EMERGENCY ROOM ADMIT</td>\n",
       "      <td>...</td>\n",
       "      <td>2177-05-10 00:00:00</td>\n",
       "      <td>[20008, 2765, 5559, 99812, 51881, 4470, 135, 5...</td>\n",
       "      <td>29794 days 00:08:00</td>\n",
       "      <td>218528.0</td>\n",
       "      <td>carevue</td>\n",
       "      <td>CSRU</td>\n",
       "      <td>CSRU</td>\n",
       "      <td>2177-04-27 02:08:00</td>\n",
       "      <td>2177-04-27 14:03:00</td>\n",
       "      <td>0.4965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>91</td>\n",
       "      <td>121205</td>\n",
       "      <td>2177-04-23 00:08:00</td>\n",
       "      <td>2177-05-10 15:16:00</td>\n",
       "      <td></td>\n",
       "      <td>JEWISH</td>\n",
       "      <td>MARRIED</td>\n",
       "      <td>WHITE</td>\n",
       "      <td>FEVER</td>\n",
       "      <td>EMERGENCY ROOM ADMIT</td>\n",
       "      <td>...</td>\n",
       "      <td>2177-05-10 00:00:00</td>\n",
       "      <td>[20008, 2765, 5559, 99812, 51881, 4470, 135, 5...</td>\n",
       "      <td>29794 days 00:08:00</td>\n",
       "      <td>256972.0</td>\n",
       "      <td>carevue</td>\n",
       "      <td>MICU</td>\n",
       "      <td>MICU</td>\n",
       "      <td>2177-05-07 03:52:00</td>\n",
       "      <td>2177-05-10 15:16:00</td>\n",
       "      <td>3.4750</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 22 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   pt_id      id            start_dt              end_dt lang      religion  \\\n",
       "0     87  190659 2191-02-25 20:30:00 2191-04-25 15:18:00       UNOBTAINABLE   \n",
       "1     88  123010 2111-08-29 03:03:00 2111-09-03 14:24:00                      \n",
       "2     89  188646 2185-06-17 05:22:00 2185-06-21 11:15:00       UNOBTAINABLE   \n",
       "3     91  121205 2177-04-23 00:08:00 2177-05-10 15:16:00             JEWISH   \n",
       "4     91  121205 2177-04-23 00:08:00 2177-05-10 15:16:00             JEWISH   \n",
       "\n",
       "  marital_status               ethnicity                              dx_info  \\\n",
       "0                  UNKNOWN/NOT SPECIFIED                              NEWBORN   \n",
       "1                 BLACK/AFRICAN AMERICAN  S/P MOTOR VEHICLE ACCIDENT-STABBING   \n",
       "2                  UNKNOWN/NOT SPECIFIED                              NEWBORN   \n",
       "3        MARRIED                   WHITE                                FEVER   \n",
       "4        MARRIED                   WHITE                                FEVER   \n",
       "\n",
       "          admission_location   ...                     dod  \\\n",
       "0  PHYS REFERRAL/NORMAL DELI   ...                    None   \n",
       "1       EMERGENCY ROOM ADMIT   ...                    None   \n",
       "2  PHYS REFERRAL/NORMAL DELI   ...                    None   \n",
       "3       EMERGENCY ROOM ADMIT   ...     2177-05-10 00:00:00   \n",
       "4       EMERGENCY ROOM ADMIT   ...     2177-05-10 00:00:00   \n",
       "\n",
       "                                           icd_codes                 age  \\\n",
       "0  [V3101, 76515, 76524, 769, 7707, 7775, 7742, 7...     0 days 20:30:00   \n",
       "1                          [86113, 5119, 4239, E966] 10467 days 03:03:00   \n",
       "2           [V3101, 7742, 76516, 76527, 22801, V290]     0 days 05:22:00   \n",
       "3  [20008, 2765, 5559, 99812, 51881, 4470, 135, 5... 29794 days 00:08:00   \n",
       "4  [20008, 2765, 5559, 99812, 51881, 4470, 135, 5... 29794 days 00:08:00   \n",
       "\n",
       "  icustay_id  dbsource  first_icu last_icu              intime  \\\n",
       "0   277633.0   carevue       NICU     NICU 2191-02-25 21:09:05   \n",
       "1   297289.0   carevue      TSICU    TSICU 2111-08-29 03:04:42   \n",
       "2   249786.0   carevue       NICU     NICU 2185-06-17 05:46:00   \n",
       "3   218528.0   carevue       CSRU     CSRU 2177-04-27 02:08:00   \n",
       "4   256972.0   carevue       MICU     MICU 2177-05-07 03:52:00   \n",
       "\n",
       "              outtime      los  \n",
       "0 2191-04-25 15:38:56  58.7707  \n",
       "1 2111-08-30 21:08:09   1.7524  \n",
       "2 2185-06-21 12:00:41   4.2602  \n",
       "3 2177-04-27 14:03:00   0.4965  \n",
       "4 2177-05-10 15:16:00   3.4750  \n",
       "\n",
       "[5 rows x 22 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_context.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Draw one random row per 'id' group from the first column of df_temp_combined.\n",
    "# The seed is fixed so the sampled rows (and every segment derived from them)\n",
    "# are identical after a kernel restart; the same seed is used for each group.\n",
    "end_dt = df_temp_combined.iloc[:, 0].groupby(level='id').apply(\n",
    "    lambda grp: grp.sample(1, random_state=0))\n",
    "# Drop index level 0 (presumably the group key that apply() prepends -- TODO\n",
    "# confirm), move index level 1 (presumably the datetime) out into a column, and\n",
    "# keep only that first column, leaving one value per remaining index entry.\n",
    "end_dt = end_dt.reset_index(level=0, drop=True).reset_index(level=1, drop=False).iloc[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Re-import the local helper modules so that edits made on disk take effect\n",
    "# without restarting the kernel (reload is the Python 2 builtin).\n",
    "reload(segmenting)\n",
    "reload(utils)\n",
    "\n",
    "# Build three objects from the segmenting module; all_before is later used\n",
    "# through its .transform() method. NOTE(review): the literal 10 is presumably a\n",
    "# number of hours in both calls -- confirm against the segmenting module.\n",
    "all_before = segmenting.all_before(end_dt, df_context)\n",
    "n_hrs_before = segmenting.n_hrs_before(end_dt, 10)\n",
    "periodic = segmenting.periodic(10, df_context)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\tools\\merge.py:480: UserWarning: merging between different levels can give an unintended result (3 levels on the left, 1 on the right)\n",
      "  warnings.warn(msg, UserWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"2\" halign=\"left\">degF</th>\n",
       "      <th colspan=\"2\" halign=\"left\">degC</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762</th>\n",
       "      <th>676</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>seg_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"8\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 01:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 12:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100003</th>\n",
       "      <th>2150-04-17 20:31:00</th>\n",
       "      <th>0</th>\n",
       "      <td>95.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-17 22:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"19\" valign=\"top\">100006</th>\n",
       "      <th>2108-04-06 16:30:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-06 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-07 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-07 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-07 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-07 12:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-07 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-08 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.599998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-08 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-08 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-08 12:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-08 16:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-08 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-09 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-09 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-09 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-09 12:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-09 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.599998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2108-04-10 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"10\" valign=\"top\">199998</th>\n",
       "      <th>2119-02-20 17:15:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.299999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 17:30:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 17:45:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.599998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 18:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.900002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 18:30:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.200001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 19:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.599998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.799999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 21:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.799999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 22:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.799999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2119-02-20 23:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.700001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"20\" valign=\"top\">199999</th>\n",
       "      <th>2136-04-06 16:07:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-06 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 02:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>101.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 05:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 12:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 15:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 18:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-07 22:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 09:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 16:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-08 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 05:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 12:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 16:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2136-04-09 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>839877 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                             temperature body                    \\\n",
       "units                                         degF              degC   \n",
       "description                                 223761     678    223762   \n",
       "id     datetime            seg_id                                      \n",
       "100001 2117-09-11 13:48:00 0                  98.0        NaN    NaN   \n",
       "       2117-09-11 16:00:00 0                  97.9        NaN    NaN   \n",
       "       2117-09-11 19:00:00 0                  99.5        NaN    NaN   \n",
       "       2117-09-11 22:00:00 0                 100.0        NaN    NaN   \n",
       "       2117-09-12 01:00:00 0                  99.9        NaN    NaN   \n",
       "       2117-09-12 04:00:00 0                  97.7        NaN    NaN   \n",
       "       2117-09-12 08:00:00 0                  97.8        NaN    NaN   \n",
       "       2117-09-12 12:00:00 0                  97.5        NaN    NaN   \n",
       "100003 2150-04-17 20:31:00 0                  95.9        NaN    NaN   \n",
       "       2150-04-17 22:00:00 0                  98.2        NaN    NaN   \n",
       "       2150-04-18 00:00:00 0                  97.4        NaN    NaN   \n",
       "100006 2108-04-06 16:30:00 0                   NaN  97.000000    NaN   \n",
       "       2108-04-06 20:00:00 0                   NaN  97.400002    NaN   \n",
       "       2108-04-07 00:00:00 0                   NaN  97.800003    NaN   \n",
       "       2108-04-07 04:00:00 0                   NaN  97.000000    NaN   \n",
       "       2108-04-07 08:00:00 0                   NaN  97.099998    NaN   \n",
       "       2108-04-07 12:00:00 0                   NaN  97.000000    NaN   \n",
       "       2108-04-07 20:00:00 0                   NaN  98.800003    NaN   \n",
       "       2108-04-08 00:00:00 0                   NaN  98.599998    NaN   \n",
       "       2108-04-08 04:00:00 0                   NaN  98.000000    NaN   \n",
       "       2108-04-08 08:00:00 0                   NaN  97.199997    NaN   \n",
       "       2108-04-08 12:00:00 0                   NaN  97.000000    NaN   \n",
       "       2108-04-08 16:00:00 0                   NaN  98.000000    NaN   \n",
       "       2108-04-08 20:00:00 0                   NaN  98.800003    NaN   \n",
       "       2108-04-09 00:00:00 0                   NaN  97.199997    NaN   \n",
       "       2108-04-09 04:00:00 0                   NaN  97.400002    NaN   \n",
       "       2108-04-09 08:00:00 0                   NaN  97.099998    NaN   \n",
       "       2108-04-09 12:00:00 0                   NaN  97.300003    NaN   \n",
       "       2108-04-09 20:00:00 0                   NaN  97.599998    NaN   \n",
       "       2108-04-10 00:00:00 0                   NaN  97.000000    NaN   \n",
       "...                                            ...        ...    ...   \n",
       "199998 2119-02-20 17:15:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 17:30:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 17:45:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 18:00:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 18:30:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 19:00:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 20:00:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 21:00:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 22:00:00 0                   NaN        NaN    NaN   \n",
       "       2119-02-20 23:00:00 0                   NaN        NaN    NaN   \n",
       "199999 2136-04-06 16:07:00 0                  97.8        NaN    NaN   \n",
       "       2136-04-06 20:00:00 0                  97.5        NaN    NaN   \n",
       "       2136-04-07 00:00:00 0                  97.4        NaN    NaN   \n",
       "       2136-04-07 02:00:00 0                 101.0        NaN    NaN   \n",
       "       2136-04-07 05:00:00 0                 100.5        NaN    NaN   \n",
       "       2136-04-07 08:00:00 0                  99.0        NaN    NaN   \n",
       "       2136-04-07 12:00:00 0                  99.8        NaN    NaN   \n",
       "       2136-04-07 15:00:00 0                 100.7        NaN    NaN   \n",
       "       2136-04-07 18:00:00 0                  97.6        NaN    NaN   \n",
       "       2136-04-07 22:00:00 0                  98.6        NaN    NaN   \n",
       "       2136-04-08 04:00:00 0                  98.5        NaN    NaN   \n",
       "       2136-04-08 09:00:00 0                  97.8        NaN    NaN   \n",
       "       2136-04-08 16:00:00 0                  97.7        NaN    NaN   \n",
       "       2136-04-08 20:00:00 0                  99.0        NaN    NaN   \n",
       "       2136-04-09 00:00:00 0                  99.6        NaN    NaN   \n",
       "       2136-04-09 05:00:00 0                  99.1        NaN    NaN   \n",
       "       2136-04-09 08:00:00 0                  97.8        NaN    NaN   \n",
       "       2136-04-09 12:00:00 0                  98.3        NaN    NaN   \n",
       "       2136-04-09 16:00:00 0                  98.0        NaN    NaN   \n",
       "       2136-04-09 20:00:00 0                  97.4        NaN    NaN   \n",
       "\n",
       "label                                         \n",
       "units                                         \n",
       "description                           676     \n",
       "id     datetime            seg_id             \n",
       "100001 2117-09-11 13:48:00 0             NaN  \n",
       "       2117-09-11 16:00:00 0             NaN  \n",
       "       2117-09-11 19:00:00 0             NaN  \n",
       "       2117-09-11 22:00:00 0             NaN  \n",
       "       2117-09-12 01:00:00 0             NaN  \n",
       "       2117-09-12 04:00:00 0             NaN  \n",
       "       2117-09-12 08:00:00 0             NaN  \n",
       "       2117-09-12 12:00:00 0             NaN  \n",
       "100003 2150-04-17 20:31:00 0             NaN  \n",
       "       2150-04-17 22:00:00 0             NaN  \n",
       "       2150-04-18 00:00:00 0             NaN  \n",
       "100006 2108-04-06 16:30:00 0             NaN  \n",
       "       2108-04-06 20:00:00 0             NaN  \n",
       "       2108-04-07 00:00:00 0             NaN  \n",
       "       2108-04-07 04:00:00 0             NaN  \n",
       "       2108-04-07 08:00:00 0             NaN  \n",
       "       2108-04-07 12:00:00 0             NaN  \n",
       "       2108-04-07 20:00:00 0             NaN  \n",
       "       2108-04-08 00:00:00 0             NaN  \n",
       "       2108-04-08 04:00:00 0             NaN  \n",
       "       2108-04-08 08:00:00 0             NaN  \n",
       "       2108-04-08 12:00:00 0             NaN  \n",
       "       2108-04-08 16:00:00 0             NaN  \n",
       "       2108-04-08 20:00:00 0             NaN  \n",
       "       2108-04-09 00:00:00 0             NaN  \n",
       "       2108-04-09 04:00:00 0             NaN  \n",
       "       2108-04-09 08:00:00 0             NaN  \n",
       "       2108-04-09 12:00:00 0             NaN  \n",
       "       2108-04-09 20:00:00 0             NaN  \n",
       "       2108-04-10 00:00:00 0             NaN  \n",
       "...                                      ...  \n",
       "199998 2119-02-20 17:15:00 0       36.299999  \n",
       "       2119-02-20 17:30:00 0       36.500000  \n",
       "       2119-02-20 17:45:00 0       36.599998  \n",
       "       2119-02-20 18:00:00 0       36.900002  \n",
       "       2119-02-20 18:30:00 0       37.200001  \n",
       "       2119-02-20 19:00:00 0       37.599998  \n",
       "       2119-02-20 20:00:00 0       37.799999  \n",
       "       2119-02-20 21:00:00 0       37.799999  \n",
       "       2119-02-20 22:00:00 0       37.799999  \n",
       "       2119-02-20 23:00:00 0       37.700001  \n",
       "199999 2136-04-06 16:07:00 0             NaN  \n",
       "       2136-04-06 20:00:00 0             NaN  \n",
       "       2136-04-07 00:00:00 0             NaN  \n",
       "       2136-04-07 02:00:00 0             NaN  \n",
       "       2136-04-07 05:00:00 0             NaN  \n",
       "       2136-04-07 08:00:00 0             NaN  \n",
       "       2136-04-07 12:00:00 0             NaN  \n",
       "       2136-04-07 15:00:00 0             NaN  \n",
       "       2136-04-07 18:00:00 0             NaN  \n",
       "       2136-04-07 22:00:00 0             NaN  \n",
       "       2136-04-08 04:00:00 0             NaN  \n",
       "       2136-04-08 09:00:00 0             NaN  \n",
       "       2136-04-08 16:00:00 0             NaN  \n",
       "       2136-04-08 20:00:00 0             NaN  \n",
       "       2136-04-09 00:00:00 0             NaN  \n",
       "       2136-04-09 05:00:00 0             NaN  \n",
       "       2136-04-09 08:00:00 0             NaN  \n",
       "       2136-04-09 12:00:00 0             NaN  \n",
       "       2136-04-09 16:00:00 0             NaN  \n",
       "       2136-04-09 20:00:00 0             NaN  \n",
       "\n",
       "[839877 rows x 4 columns]"
      ]
     },
     "execution_count": 138,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_all_before = all_before.transform(df_temp_cl)\n",
    "df_temp_all_before[df_temp_all_before.index.get_level_values('seg_id') > -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 262,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                            seg_id\n",
      "id     datetime                   \n",
      "100001 2117-09-11 22:00:00       0\n",
      "       2117-09-12 01:00:00       0\n",
      "       2117-09-12 04:00:00       0\n",
      "100006 2108-04-07 20:00:00       0\n",
      "100007 2145-04-04 00:00:00       0\n",
      "       2145-04-04 04:00:00       0\n",
      "100009 2162-05-19 03:00:00       0\n",
      "100010 2109-12-10 22:47:00       0\n",
      "       2109-12-11 00:00:00       0\n",
      "100011 2177-09-07 09:00:00       0\n",
      "       2177-09-07 13:00:00       0\n",
      "       2177-09-07 17:00:00       0\n",
      "label                             temperature body                           \n",
      "status                                       known                           \n",
      "variable_type                                   qn                           \n",
      "units                                         degF                           \n",
      "description                                 223761 678 223762(degC) 676(degC)\n",
      "id     datetime            seg_id                                            \n",
      "100001 2117-09-11 13:48:00 -1                 98.0 NaN          NaN       NaN\n",
      "       2117-09-11 16:00:00 -1                 97.9 NaN          NaN       NaN\n",
      "       2117-09-11 19:00:00 -1                 99.5 NaN          NaN       NaN\n",
      "       2117-09-11 22:00:00  0                100.0 NaN          NaN       NaN\n",
      "       2117-09-12 01:00:00  0                 99.9 NaN          NaN       NaN\n",
      "       2117-09-12 04:00:00  0                 97.7 NaN          NaN       NaN\n",
      "       2117-09-12 08:00:00 -1                 97.8 NaN          NaN       NaN\n",
      "       2117-09-12 12:00:00 -1                 97.5 NaN          NaN       NaN\n",
      "       2117-09-12 19:00:00 -1                 99.8 NaN          NaN       NaN\n",
      "       2117-09-13 00:00:00 -1                 99.7 NaN          NaN       NaN\n",
      "       2117-09-13 04:00:00 -1                 99.3 NaN          NaN       NaN\n",
      "       2117-09-13 08:00:00 -1                 99.4 NaN          NaN       NaN\n",
      "       2117-09-13 12:00:00 -1                 98.8 NaN          NaN       NaN\n",
      "       2117-09-13 16:00:00 -1                 99.3 NaN          NaN       NaN\n",
      "       2117-09-13 22:00:00 -1                 99.9 NaN          NaN       NaN\n",
      "       2117-09-14 00:00:00 -1                 99.4 NaN          NaN       NaN\n",
      "       2117-09-14 04:00:00 -1                 99.6 NaN          NaN       NaN\n",
      "       2117-09-14 08:00:00 -1                 99.0 NaN          NaN       NaN\n",
      "       2117-09-14 13:00:00 -1                 98.0 NaN          NaN       NaN\n",
      "       2117-09-14 19:00:00 -1                 99.5 NaN          NaN       NaN\n",
      "       2117-09-15 00:00:00 -1                 99.6 NaN          NaN       NaN\n",
      "       2117-09-15 08:00:00 -1                 98.3 NaN          NaN       NaN\n",
      "       2117-09-15 12:00:00 -1                 97.8 NaN          NaN       NaN\n",
      "       2117-09-15 16:00:00 -1                 98.6 NaN          NaN       NaN\n",
      "100003 2150-04-17 20:31:00 -1                 95.9 NaN          NaN       NaN\n",
      "       2150-04-17 22:00:00 -1                 98.2 NaN          NaN       NaN\n",
      "       2150-04-18 00:00:00 -1                 97.4 NaN          NaN       NaN\n",
      "       2150-04-18 04:00:00 -1                 98.0 NaN          NaN       NaN\n",
      "       2150-04-18 08:00:00 -1                 97.4 NaN          NaN       NaN\n",
      "       2150-04-18 11:18:00 -1                 96.3 NaN          NaN       NaN\n",
      "...                                            ...  ..          ...       ...\n",
      "100011 2177-09-04 19:00:00 -1                101.0 NaN          NaN       NaN\n",
      "       2177-09-04 20:00:00 -1                101.2 NaN          NaN       NaN\n",
      "       2177-09-04 22:00:00 -1                102.1 NaN          NaN       NaN\n",
      "       2177-09-05 00:00:00 -1                101.8 NaN          NaN       NaN\n",
      "       2177-09-05 02:00:00 -1                100.8 NaN          NaN       NaN\n",
      "       2177-09-05 04:00:00 -1                100.1 NaN          NaN       NaN\n",
      "       2177-09-05 06:00:00 -1                100.1 NaN          NaN       NaN\n",
      "       2177-09-05 08:00:00 -1                 99.5 NaN          NaN       NaN\n",
      "       2177-09-05 10:00:00 -1                101.4 NaN          NaN       NaN\n",
      "       2177-09-05 12:00:00 -1                101.1 NaN          NaN       NaN\n",
      "       2177-09-05 14:00:00 -1                 99.8 NaN          NaN       NaN\n",
      "       2177-09-05 16:00:00 -1                 97.5 NaN          NaN       NaN\n",
      "       2177-09-05 20:00:00 -1                102.0 NaN          NaN       NaN\n",
      "       2177-09-05 23:00:00 -1                100.8 NaN          NaN       NaN\n",
      "       2177-09-06 02:00:00 -1                101.0 NaN          NaN       NaN\n",
      "       2177-09-06 04:00:00 -1                101.2 NaN          NaN       NaN\n",
      "       2177-09-06 08:00:00 -1                100.1 NaN          NaN       NaN\n",
      "       2177-09-06 12:00:00 -1                100.1 NaN          NaN       NaN\n",
      "       2177-09-06 16:00:00 -1                102.7 NaN          NaN       NaN\n",
      "       2177-09-06 18:00:00 -1                102.0 NaN          NaN       NaN\n",
      "       2177-09-06 20:00:00 -1                101.3 NaN          NaN       NaN\n",
      "       2177-09-06 23:00:00 -1                101.9 NaN          NaN       NaN\n",
      "       2177-09-07 00:00:00 -1                 99.9 NaN          NaN       NaN\n",
      "       2177-09-07 04:00:00 -1                100.5 NaN          NaN       NaN\n",
      "       2177-09-07 09:00:00  0                 99.5 NaN          NaN       NaN\n",
      "       2177-09-07 13:00:00  0                 99.1 NaN          NaN       NaN\n",
      "       2177-09-07 17:00:00  0                 99.2 NaN          NaN       NaN\n",
      "       2177-09-07 19:00:00 -1                 99.9 NaN          NaN       NaN\n",
      "       2177-09-08 00:00:00 -1                 99.8 NaN          NaN       NaN\n",
      "       2177-09-08 04:00:00 -1                 99.5 NaN          NaN       NaN\n",
      "\n",
      "[200 rows x 4 columns]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">degF</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762(degC)</th>\n",
       "      <th>676(degC)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>seg_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 01:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100006</th>\n",
       "      <th>2108-04-07 20:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100007</th>\n",
       "      <th>2145-04-04 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.699997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2145-04-04 04:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100009</th>\n",
       "      <th>2162-05-19 03:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100010</th>\n",
       "      <th>2109-12-10 22:47:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2109-12-11 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100011</th>\n",
       "      <th>2177-09-07 09:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-09-07 13:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2177-09-07 17:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "label                             temperature body                          \\\n",
       "status                                       known                           \n",
       "variable_type                                   qn                           \n",
       "units                                         degF                           \n",
       "description                                 223761        678 223762(degC)   \n",
       "id     datetime            seg_id                                            \n",
       "100001 2117-09-11 22:00:00 0                 100.0        NaN          NaN   \n",
       "       2117-09-12 01:00:00 0                  99.9        NaN          NaN   \n",
       "       2117-09-12 04:00:00 0                  97.7        NaN          NaN   \n",
       "100006 2108-04-07 20:00:00 0                   NaN  98.800003          NaN   \n",
       "100007 2145-04-04 00:00:00 0                   NaN  98.699997          NaN   \n",
       "       2145-04-04 04:00:00 0                   NaN  97.400002          NaN   \n",
       "100009 2162-05-19 03:00:00 0                  98.6        NaN          NaN   \n",
       "100010 2109-12-10 22:47:00 0                 100.2        NaN          NaN   \n",
       "       2109-12-11 00:00:00 0                  98.8        NaN          NaN   \n",
       "100011 2177-09-07 09:00:00 0                  99.5        NaN          NaN   \n",
       "       2177-09-07 13:00:00 0                  99.1        NaN          NaN   \n",
       "       2177-09-07 17:00:00 0                  99.2        NaN          NaN   \n",
       "\n",
       "label                                        \n",
       "status                                       \n",
       "variable_type                                \n",
       "units                                        \n",
       "description                       676(degC)  \n",
       "id     datetime            seg_id            \n",
       "100001 2117-09-11 22:00:00 0            NaN  \n",
       "       2117-09-12 01:00:00 0            NaN  \n",
       "       2117-09-12 04:00:00 0            NaN  \n",
       "100006 2108-04-07 20:00:00 0            NaN  \n",
       "100007 2145-04-04 00:00:00 0            NaN  \n",
       "       2145-04-04 04:00:00 0            NaN  \n",
       "100009 2162-05-19 03:00:00 0            NaN  \n",
       "100010 2109-12-10 22:47:00 0            NaN  \n",
       "       2109-12-11 00:00:00 0            NaN  \n",
       "100011 2177-09-07 09:00:00 0            NaN  \n",
       "       2177-09-07 13:00:00 0            NaN  \n",
       "       2177-09-07 17:00:00 0            NaN  "
      ]
     },
     "execution_count": 262,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Transform the first 200 rows of df_temp_combined, then show only rows whose 'seg_id' index level is > -1.\n",
    "df_temp_Nhr_before = n_hrs_before.transform(df_temp_combined.head(200))\n",
    "df_temp_Nhr_before.loc[\n",
    "    df_temp_Nhr_before.index.get_level_values('seg_id') > -1\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 266,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th colspan=\"4\" halign=\"left\">temperature body</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"4\" halign=\"left\">known</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"4\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"4\" halign=\"left\">degF</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>223761</th>\n",
       "      <th>678</th>\n",
       "      <th>223762(degC)</th>\n",
       "      <th>676(degC)</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>seg_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"24\" valign=\"top\">100001</th>\n",
       "      <th>2117-09-11 13:48:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 16:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 19:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-11 22:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 01:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 04:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>97.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 08:00:00</th>\n",
       "      <th>2</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 12:00:00</th>\n",
       "      <th>2</th>\n",
       "      <td>97.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-12 19:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>99.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 00:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>99.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 04:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>99.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 08:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>99.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 12:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>98.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 16:00:00</th>\n",
       "      <th>5</th>\n",
       "      <td>99.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-13 22:00:00</th>\n",
       "      <th>5</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 00:00:00</th>\n",
       "      <th>6</th>\n",
       "      <td>99.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 04:00:00</th>\n",
       "      <th>6</th>\n",
       "      <td>99.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 08:00:00</th>\n",
       "      <th>6</th>\n",
       "      <td>99.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 13:00:00</th>\n",
       "      <th>7</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-14 19:00:00</th>\n",
       "      <th>7</th>\n",
       "      <td>99.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 00:00:00</th>\n",
       "      <th>8</th>\n",
       "      <td>99.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 08:00:00</th>\n",
       "      <th>9</th>\n",
       "      <td>98.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 12:00:00</th>\n",
       "      <th>9</th>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2117-09-15 16:00:00</th>\n",
       "      <th>10</th>\n",
       "      <td>98.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"6\" valign=\"top\">100003</th>\n",
       "      <th>2150-04-17 20:31:00</th>\n",
       "      <th>0</th>\n",
       "      <td>95.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-17 22:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 00:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>97.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 04:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 08:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>97.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2150-04-18 11:18:00</th>\n",
       "      <th>1</th>\n",
       "      <td>96.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"16\" valign=\"top\">100114</th>\n",
       "      <th>2157-09-19 18:15:00</th>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-19 18:30:00</th>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-19 20:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-19 23:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 00:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.599998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 01:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 01:15:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 02:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 06:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 08:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.699997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 09:15:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 09:30:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 10:00:00</th>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 12:00:00</th>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 15:00:00</th>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2157-09-20 18:00:00</th>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"13\" valign=\"top\">100116</th>\n",
       "      <th>2173-07-27 05:13:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 08:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 09:30:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 11:00:00</th>\n",
       "      <th>0</th>\n",
       "      <td>98.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 14:25:00</th>\n",
       "      <th>0</th>\n",
       "      <td>100.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 16:30:00</th>\n",
       "      <th>1</th>\n",
       "      <td>100.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 18:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>99.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-27 20:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>100.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-28 00:00:00</th>\n",
       "      <th>1</th>\n",
       "      <td>99.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-28 04:00:00</th>\n",
       "      <th>2</th>\n",
       "      <td>98.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-28 08:00:00</th>\n",
       "      <th>2</th>\n",
       "      <td>99.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-28 12:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>98.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2173-07-28 16:00:00</th>\n",
       "      <th>3</th>\n",
       "      <td>99.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100117</th>\n",
       "      <th>2166-04-30 21:39:00</th>\n",
       "      <th>0</th>\n",
       "      <td>99.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2000 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "label                             temperature body                          \\\n",
       "status                                       known                           \n",
       "variable_type                                   qn                           \n",
       "units                                         degF                           \n",
       "description                                 223761        678 223762(degC)   \n",
       "id     datetime            seg_id                                            \n",
       "100001 2117-09-11 13:48:00 0                  98.0        NaN          NaN   \n",
       "       2117-09-11 16:00:00 0                  97.9        NaN          NaN   \n",
       "       2117-09-11 19:00:00 0                  99.5        NaN          NaN   \n",
       "       2117-09-11 22:00:00 1                 100.0        NaN          NaN   \n",
       "       2117-09-12 01:00:00 1                  99.9        NaN          NaN   \n",
       "       2117-09-12 04:00:00 1                  97.7        NaN          NaN   \n",
       "       2117-09-12 08:00:00 2                  97.8        NaN          NaN   \n",
       "       2117-09-12 12:00:00 2                  97.5        NaN          NaN   \n",
       "       2117-09-12 19:00:00 3                  99.8        NaN          NaN   \n",
       "       2117-09-13 00:00:00 3                  99.7        NaN          NaN   \n",
       "       2117-09-13 04:00:00 4                  99.3        NaN          NaN   \n",
       "       2117-09-13 08:00:00 4                  99.4        NaN          NaN   \n",
       "       2117-09-13 12:00:00 4                  98.8        NaN          NaN   \n",
       "       2117-09-13 16:00:00 5                  99.3        NaN          NaN   \n",
       "       2117-09-13 22:00:00 5                  99.9        NaN          NaN   \n",
       "       2117-09-14 00:00:00 6                  99.4        NaN          NaN   \n",
       "       2117-09-14 04:00:00 6                  99.6        NaN          NaN   \n",
       "       2117-09-14 08:00:00 6                  99.0        NaN          NaN   \n",
       "       2117-09-14 13:00:00 7                  98.0        NaN          NaN   \n",
       "       2117-09-14 19:00:00 7                  99.5        NaN          NaN   \n",
       "       2117-09-15 00:00:00 8                  99.6        NaN          NaN   \n",
       "       2117-09-15 08:00:00 9                  98.3        NaN          NaN   \n",
       "       2117-09-15 12:00:00 9                  97.8        NaN          NaN   \n",
       "       2117-09-15 16:00:00 10                 98.6        NaN          NaN   \n",
       "100003 2150-04-17 20:31:00 0                  95.9        NaN          NaN   \n",
       "       2150-04-17 22:00:00 0                  98.2        NaN          NaN   \n",
       "       2150-04-18 00:00:00 0                  97.4        NaN          NaN   \n",
       "       2150-04-18 04:00:00 1                  98.0        NaN          NaN   \n",
       "       2150-04-18 08:00:00 1                  97.4        NaN          NaN   \n",
       "       2150-04-18 11:18:00 1                  96.3        NaN          NaN   \n",
       "...                                            ...        ...          ...   \n",
       "100114 2157-09-19 18:15:00 3                   NaN  98.400002          NaN   \n",
       "       2157-09-19 18:30:00 3                   NaN  98.300003          NaN   \n",
       "       2157-09-19 20:00:00 3                   NaN  98.400002          NaN   \n",
       "       2157-09-19 23:00:00 3                   NaN  98.400002          NaN   \n",
       "       2157-09-20 00:00:00 3                   NaN  98.599998          NaN   \n",
       "       2157-09-20 01:00:00 3                   NaN  98.000000          NaN   \n",
       "       2157-09-20 01:15:00 4                   NaN  98.199997          NaN   \n",
       "       2157-09-20 02:00:00 4                   NaN  97.099998          NaN   \n",
       "       2157-09-20 06:00:00 4                   NaN  98.000000          NaN   \n",
       "       2157-09-20 08:00:00 4                   NaN  96.699997          NaN   \n",
       "       2157-09-20 09:15:00 4                   NaN  96.900002          NaN   \n",
       "       2157-09-20 09:30:00 4                   NaN  97.000000          NaN   \n",
       "       2157-09-20 10:00:00 4                   NaN  96.900002          NaN   \n",
       "       2157-09-20 12:00:00 5                   NaN  97.199997          NaN   \n",
       "       2157-09-20 15:00:00 5                   NaN  98.400002          NaN   \n",
       "       2157-09-20 18:00:00 5                   NaN  97.800003          NaN   \n",
       "100116 2173-07-27 05:13:00 0                  98.9        NaN          NaN   \n",
       "       2173-07-27 08:00:00 0                 100.0        NaN          NaN   \n",
       "       2173-07-27 09:30:00 0                  99.7        NaN          NaN   \n",
       "       2173-07-27 11:00:00 0                  98.7        NaN          NaN   \n",
       "       2173-07-27 14:25:00 0                 100.4        NaN          NaN   \n",
       "       2173-07-27 16:30:00 1                 100.3        NaN          NaN   \n",
       "       2173-07-27 18:00:00 1                  99.9        NaN          NaN   \n",
       "       2173-07-27 20:00:00 1                 100.3        NaN          NaN   \n",
       "       2173-07-28 00:00:00 1                  99.7        NaN          NaN   \n",
       "       2173-07-28 04:00:00 2                  98.5        NaN          NaN   \n",
       "       2173-07-28 08:00:00 2                  99.4        NaN          NaN   \n",
       "       2173-07-28 12:00:00 3                  98.5        NaN          NaN   \n",
       "       2173-07-28 16:00:00 3                  99.4        NaN          NaN   \n",
       "100117 2166-04-30 21:39:00 0                  99.7        NaN          NaN   \n",
       "\n",
       "label                                        \n",
       "status                                       \n",
       "variable_type                                \n",
       "units                                        \n",
       "description                       676(degC)  \n",
       "id     datetime            seg_id            \n",
       "100001 2117-09-11 13:48:00 0            NaN  \n",
       "       2117-09-11 16:00:00 0            NaN  \n",
       "       2117-09-11 19:00:00 0            NaN  \n",
       "       2117-09-11 22:00:00 1            NaN  \n",
       "       2117-09-12 01:00:00 1            NaN  \n",
       "       2117-09-12 04:00:00 1            NaN  \n",
       "       2117-09-12 08:00:00 2            NaN  \n",
       "       2117-09-12 12:00:00 2            NaN  \n",
       "       2117-09-12 19:00:00 3            NaN  \n",
       "       2117-09-13 00:00:00 3            NaN  \n",
       "       2117-09-13 04:00:00 4            NaN  \n",
       "       2117-09-13 08:00:00 4            NaN  \n",
       "       2117-09-13 12:00:00 4            NaN  \n",
       "       2117-09-13 16:00:00 5            NaN  \n",
       "       2117-09-13 22:00:00 5            NaN  \n",
       "       2117-09-14 00:00:00 6            NaN  \n",
       "       2117-09-14 04:00:00 6            NaN  \n",
       "       2117-09-14 08:00:00 6            NaN  \n",
       "       2117-09-14 13:00:00 7            NaN  \n",
       "       2117-09-14 19:00:00 7            NaN  \n",
       "       2117-09-15 00:00:00 8            NaN  \n",
       "       2117-09-15 08:00:00 9            NaN  \n",
       "       2117-09-15 12:00:00 9            NaN  \n",
       "       2117-09-15 16:00:00 10           NaN  \n",
       "100003 2150-04-17 20:31:00 0            NaN  \n",
       "       2150-04-17 22:00:00 0            NaN  \n",
       "       2150-04-18 00:00:00 0            NaN  \n",
       "       2150-04-18 04:00:00 1            NaN  \n",
       "       2150-04-18 08:00:00 1            NaN  \n",
       "       2150-04-18 11:18:00 1            NaN  \n",
       "...                                     ...  \n",
       "100114 2157-09-19 18:15:00 3            NaN  \n",
       "       2157-09-19 18:30:00 3            NaN  \n",
       "       2157-09-19 20:00:00 3            NaN  \n",
       "       2157-09-19 23:00:00 3            NaN  \n",
       "       2157-09-20 00:00:00 3            NaN  \n",
       "       2157-09-20 01:00:00 3            NaN  \n",
       "       2157-09-20 01:15:00 4            NaN  \n",
       "       2157-09-20 02:00:00 4            NaN  \n",
       "       2157-09-20 06:00:00 4            NaN  \n",
       "       2157-09-20 08:00:00 4            NaN  \n",
       "       2157-09-20 09:15:00 4            NaN  \n",
       "       2157-09-20 09:30:00 4            NaN  \n",
       "       2157-09-20 10:00:00 4            NaN  \n",
       "       2157-09-20 12:00:00 5            NaN  \n",
       "       2157-09-20 15:00:00 5            NaN  \n",
       "       2157-09-20 18:00:00 5            NaN  \n",
       "100116 2173-07-27 05:13:00 0            NaN  \n",
       "       2173-07-27 08:00:00 0            NaN  \n",
       "       2173-07-27 09:30:00 0            NaN  \n",
       "       2173-07-27 11:00:00 0            NaN  \n",
       "       2173-07-27 14:25:00 0            NaN  \n",
       "       2173-07-27 16:30:00 1            NaN  \n",
       "       2173-07-27 18:00:00 1            NaN  \n",
       "       2173-07-27 20:00:00 1            NaN  \n",
       "       2173-07-28 00:00:00 1            NaN  \n",
       "       2173-07-28 04:00:00 2            NaN  \n",
       "       2173-07-28 08:00:00 2            NaN  \n",
       "       2173-07-28 12:00:00 3            NaN  \n",
       "       2173-07-28 16:00:00 3            NaN  \n",
       "100117 2166-04-30 21:39:00 0            NaN  \n",
       "\n",
       "[2000 rows x 4 columns]"
      ]
     },
     "execution_count": 266,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_temp_periodic = periodic.transform(df_temp_combined.head(2000))\n",
    "df_temp_periodic[df_temp_periodic.index.get_level_values('seg_id') > -1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 252,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import utils\n",
    "import mimic\n",
    "import transformers\n",
    "from sklearn.pipeline import Pipeline\n",
    "import icu_data_defs\n",
    "import units\n",
    "import segmenting\n",
    "from sklearn_pandas import DataFrameMapper\n",
    "import constants\n",
    "import features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 251,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "hdf5_fname = 'data/mimic_data'\n",
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df_cleaned"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 249,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "end_dt = df_temp_combined.iloc[:,0].groupby(level='id').apply(lambda x:x.sample(1))\n",
    "end_dt = end_dt.reset_index(level=0,drop=True).reset_index(level=1,drop=False).iloc[:,0]\n",
    "all_before = segmenting.all_before(end_dt,df_context)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature creation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 247,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import utils\n",
    "import transformers\n",
    "from sklearn.pipeline import Pipeline\n",
    "import units\n",
    "from sklearn_pandas import DataFrameMapper\n",
    "import constants\n",
    "import features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 248,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "ImportError",
     "evalue": "cannot import name CUSTOM_FILTER",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mImportError\u001b[0m                               Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-248-bae7a29e5c29>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mreload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mreload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mutils\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[0mreload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0munits\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[0mreload\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mconstants\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\icu_ml_project\\v5\\features.pyc\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbase\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mTransformerMixin\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mBaseEstimator\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpipeline\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mFeatureUnion\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mconstants\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mcolumn_names\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mSEG_ID\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mNO_SEGMENT\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mALL\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mCUSTOM_FILTER\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn_pandas\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mDataFrameMapper\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mImportError\u001b[0m: cannot import name CUSTOM_FILTER"
     ]
    }
   ],
   "source": [
    "reload(features)\n",
    "reload(utils)\n",
    "reload(units)\n",
    "reload(constants)\n",
    "\n",
    "df = df_temp_all_before\n",
    "ureg = units.MedicalUreg()\n",
    "\n",
    "\n",
    "\n",
    "def summable_filter(df):\n",
    "    filter_func= lambda x: (ureg.is_volume(str(x[-2])) or ureg.is_mass(str(x[-2]))) and (x[0] != 'weight body')\n",
    "    return df.loc[:,df.columns.map(filter_func)]\n",
    "\n",
    "feature_tuples = [\n",
    "    ('MEAN',features.segment_mean(),ALL),\n",
    "    ('STD',features.segment_std(),ALL),\n",
    "    ('COUNT',features.segment_count(),ALL),\n",
    "    ('LAST',features.segment_last(),ALL),\n",
    "    ('SUM',features.segment_sum(),{constants.CUSTOM_FILTER:summable_filter})\n",
    "]\n",
    "\n",
    "mapped_ft = features.make_mapper(feature_tuples,df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "feature_df = mapped_ft.transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>temperature body_degF_223761_MEAN</th>\n",
       "      <th>temperature body_degF_678_MEAN</th>\n",
       "      <th>temperature body_degC_223762_MEAN</th>\n",
       "      <th>temperature body_degC_676_MEAN</th>\n",
       "      <th>temperature body_degF_223761_STD</th>\n",
       "      <th>temperature body_degF_678_STD</th>\n",
       "      <th>temperature body_degC_223762_STD</th>\n",
       "      <th>temperature body_degC_676_STD</th>\n",
       "      <th>temperature body_degF_223761_COUNT</th>\n",
       "      <th>temperature body_degF_678_COUNT</th>\n",
       "      <th>temperature body_degC_223762_COUNT</th>\n",
       "      <th>temperature body_degC_676_COUNT</th>\n",
       "      <th>temperature body_degF_223761_LAST</th>\n",
       "      <th>temperature body_degF_678_LAST</th>\n",
       "      <th>temperature body_degC_223762_LAST</th>\n",
       "      <th>temperature body_degC_676_LAST</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>98.537500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.064945</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>97.166667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.167619</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>95.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.533334</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.607729</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.591667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.466263</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.699997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>98.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.458258</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>98.550000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.036822</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>98.380000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.511765</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.641872</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.790767</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>100.649998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.777816</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>101.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>97.975000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.875032</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>97.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>96.050000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.777817</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>96.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.536364</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.492489</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.378571</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.260705</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.6</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>99.466667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.495650</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NaN</td>\n",
       "      <td>99.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.766037</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.374646</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>106.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.599998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>NaN</td>\n",
       "      <td>95.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>98.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.141421</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>100.772222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.719512</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.075287</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.281265</td>\n",
       "      <td>NaN</td>\n",
       "      <td>18.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>41.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.5</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.193750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.401560</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.799999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>99.345455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.864975</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>33.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>98.312500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.022986</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>16.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>97.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>97.760000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.585662</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.445455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.849254</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.990909</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.190574</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>28.4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.799999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.799999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.320000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.277491</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.700000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.754119</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>24.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.787487</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.847368</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.300681</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.200001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>96.525000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>96.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44216</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.302703</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.021739</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.047082</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.997311</td>\n",
       "      <td>0.0</td>\n",
       "      <td>37.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>46.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44217</th>\n",
       "      <td>98.475000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.081280</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>97.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44218</th>\n",
       "      <td>96.275000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.420034</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>8.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>96.3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44219</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44220</th>\n",
       "      <td>NaN</td>\n",
       "      <td>95.740001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.021275</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44221</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.672309</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44222</th>\n",
       "      <td>99.240000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.512835</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>99.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44223</th>\n",
       "      <td>98.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.070711</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44224</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.130000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.531351</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44225</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.199997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44226</th>\n",
       "      <td>99.753333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.765195</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44227</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.071875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.449181</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.944674</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.920982</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>61.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44228</th>\n",
       "      <td>NaN</td>\n",
       "      <td>100.526316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.175605</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44229</th>\n",
       "      <td>97.580000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.740570</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>96.9</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44230</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.443518</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.332443</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>108.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.299999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44231</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.071429</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.239964</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35.599998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44232</th>\n",
       "      <td>98.484375</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.070456</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>32.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>100.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44233</th>\n",
       "      <td>NaN</td>\n",
       "      <td>100.278462</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.300803</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>130.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44234</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.173333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.688131</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>15.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44235</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.074999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.056330</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.599998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44236</th>\n",
       "      <td>97.977778</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.032930</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>9.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>98.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44237</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.485713</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.977118</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44238</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.264285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.759157</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.599998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44239</th>\n",
       "      <td>NaN</td>\n",
       "      <td>95.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.565688</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44240</th>\n",
       "      <td>95.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.8</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44241</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.670000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37.420725</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.668415</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.378023</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>193.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.699997</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44242</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.980953</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.933605</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44243</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35.750000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.919241</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>35.099998</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44244</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36.176191</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.159700</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>21.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>34.700001</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44245</th>\n",
       "      <td>98.655000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.147297</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>20.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>97.8</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>44246 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       temperature body_degF_223761_MEAN  temperature body_degF_678_MEAN  \\\n",
       "0                              98.537500                             NaN   \n",
       "1                              97.166667                             NaN   \n",
       "2                                    NaN                       97.533334   \n",
       "3                                    NaN                       98.591667   \n",
       "4                              98.100000                             NaN   \n",
       "5                              98.550000                             NaN   \n",
       "6                              98.380000                             NaN   \n",
       "7                                    NaN                      100.649998   \n",
       "8                              97.975000                             NaN   \n",
       "9                              96.050000                             NaN   \n",
       "10                                   NaN                       98.536364   \n",
       "11                                   NaN                             NaN   \n",
       "12                             99.466667                             NaN   \n",
       "13                                   NaN                       99.099998   \n",
       "14                                   NaN                       95.300003   \n",
       "15                             98.500000                             NaN   \n",
       "16                            100.772222                             NaN   \n",
       "17                                   NaN                             NaN   \n",
       "18                             99.345455                             NaN   \n",
       "19                                   NaN                       98.800003   \n",
       "20                             98.312500                             NaN   \n",
       "21                             97.760000                             NaN   \n",
       "22                                   NaN                       98.445455   \n",
       "23                                   NaN                             NaN   \n",
       "24                                   NaN                             NaN   \n",
       "25                                   NaN                       97.320000   \n",
       "26                                   NaN                       96.700000   \n",
       "27                                   NaN                       96.920000   \n",
       "28                                   NaN                             NaN   \n",
       "29                             96.525000                             NaN   \n",
       "...                                  ...                             ...   \n",
       "44216                                NaN                       98.302703   \n",
       "44217                          98.475000                             NaN   \n",
       "44218                          96.275000                             NaN   \n",
       "44219                                NaN                       97.800003   \n",
       "44220                                NaN                       95.740001   \n",
       "44221                                NaN                       97.920000   \n",
       "44222                          99.240000                             NaN   \n",
       "44223                          98.450000                             NaN   \n",
       "44224                                NaN                       98.130000   \n",
       "44225                                NaN                       97.900002   \n",
       "44226                          99.753333                             NaN   \n",
       "44227                                NaN                       98.071875   \n",
       "44228                                NaN                      100.526316   \n",
       "44229                          97.580000                             NaN   \n",
       "44230                                NaN                             NaN   \n",
       "44231                                NaN                             NaN   \n",
       "44232                          98.484375                             NaN   \n",
       "44233                                NaN                      100.278462   \n",
       "44234                                NaN                             NaN   \n",
       "44235                                NaN                       97.074999   \n",
       "44236                          97.977778                             NaN   \n",
       "44237                                NaN                       97.485713   \n",
       "44238                                NaN                       97.264285   \n",
       "44239                                NaN                       95.400002   \n",
       "44240                          95.800000                             NaN   \n",
       "44241                                NaN                       97.670000   \n",
       "44242                                NaN                       98.980953   \n",
       "44243                                NaN                             NaN   \n",
       "44244                                NaN                             NaN   \n",
       "44245                          98.655000                             NaN   \n",
       "\n",
       "       temperature body_degC_223762_MEAN  temperature body_degC_676_MEAN  \\\n",
       "0                                    NaN                             NaN   \n",
       "1                                    NaN                             NaN   \n",
       "2                                    NaN                             NaN   \n",
       "3                                    NaN                             NaN   \n",
       "4                                    NaN                             NaN   \n",
       "5                                    NaN                             NaN   \n",
       "6                              36.511765                             NaN   \n",
       "7                                    NaN                             NaN   \n",
       "8                                    NaN                             NaN   \n",
       "9                                    NaN                             NaN   \n",
       "10                                   NaN                             NaN   \n",
       "11                             36.378571                             NaN   \n",
       "12                                   NaN                             NaN   \n",
       "13                                   NaN                       36.766037   \n",
       "14                                   NaN                             NaN   \n",
       "15                                   NaN                             NaN   \n",
       "16                             37.719512                             NaN   \n",
       "17                                   NaN                       37.193750   \n",
       "18                                   NaN                             NaN   \n",
       "19                                   NaN                             NaN   \n",
       "20                                   NaN                             NaN   \n",
       "21                                   NaN                             NaN   \n",
       "22                                   NaN                             NaN   \n",
       "23                             34.990909                             NaN   \n",
       "24                                   NaN                       36.799999   \n",
       "25                                   NaN                             NaN   \n",
       "26                                   NaN                             NaN   \n",
       "27                                   NaN                             NaN   \n",
       "28                                   NaN                       36.847368   \n",
       "29                                   NaN                             NaN   \n",
       "...                                  ...                             ...   \n",
       "44216                                NaN                       37.021739   \n",
       "44217                                NaN                             NaN   \n",
       "44218                                NaN                             NaN   \n",
       "44219                                NaN                             NaN   \n",
       "44220                                NaN                             NaN   \n",
       "44221                                NaN                             NaN   \n",
       "44222                                NaN                             NaN   \n",
       "44223                                NaN                             NaN   \n",
       "44224                                NaN                             NaN   \n",
       "44225                                NaN                             NaN   \n",
       "44226                                NaN                             NaN   \n",
       "44227                                NaN                       37.449181   \n",
       "44228                                NaN                             NaN   \n",
       "44229                                NaN                             NaN   \n",
       "44230                                NaN                       37.443518   \n",
       "44231                                NaN                       36.071429   \n",
       "44232                                NaN                             NaN   \n",
       "44233                                NaN                             NaN   \n",
       "44234                          36.173333                             NaN   \n",
       "44235                                NaN                             NaN   \n",
       "44236                                NaN                             NaN   \n",
       "44237                                NaN                             NaN   \n",
       "44238                                NaN                             NaN   \n",
       "44239                                NaN                             NaN   \n",
       "44240                          34.800000                             NaN   \n",
       "44241                                NaN                       37.420725   \n",
       "44242                                NaN                             NaN   \n",
       "44243                                NaN                       35.750000   \n",
       "44244                                NaN                       36.176191   \n",
       "44245                                NaN                             NaN   \n",
       "\n",
       "       temperature body_degF_223761_STD  temperature body_degF_678_STD  \\\n",
       "0                              1.064945                            NaN   \n",
       "1                              1.167619                            NaN   \n",
       "2                                   NaN                       0.607729   \n",
       "3                                   NaN                       1.466263   \n",
       "4                              0.458258                            NaN   \n",
       "5                              1.036822                            NaN   \n",
       "6                              0.641872                            NaN   \n",
       "7                                   NaN                       0.777816   \n",
       "8                              0.875032                            NaN   \n",
       "9                              0.777817                            NaN   \n",
       "10                                  NaN                       0.492489   \n",
       "11                                  NaN                            NaN   \n",
       "12                             1.495650                            NaN   \n",
       "13                                  NaN                            NaN   \n",
       "14                                  NaN                            NaN   \n",
       "15                             0.141421                            NaN   \n",
       "16                             1.075287                            NaN   \n",
       "17                                  NaN                            NaN   \n",
       "18                             0.864975                            NaN   \n",
       "19                                  NaN                            NaN   \n",
       "20                             1.022986                            NaN   \n",
       "21                             0.585662                            NaN   \n",
       "22                                  NaN                       1.849254   \n",
       "23                                  NaN                            NaN   \n",
       "24                                  NaN                            NaN   \n",
       "25                                  NaN                       0.277491   \n",
       "26                                  NaN                       0.754119   \n",
       "27                                  NaN                       1.787487   \n",
       "28                                  NaN                            NaN   \n",
       "29                             0.450000                            NaN   \n",
       "...                                 ...                            ...   \n",
       "44216                               NaN                       1.047082   \n",
       "44217                          1.081280                            NaN   \n",
       "44218                          0.420034                            NaN   \n",
       "44219                               NaN                            NaN   \n",
       "44220                               NaN                       1.021275   \n",
       "44221                               NaN                       0.672309   \n",
       "44222                          0.512835                            NaN   \n",
       "44223                          0.070711                            NaN   \n",
       "44224                               NaN                       0.531351   \n",
       "44225                               NaN                       0.199997   \n",
       "44226                          0.765195                            NaN   \n",
       "44227                               NaN                       0.944674   \n",
       "44228                               NaN                       1.175605   \n",
       "44229                          0.740570                            NaN   \n",
       "44230                               NaN                            NaN   \n",
       "44231                               NaN                            NaN   \n",
       "44232                          1.070456                            NaN   \n",
       "44233                               NaN                       1.300803   \n",
       "44234                               NaN                            NaN   \n",
       "44235                               NaN                       1.056330   \n",
       "44236                          1.032930                            NaN   \n",
       "44237                               NaN                       0.977118   \n",
       "44238                               NaN                       0.759157   \n",
       "44239                               NaN                       0.565688   \n",
       "44240                               NaN                            NaN   \n",
       "44241                               NaN                       0.668415   \n",
       "44242                               NaN                       0.933605   \n",
       "44243                               NaN                            NaN   \n",
       "44244                               NaN                            NaN   \n",
       "44245                          1.147297                            NaN   \n",
       "\n",
       "       temperature body_degC_223762_STD  temperature body_degC_676_STD  \\\n",
       "0                                   NaN                            NaN   \n",
       "1                                   NaN                            NaN   \n",
       "2                                   NaN                            NaN   \n",
       "3                                   NaN                            NaN   \n",
       "4                                   NaN                            NaN   \n",
       "5                                   NaN                            NaN   \n",
       "6                              0.790767                            NaN   \n",
       "7                                   NaN                            NaN   \n",
       "8                                   NaN                            NaN   \n",
       "9                                   NaN                            NaN   \n",
       "10                                  NaN                            NaN   \n",
       "11                             0.260705                            NaN   \n",
       "12                                  NaN                            NaN   \n",
       "13                                  NaN                       0.374646   \n",
       "14                                  NaN                            NaN   \n",
       "15                                  NaN                            NaN   \n",
       "16                             0.281265                            NaN   \n",
       "17                                  NaN                       0.401560   \n",
       "18                                  NaN                            NaN   \n",
       "19                                  NaN                            NaN   \n",
       "20                                  NaN                            NaN   \n",
       "21                                  NaN                            NaN   \n",
       "22                                  NaN                            NaN   \n",
       "23                             4.190574                            NaN   \n",
       "24                                  NaN                            NaN   \n",
       "25                                  NaN                            NaN   \n",
       "26                                  NaN                            NaN   \n",
       "27                                  NaN                            NaN   \n",
       "28                                  NaN                       0.300681   \n",
       "29                                  NaN                            NaN   \n",
       "...                                 ...                            ...   \n",
       "44216                               NaN                       0.997311   \n",
       "44217                               NaN                            NaN   \n",
       "44218                               NaN                            NaN   \n",
       "44219                               NaN                            NaN   \n",
       "44220                               NaN                            NaN   \n",
       "44221                               NaN                            NaN   \n",
       "44222                               NaN                            NaN   \n",
       "44223                               NaN                            NaN   \n",
       "44224                               NaN                            NaN   \n",
       "44225                               NaN                            NaN   \n",
       "44226                               NaN                            NaN   \n",
       "44227                               NaN                       0.920982   \n",
       "44228                               NaN                            NaN   \n",
       "44229                               NaN                            NaN   \n",
       "44230                               NaN                       0.332443   \n",
       "44231                               NaN                       0.239964   \n",
       "44232                               NaN                            NaN   \n",
       "44233                               NaN                            NaN   \n",
       "44234                          0.688131                            NaN   \n",
       "44235                               NaN                            NaN   \n",
       "44236                               NaN                            NaN   \n",
       "44237                               NaN                            NaN   \n",
       "44238                               NaN                            NaN   \n",
       "44239                               NaN                            NaN   \n",
       "44240                               NaN                            NaN   \n",
       "44241                               NaN                       0.378023   \n",
       "44242                               NaN                            NaN   \n",
       "44243                               NaN                       0.919241   \n",
       "44244                               NaN                       1.159700   \n",
       "44245                               NaN                            NaN   \n",
       "\n",
       "       temperature body_degF_223761_COUNT  temperature body_degF_678_COUNT  \\\n",
       "0                                     8.0                              0.0   \n",
       "1                                     3.0                              0.0   \n",
       "2                                     0.0                             21.0   \n",
       "3                                     0.0                             12.0   \n",
       "4                                     3.0                              0.0   \n",
       "5                                     6.0                              0.0   \n",
       "6                                     5.0                              0.0   \n",
       "7                                     0.0                              2.0   \n",
       "8                                    12.0                              0.0   \n",
       "9                                     2.0                              0.0   \n",
       "10                                    0.0                             11.0   \n",
       "11                                    0.0                              0.0   \n",
       "12                                   12.0                              0.0   \n",
       "13                                    0.0                              1.0   \n",
       "14                                    0.0                              1.0   \n",
       "15                                    2.0                              0.0   \n",
       "16                                   18.0                              0.0   \n",
       "17                                    0.0                              0.0   \n",
       "18                                   33.0                              0.0   \n",
       "19                                    0.0                              1.0   \n",
       "20                                   16.0                              0.0   \n",
       "21                                    5.0                              0.0   \n",
       "22                                    0.0                             22.0   \n",
       "23                                    0.0                              0.0   \n",
       "24                                    0.0                              0.0   \n",
       "25                                    0.0                              5.0   \n",
       "26                                    0.0                             24.0   \n",
       "27                                    0.0                             10.0   \n",
       "28                                    0.0                              0.0   \n",
       "29                                    4.0                              0.0   \n",
       "...                                   ...                              ...   \n",
       "44216                                 0.0                             37.0   \n",
       "44217                                 4.0                              0.0   \n",
       "44218                                 8.0                              0.0   \n",
       "44219                                 0.0                              1.0   \n",
       "44220                                 0.0                              5.0   \n",
       "44221                                 0.0                              5.0   \n",
       "44222                                 5.0                              0.0   \n",
       "44223                                 2.0                              0.0   \n",
       "44224                                 0.0                             10.0   \n",
       "44225                                 0.0                              4.0   \n",
       "44226                                15.0                              0.0   \n",
       "44227                                 0.0                             32.0   \n",
       "44228                                 0.0                             19.0   \n",
       "44229                                10.0                              0.0   \n",
       "44230                                 0.0                              0.0   \n",
       "44231                                 0.0                              0.0   \n",
       "44232                                32.0                              0.0   \n",
       "44233                                 0.0                            130.0   \n",
       "44234                                 0.0                              0.0   \n",
       "44235                                 0.0                              4.0   \n",
       "44236                                 9.0                              0.0   \n",
       "44237                                 0.0                              7.0   \n",
       "44238                                 0.0                             14.0   \n",
       "44239                                 0.0                              2.0   \n",
       "44240                                 1.0                              0.0   \n",
       "44241                                 0.0                             10.0   \n",
       "44242                                 0.0                             21.0   \n",
       "44243                                 0.0                              0.0   \n",
       "44244                                 0.0                              0.0   \n",
       "44245                                20.0                              0.0   \n",
       "\n",
       "       temperature body_degC_223762_COUNT  temperature body_degC_676_COUNT  \\\n",
       "0                                     0.0                              0.0   \n",
       "1                                     0.0                              0.0   \n",
       "2                                     0.0                              0.0   \n",
       "3                                     0.0                              0.0   \n",
       "4                                     0.0                              0.0   \n",
       "5                                     0.0                              0.0   \n",
       "6                                    34.0                              0.0   \n",
       "7                                     0.0                              0.0   \n",
       "8                                     0.0                              0.0   \n",
       "9                                     0.0                              0.0   \n",
       "10                                    0.0                              0.0   \n",
       "11                                   14.0                              0.0   \n",
       "12                                    0.0                              0.0   \n",
       "13                                    0.0                            106.0   \n",
       "14                                    0.0                              0.0   \n",
       "15                                    0.0                              0.0   \n",
       "16                                   41.0                              0.0   \n",
       "17                                    0.0                             32.0   \n",
       "18                                    0.0                              0.0   \n",
       "19                                    0.0                              0.0   \n",
       "20                                    0.0                              0.0   \n",
       "21                                    0.0                              0.0   \n",
       "22                                    0.0                              0.0   \n",
       "23                                   11.0                              0.0   \n",
       "24                                    0.0                              1.0   \n",
       "25                                    0.0                              0.0   \n",
       "26                                    0.0                              0.0   \n",
       "27                                    0.0                              0.0   \n",
       "28                                    0.0                             19.0   \n",
       "29                                    0.0                              0.0   \n",
       "...                                   ...                              ...   \n",
       "44216                                 0.0                             46.0   \n",
       "44217                                 0.0                              0.0   \n",
       "44218                                 0.0                              0.0   \n",
       "44219                                 0.0                              0.0   \n",
       "44220                                 0.0                              0.0   \n",
       "44221                                 0.0                              0.0   \n",
       "44222                                 0.0                              0.0   \n",
       "44223                                 0.0                              0.0   \n",
       "44224                                 0.0                              0.0   \n",
       "44225                                 0.0                              0.0   \n",
       "44226                                 0.0                              0.0   \n",
       "44227                                 0.0                             61.0   \n",
       "44228                                 0.0                              0.0   \n",
       "44229                                 0.0                              0.0   \n",
       "44230                                 0.0                            108.0   \n",
       "44231                                 0.0                             14.0   \n",
       "44232                                 0.0                              0.0   \n",
       "44233                                 0.0                              0.0   \n",
       "44234                                15.0                              0.0   \n",
       "44235                                 0.0                              0.0   \n",
       "44236                                 0.0                              0.0   \n",
       "44237                                 0.0                              0.0   \n",
       "44238                                 0.0                              0.0   \n",
       "44239                                 0.0                              0.0   \n",
       "44240                                 1.0                              0.0   \n",
       "44241                                 0.0                            193.0   \n",
       "44242                                 0.0                              0.0   \n",
       "44243                                 0.0                              2.0   \n",
       "44244                                 0.0                             21.0   \n",
       "44245                                 0.0                              0.0   \n",
       "\n",
       "       temperature body_degF_223761_LAST  temperature body_degF_678_LAST  \\\n",
       "0                                   98.0                             NaN   \n",
       "1                                   95.9                             NaN   \n",
       "2                                    NaN                       97.000000   \n",
       "3                                    NaN                       95.699997   \n",
       "4                                   98.6                             NaN   \n",
       "5                                  100.2                             NaN   \n",
       "6                                    NaN                             NaN   \n",
       "7                                    NaN                      101.199997   \n",
       "8                                   97.4                             NaN   \n",
       "9                                   96.6                             NaN   \n",
       "10                                   NaN                       98.500000   \n",
       "11                                   NaN                             NaN   \n",
       "12                                  98.8                             NaN   \n",
       "13                                   NaN                             NaN   \n",
       "14                                   NaN                       95.300003   \n",
       "15                                  98.6                             NaN   \n",
       "16                                   NaN                             NaN   \n",
       "17                                   NaN                             NaN   \n",
       "18                                  98.6                             NaN   \n",
       "19                                   NaN                       98.800003   \n",
       "20                                  97.6                             NaN   \n",
       "21                                  97.8                             NaN   \n",
       "22                                   NaN                       97.800003   \n",
       "23                                   NaN                             NaN   \n",
       "24                                   NaN                             NaN   \n",
       "25                                   NaN                       97.199997   \n",
       "26                                   NaN                       97.000000   \n",
       "27                                   NaN                       94.000000   \n",
       "28                                   NaN                             NaN   \n",
       "29                                  96.9                             NaN   \n",
       "...                                  ...                             ...   \n",
       "44216                                NaN                       96.000000   \n",
       "44217                               97.1                             NaN   \n",
       "44218                               96.3                             NaN   \n",
       "44219                                NaN                       97.800003   \n",
       "44220                                NaN                       94.099998   \n",
       "44221                                NaN                       97.099998   \n",
       "44222                               99.8                             NaN   \n",
       "44223                               98.4                             NaN   \n",
       "44224                                NaN                       97.800003   \n",
       "44225                                NaN                       97.800003   \n",
       "44226                               98.7                             NaN   \n",
       "44227                                NaN                             NaN   \n",
       "44228                                NaN                       98.000000   \n",
       "44229                               96.9                             NaN   \n",
       "44230                                NaN                             NaN   \n",
       "44231                                NaN                             NaN   \n",
       "44232                              100.0                             NaN   \n",
       "44233                                NaN                       97.900002   \n",
       "44234                                NaN                             NaN   \n",
       "44235                                NaN                       95.599998   \n",
       "44236                               98.4                             NaN   \n",
       "44237                                NaN                       95.900002   \n",
       "44238                                NaN                       96.599998   \n",
       "44239                                NaN                       95.000000   \n",
       "44240                                NaN                             NaN   \n",
       "44241                                NaN                       98.699997   \n",
       "44242                                NaN                       99.400002   \n",
       "44243                                NaN                             NaN   \n",
       "44244                                NaN                             NaN   \n",
       "44245                               97.8                             NaN   \n",
       "\n",
       "       temperature body_degC_223762_LAST  temperature body_degC_676_LAST  \n",
       "0                                    NaN                             NaN  \n",
       "1                                    NaN                             NaN  \n",
       "2                                    NaN                             NaN  \n",
       "3                                    NaN                             NaN  \n",
       "4                                    NaN                             NaN  \n",
       "5                                    NaN                             NaN  \n",
       "6                                   34.5                             NaN  \n",
       "7                                    NaN                             NaN  \n",
       "8                                    NaN                             NaN  \n",
       "9                                    NaN                             NaN  \n",
       "10                                   NaN                             NaN  \n",
       "11                                  36.6                             NaN  \n",
       "12                                   NaN                             NaN  \n",
       "13                                   NaN                       36.599998  \n",
       "14                                   NaN                             NaN  \n",
       "15                                   NaN                             NaN  \n",
       "16                                  37.5                             NaN  \n",
       "17                                   NaN                       36.799999  \n",
       "18                                   NaN                             NaN  \n",
       "19                                   NaN                             NaN  \n",
       "20                                   NaN                             NaN  \n",
       "21                                   NaN                             NaN  \n",
       "22                                   NaN                             NaN  \n",
       "23                                  28.4                             NaN  \n",
       "24                                   NaN                       36.799999  \n",
       "25                                   NaN                             NaN  \n",
       "26                                   NaN                             NaN  \n",
       "27                                   NaN                             NaN  \n",
       "28                                   NaN                       36.200001  \n",
       "29                                   NaN                             NaN  \n",
       "...                                  ...                             ...  \n",
       "44216                                NaN                             NaN  \n",
       "44217                                NaN                             NaN  \n",
       "44218                                NaN                             NaN  \n",
       "44219                                NaN                             NaN  \n",
       "44220                                NaN                             NaN  \n",
       "44221                                NaN                             NaN  \n",
       "44222                                NaN                             NaN  \n",
       "44223                                NaN                             NaN  \n",
       "44224                                NaN                             NaN  \n",
       "44225                                NaN                             NaN  \n",
       "44226                                NaN                             NaN  \n",
       "44227                                NaN                       34.500000  \n",
       "44228                                NaN                             NaN  \n",
       "44229                                NaN                             NaN  \n",
       "44230                                NaN                       37.299999  \n",
       "44231                                NaN                       35.599998  \n",
       "44232                                NaN                             NaN  \n",
       "44233                                NaN                             NaN  \n",
       "44234                               35.0                             NaN  \n",
       "44235                                NaN                             NaN  \n",
       "44236                                NaN                             NaN  \n",
       "44237                                NaN                             NaN  \n",
       "44238                                NaN                             NaN  \n",
       "44239                                NaN                             NaN  \n",
       "44240                               34.8                             NaN  \n",
       "44241                                NaN                             NaN  \n",
       "44242                                NaN                             NaN  \n",
       "44243                                NaN                       35.099998  \n",
       "44244                                NaN                       34.700001  \n",
       "44245                                NaN                             NaN  \n",
       "\n",
       "[44246 rows x 16 columns]"
      ]
     },
     "execution_count": 233,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "test_mapper_pipeline = Pipeline([\n",
    "        ('ft_mapper',mapped_ft)\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>temperature body_known_qn_degF_223761_MEAN</th>\n",
       "      <th>temperature body_known_qn_degF_678_MEAN</th>\n",
       "      <th>temperature body_known_qn_degF_223762(degC)_MEAN</th>\n",
       "      <th>temperature body_known_qn_degF_676(degC)_MEAN</th>\n",
       "      <th>temperature body_known_qn_degF_223761_STD</th>\n",
       "      <th>temperature body_known_qn_degF_678_STD</th>\n",
       "      <th>temperature body_known_qn_degF_223762(degC)_STD</th>\n",
       "      <th>temperature body_known_qn_degF_676(degC)_STD</th>\n",
       "      <th>temperature body_known_qn_degF_223761_COUNT</th>\n",
       "      <th>temperature body_known_qn_degF_678_COUNT</th>\n",
       "      <th>temperature body_known_qn_degF_223762(degC)_COUNT</th>\n",
       "      <th>temperature body_known_qn_degF_676(degC)_COUNT</th>\n",
       "      <th>temperature body_known_qn_degF_223761_LAST</th>\n",
       "      <th>temperature body_known_qn_degF_678_LAST</th>\n",
       "      <th>temperature body_known_qn_degF_223762(degC)_LAST</th>\n",
       "      <th>temperature body_known_qn_degF_676(degC)_LAST</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>98.537500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.537500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.537500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.537500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>97.166667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.166667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.166667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.166667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.533334</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.533334</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.533334</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.533334</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.591667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.591667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.591667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.591667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>98.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>98.550000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.550000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.550000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.550000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>98.380000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.721177</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.380000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.721177</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.380000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.721177</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.380000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.721177</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>100.649998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.649998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.649998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.649998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>97.975000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.975000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.975000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.975000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>96.050000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.050000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.050000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.050000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.536364</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.536364</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.536364</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.536364</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.481429</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.481429</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.481429</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.481429</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>99.466667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.466667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.466667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.466667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>NaN</td>\n",
       "      <td>99.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.178868</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.178868</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.178868</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.099998</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.178868</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>NaN</td>\n",
       "      <td>95.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.300003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>98.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.500000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>100.772222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.895122</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.772222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.895122</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.772222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.895122</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.772222</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.895122</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.948750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.948750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.948750</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.948750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>99.345455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.345455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.345455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.345455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>98.312500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.312500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.312500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.312500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>97.760000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.760000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.760000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.760000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.445455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.445455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.445455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.445455</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.983637</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.983637</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.983637</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.983637</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.239999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.239999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.239999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.239999</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.320000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.320000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.320000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.320000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.700000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.700000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.700000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.700000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>NaN</td>\n",
       "      <td>96.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.325263</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.325263</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.325263</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.325263</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>96.525000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.525000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.525000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.525000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44216</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.302703</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.639131</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.302703</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.639131</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.302703</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.639131</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.302703</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.639131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44217</th>\n",
       "      <td>98.475000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.475000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.475000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.475000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44218</th>\n",
       "      <td>96.275000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.275000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.275000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.275000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44219</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.800003</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44220</th>\n",
       "      <td>NaN</td>\n",
       "      <td>95.740001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.740001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.740001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.740001</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44221</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.920000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44222</th>\n",
       "      <td>99.240000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.240000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.240000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.240000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44223</th>\n",
       "      <td>98.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.450000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44224</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.130000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.130000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.130000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.130000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44225</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.900002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44226</th>\n",
       "      <td>99.753333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.753333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.753333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.753333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44227</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.071875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.408525</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.071875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.408525</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.071875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.408525</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.071875</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.408525</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44228</th>\n",
       "      <td>NaN</td>\n",
       "      <td>100.526316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.526316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.526316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.526316</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44229</th>\n",
       "      <td>97.580000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.580000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.580000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.580000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44230</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.398333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.398333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.398333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.398333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44231</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.928572</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.928572</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.928572</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.928572</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44232</th>\n",
       "      <td>98.484375</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.484375</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.484375</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.484375</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44233</th>\n",
       "      <td>NaN</td>\n",
       "      <td>100.278462</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.278462</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.278462</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>100.278462</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44234</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.112000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.112000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.112000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.112000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44235</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.074999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.074999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.074999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.074999</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44236</th>\n",
       "      <td>97.977778</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.977778</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.977778</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.977778</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44237</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.485713</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.485713</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.485713</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.485713</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44238</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.264285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.264285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.264285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.264285</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44239</th>\n",
       "      <td>NaN</td>\n",
       "      <td>95.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.400002</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44240</th>\n",
       "      <td>95.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.640000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.640000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.640000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>95.800000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>94.640000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44241</th>\n",
       "      <td>NaN</td>\n",
       "      <td>97.670000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.357306</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.670000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.357306</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.670000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.357306</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.670000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>99.357306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44242</th>\n",
       "      <td>NaN</td>\n",
       "      <td>98.980953</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.980953</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.980953</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.980953</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44243</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.350000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.350000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.350000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>96.350000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44244</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.117143</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.117143</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.117143</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>97.117143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44245</th>\n",
       "      <td>98.655000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.655000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.655000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>98.655000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>44246 rows × 16 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       temperature body_known_qn_degF_223761_MEAN  \\\n",
       "0                                       98.537500   \n",
       "1                                       97.166667   \n",
       "2                                             NaN   \n",
       "3                                             NaN   \n",
       "4                                       98.100000   \n",
       "5                                       98.550000   \n",
       "6                                       98.380000   \n",
       "7                                             NaN   \n",
       "8                                       97.975000   \n",
       "9                                       96.050000   \n",
       "10                                            NaN   \n",
       "11                                            NaN   \n",
       "12                                      99.466667   \n",
       "13                                            NaN   \n",
       "14                                            NaN   \n",
       "15                                      98.500000   \n",
       "16                                     100.772222   \n",
       "17                                            NaN   \n",
       "18                                      99.345455   \n",
       "19                                            NaN   \n",
       "20                                      98.312500   \n",
       "21                                      97.760000   \n",
       "22                                            NaN   \n",
       "23                                            NaN   \n",
       "24                                            NaN   \n",
       "25                                            NaN   \n",
       "26                                            NaN   \n",
       "27                                            NaN   \n",
       "28                                            NaN   \n",
       "29                                      96.525000   \n",
       "...                                           ...   \n",
       "44216                                         NaN   \n",
       "44217                                   98.475000   \n",
       "44218                                   96.275000   \n",
       "44219                                         NaN   \n",
       "44220                                         NaN   \n",
       "44221                                         NaN   \n",
       "44222                                   99.240000   \n",
       "44223                                   98.450000   \n",
       "44224                                         NaN   \n",
       "44225                                         NaN   \n",
       "44226                                   99.753333   \n",
       "44227                                         NaN   \n",
       "44228                                         NaN   \n",
       "44229                                   97.580000   \n",
       "44230                                         NaN   \n",
       "44231                                         NaN   \n",
       "44232                                   98.484375   \n",
       "44233                                         NaN   \n",
       "44234                                         NaN   \n",
       "44235                                         NaN   \n",
       "44236                                   97.977778   \n",
       "44237                                         NaN   \n",
       "44238                                         NaN   \n",
       "44239                                         NaN   \n",
       "44240                                   95.800000   \n",
       "44241                                         NaN   \n",
       "44242                                         NaN   \n",
       "44243                                         NaN   \n",
       "44244                                         NaN   \n",
       "44245                                   98.655000   \n",
       "\n",
       "       temperature body_known_qn_degF_678_MEAN  \\\n",
       "0                                          NaN   \n",
       "1                                          NaN   \n",
       "2                                    97.533334   \n",
       "3                                    98.591667   \n",
       "4                                          NaN   \n",
       "5                                          NaN   \n",
       "6                                          NaN   \n",
       "7                                   100.649998   \n",
       "8                                          NaN   \n",
       "9                                          NaN   \n",
       "10                                   98.536364   \n",
       "11                                         NaN   \n",
       "12                                         NaN   \n",
       "13                                   99.099998   \n",
       "14                                   95.300003   \n",
       "15                                         NaN   \n",
       "16                                         NaN   \n",
       "17                                         NaN   \n",
       "18                                         NaN   \n",
       "19                                   98.800003   \n",
       "20                                         NaN   \n",
       "21                                         NaN   \n",
       "22                                   98.445455   \n",
       "23                                         NaN   \n",
       "24                                         NaN   \n",
       "25                                   97.320000   \n",
       "26                                   96.700000   \n",
       "27                                   96.920000   \n",
       "28                                         NaN   \n",
       "29                                         NaN   \n",
       "...                                        ...   \n",
       "44216                                98.302703   \n",
       "44217                                      NaN   \n",
       "44218                                      NaN   \n",
       "44219                                97.800003   \n",
       "44220                                95.740001   \n",
       "44221                                97.920000   \n",
       "44222                                      NaN   \n",
       "44223                                      NaN   \n",
       "44224                                98.130000   \n",
       "44225                                97.900002   \n",
       "44226                                      NaN   \n",
       "44227                                98.071875   \n",
       "44228                               100.526316   \n",
       "44229                                      NaN   \n",
       "44230                                      NaN   \n",
       "44231                                      NaN   \n",
       "44232                                      NaN   \n",
       "44233                               100.278462   \n",
       "44234                                      NaN   \n",
       "44235                                97.074999   \n",
       "44236                                      NaN   \n",
       "44237                                97.485713   \n",
       "44238                                97.264285   \n",
       "44239                                95.400002   \n",
       "44240                                      NaN   \n",
       "44241                                97.670000   \n",
       "44242                                98.980953   \n",
       "44243                                      NaN   \n",
       "44244                                      NaN   \n",
       "44245                                      NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223762(degC)_MEAN  \\\n",
       "0                                                   NaN   \n",
       "1                                                   NaN   \n",
       "2                                                   NaN   \n",
       "3                                                   NaN   \n",
       "4                                                   NaN   \n",
       "5                                                   NaN   \n",
       "6                                             97.721177   \n",
       "7                                                   NaN   \n",
       "8                                                   NaN   \n",
       "9                                                   NaN   \n",
       "10                                                  NaN   \n",
       "11                                            97.481429   \n",
       "12                                                  NaN   \n",
       "13                                                  NaN   \n",
       "14                                                  NaN   \n",
       "15                                                  NaN   \n",
       "16                                            99.895122   \n",
       "17                                                  NaN   \n",
       "18                                                  NaN   \n",
       "19                                                  NaN   \n",
       "20                                                  NaN   \n",
       "21                                                  NaN   \n",
       "22                                                  NaN   \n",
       "23                                            94.983637   \n",
       "24                                                  NaN   \n",
       "25                                                  NaN   \n",
       "26                                                  NaN   \n",
       "27                                                  NaN   \n",
       "28                                                  NaN   \n",
       "29                                                  NaN   \n",
       "...                                                 ...   \n",
       "44216                                               NaN   \n",
       "44217                                               NaN   \n",
       "44218                                               NaN   \n",
       "44219                                               NaN   \n",
       "44220                                               NaN   \n",
       "44221                                               NaN   \n",
       "44222                                               NaN   \n",
       "44223                                               NaN   \n",
       "44224                                               NaN   \n",
       "44225                                               NaN   \n",
       "44226                                               NaN   \n",
       "44227                                               NaN   \n",
       "44228                                               NaN   \n",
       "44229                                               NaN   \n",
       "44230                                               NaN   \n",
       "44231                                               NaN   \n",
       "44232                                               NaN   \n",
       "44233                                               NaN   \n",
       "44234                                         97.112000   \n",
       "44235                                               NaN   \n",
       "44236                                               NaN   \n",
       "44237                                               NaN   \n",
       "44238                                               NaN   \n",
       "44239                                               NaN   \n",
       "44240                                         94.640000   \n",
       "44241                                               NaN   \n",
       "44242                                               NaN   \n",
       "44243                                               NaN   \n",
       "44244                                               NaN   \n",
       "44245                                               NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_676(degC)_MEAN  \\\n",
       "0                                                NaN   \n",
       "1                                                NaN   \n",
       "2                                                NaN   \n",
       "3                                                NaN   \n",
       "4                                                NaN   \n",
       "5                                                NaN   \n",
       "6                                                NaN   \n",
       "7                                                NaN   \n",
       "8                                                NaN   \n",
       "9                                                NaN   \n",
       "10                                               NaN   \n",
       "11                                               NaN   \n",
       "12                                               NaN   \n",
       "13                                         98.178868   \n",
       "14                                               NaN   \n",
       "15                                               NaN   \n",
       "16                                               NaN   \n",
       "17                                         98.948750   \n",
       "18                                               NaN   \n",
       "19                                               NaN   \n",
       "20                                               NaN   \n",
       "21                                               NaN   \n",
       "22                                               NaN   \n",
       "23                                               NaN   \n",
       "24                                         98.239999   \n",
       "25                                               NaN   \n",
       "26                                               NaN   \n",
       "27                                               NaN   \n",
       "28                                         98.325263   \n",
       "29                                               NaN   \n",
       "...                                              ...   \n",
       "44216                                      98.639131   \n",
       "44217                                            NaN   \n",
       "44218                                            NaN   \n",
       "44219                                            NaN   \n",
       "44220                                            NaN   \n",
       "44221                                            NaN   \n",
       "44222                                            NaN   \n",
       "44223                                            NaN   \n",
       "44224                                            NaN   \n",
       "44225                                            NaN   \n",
       "44226                                            NaN   \n",
       "44227                                      99.408525   \n",
       "44228                                            NaN   \n",
       "44229                                            NaN   \n",
       "44230                                      99.398333   \n",
       "44231                                      96.928572   \n",
       "44232                                            NaN   \n",
       "44233                                            NaN   \n",
       "44234                                            NaN   \n",
       "44235                                            NaN   \n",
       "44236                                            NaN   \n",
       "44237                                            NaN   \n",
       "44238                                            NaN   \n",
       "44239                                            NaN   \n",
       "44240                                            NaN   \n",
       "44241                                      99.357306   \n",
       "44242                                            NaN   \n",
       "44243                                      96.350000   \n",
       "44244                                      97.117143   \n",
       "44245                                            NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223761_STD  \\\n",
       "0                                      98.537500   \n",
       "1                                      97.166667   \n",
       "2                                            NaN   \n",
       "3                                            NaN   \n",
       "4                                      98.100000   \n",
       "5                                      98.550000   \n",
       "6                                      98.380000   \n",
       "7                                            NaN   \n",
       "8                                      97.975000   \n",
       "9                                      96.050000   \n",
       "10                                           NaN   \n",
       "11                                           NaN   \n",
       "12                                     99.466667   \n",
       "13                                           NaN   \n",
       "14                                           NaN   \n",
       "15                                     98.500000   \n",
       "16                                    100.772222   \n",
       "17                                           NaN   \n",
       "18                                     99.345455   \n",
       "19                                           NaN   \n",
       "20                                     98.312500   \n",
       "21                                     97.760000   \n",
       "22                                           NaN   \n",
       "23                                           NaN   \n",
       "24                                           NaN   \n",
       "25                                           NaN   \n",
       "26                                           NaN   \n",
       "27                                           NaN   \n",
       "28                                           NaN   \n",
       "29                                     96.525000   \n",
       "...                                          ...   \n",
       "44216                                        NaN   \n",
       "44217                                  98.475000   \n",
       "44218                                  96.275000   \n",
       "44219                                        NaN   \n",
       "44220                                        NaN   \n",
       "44221                                        NaN   \n",
       "44222                                  99.240000   \n",
       "44223                                  98.450000   \n",
       "44224                                        NaN   \n",
       "44225                                        NaN   \n",
       "44226                                  99.753333   \n",
       "44227                                        NaN   \n",
       "44228                                        NaN   \n",
       "44229                                  97.580000   \n",
       "44230                                        NaN   \n",
       "44231                                        NaN   \n",
       "44232                                  98.484375   \n",
       "44233                                        NaN   \n",
       "44234                                        NaN   \n",
       "44235                                        NaN   \n",
       "44236                                  97.977778   \n",
       "44237                                        NaN   \n",
       "44238                                        NaN   \n",
       "44239                                        NaN   \n",
       "44240                                  95.800000   \n",
       "44241                                        NaN   \n",
       "44242                                        NaN   \n",
       "44243                                        NaN   \n",
       "44244                                        NaN   \n",
       "44245                                  98.655000   \n",
       "\n",
       "       temperature body_known_qn_degF_678_STD  \\\n",
       "0                                         NaN   \n",
       "1                                         NaN   \n",
       "2                                   97.533334   \n",
       "3                                   98.591667   \n",
       "4                                         NaN   \n",
       "5                                         NaN   \n",
       "6                                         NaN   \n",
       "7                                  100.649998   \n",
       "8                                         NaN   \n",
       "9                                         NaN   \n",
       "10                                  98.536364   \n",
       "11                                        NaN   \n",
       "12                                        NaN   \n",
       "13                                  99.099998   \n",
       "14                                  95.300003   \n",
       "15                                        NaN   \n",
       "16                                        NaN   \n",
       "17                                        NaN   \n",
       "18                                        NaN   \n",
       "19                                  98.800003   \n",
       "20                                        NaN   \n",
       "21                                        NaN   \n",
       "22                                  98.445455   \n",
       "23                                        NaN   \n",
       "24                                        NaN   \n",
       "25                                  97.320000   \n",
       "26                                  96.700000   \n",
       "27                                  96.920000   \n",
       "28                                        NaN   \n",
       "29                                        NaN   \n",
       "...                                       ...   \n",
       "44216                               98.302703   \n",
       "44217                                     NaN   \n",
       "44218                                     NaN   \n",
       "44219                               97.800003   \n",
       "44220                               95.740001   \n",
       "44221                               97.920000   \n",
       "44222                                     NaN   \n",
       "44223                                     NaN   \n",
       "44224                               98.130000   \n",
       "44225                               97.900002   \n",
       "44226                                     NaN   \n",
       "44227                               98.071875   \n",
       "44228                              100.526316   \n",
       "44229                                     NaN   \n",
       "44230                                     NaN   \n",
       "44231                                     NaN   \n",
       "44232                                     NaN   \n",
       "44233                              100.278462   \n",
       "44234                                     NaN   \n",
       "44235                               97.074999   \n",
       "44236                                     NaN   \n",
       "44237                               97.485713   \n",
       "44238                               97.264285   \n",
       "44239                               95.400002   \n",
       "44240                                     NaN   \n",
       "44241                               97.670000   \n",
       "44242                               98.980953   \n",
       "44243                                     NaN   \n",
       "44244                                     NaN   \n",
       "44245                                     NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223762(degC)_STD  \\\n",
       "0                                                  NaN   \n",
       "1                                                  NaN   \n",
       "2                                                  NaN   \n",
       "3                                                  NaN   \n",
       "4                                                  NaN   \n",
       "5                                                  NaN   \n",
       "6                                            97.721177   \n",
       "7                                                  NaN   \n",
       "8                                                  NaN   \n",
       "9                                                  NaN   \n",
       "10                                                 NaN   \n",
       "11                                           97.481429   \n",
       "12                                                 NaN   \n",
       "13                                                 NaN   \n",
       "14                                                 NaN   \n",
       "15                                                 NaN   \n",
       "16                                           99.895122   \n",
       "17                                                 NaN   \n",
       "18                                                 NaN   \n",
       "19                                                 NaN   \n",
       "20                                                 NaN   \n",
       "21                                                 NaN   \n",
       "22                                                 NaN   \n",
       "23                                           94.983637   \n",
       "24                                                 NaN   \n",
       "25                                                 NaN   \n",
       "26                                                 NaN   \n",
       "27                                                 NaN   \n",
       "28                                                 NaN   \n",
       "29                                                 NaN   \n",
       "...                                                ...   \n",
       "44216                                              NaN   \n",
       "44217                                              NaN   \n",
       "44218                                              NaN   \n",
       "44219                                              NaN   \n",
       "44220                                              NaN   \n",
       "44221                                              NaN   \n",
       "44222                                              NaN   \n",
       "44223                                              NaN   \n",
       "44224                                              NaN   \n",
       "44225                                              NaN   \n",
       "44226                                              NaN   \n",
       "44227                                              NaN   \n",
       "44228                                              NaN   \n",
       "44229                                              NaN   \n",
       "44230                                              NaN   \n",
       "44231                                              NaN   \n",
       "44232                                              NaN   \n",
       "44233                                              NaN   \n",
       "44234                                        97.112000   \n",
       "44235                                              NaN   \n",
       "44236                                              NaN   \n",
       "44237                                              NaN   \n",
       "44238                                              NaN   \n",
       "44239                                              NaN   \n",
       "44240                                        94.640000   \n",
       "44241                                              NaN   \n",
       "44242                                              NaN   \n",
       "44243                                              NaN   \n",
       "44244                                              NaN   \n",
       "44245                                              NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_676(degC)_STD  \\\n",
       "0                                               NaN   \n",
       "1                                               NaN   \n",
       "2                                               NaN   \n",
       "3                                               NaN   \n",
       "4                                               NaN   \n",
       "5                                               NaN   \n",
       "6                                               NaN   \n",
       "7                                               NaN   \n",
       "8                                               NaN   \n",
       "9                                               NaN   \n",
       "10                                              NaN   \n",
       "11                                              NaN   \n",
       "12                                              NaN   \n",
       "13                                        98.178868   \n",
       "14                                              NaN   \n",
       "15                                              NaN   \n",
       "16                                              NaN   \n",
       "17                                        98.948750   \n",
       "18                                              NaN   \n",
       "19                                              NaN   \n",
       "20                                              NaN   \n",
       "21                                              NaN   \n",
       "22                                              NaN   \n",
       "23                                              NaN   \n",
       "24                                        98.239999   \n",
       "25                                              NaN   \n",
       "26                                              NaN   \n",
       "27                                              NaN   \n",
       "28                                        98.325263   \n",
       "29                                              NaN   \n",
       "...                                             ...   \n",
       "44216                                     98.639131   \n",
       "44217                                           NaN   \n",
       "44218                                           NaN   \n",
       "44219                                           NaN   \n",
       "44220                                           NaN   \n",
       "44221                                           NaN   \n",
       "44222                                           NaN   \n",
       "44223                                           NaN   \n",
       "44224                                           NaN   \n",
       "44225                                           NaN   \n",
       "44226                                           NaN   \n",
       "44227                                     99.408525   \n",
       "44228                                           NaN   \n",
       "44229                                           NaN   \n",
       "44230                                     99.398333   \n",
       "44231                                     96.928572   \n",
       "44232                                           NaN   \n",
       "44233                                           NaN   \n",
       "44234                                           NaN   \n",
       "44235                                           NaN   \n",
       "44236                                           NaN   \n",
       "44237                                           NaN   \n",
       "44238                                           NaN   \n",
       "44239                                           NaN   \n",
       "44240                                           NaN   \n",
       "44241                                     99.357306   \n",
       "44242                                           NaN   \n",
       "44243                                     96.350000   \n",
       "44244                                     97.117143   \n",
       "44245                                           NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223761_COUNT  \\\n",
       "0                                        98.537500   \n",
       "1                                        97.166667   \n",
       "2                                              NaN   \n",
       "3                                              NaN   \n",
       "4                                        98.100000   \n",
       "5                                        98.550000   \n",
       "6                                        98.380000   \n",
       "7                                              NaN   \n",
       "8                                        97.975000   \n",
       "9                                        96.050000   \n",
       "10                                             NaN   \n",
       "11                                             NaN   \n",
       "12                                       99.466667   \n",
       "13                                             NaN   \n",
       "14                                             NaN   \n",
       "15                                       98.500000   \n",
       "16                                      100.772222   \n",
       "17                                             NaN   \n",
       "18                                       99.345455   \n",
       "19                                             NaN   \n",
       "20                                       98.312500   \n",
       "21                                       97.760000   \n",
       "22                                             NaN   \n",
       "23                                             NaN   \n",
       "24                                             NaN   \n",
       "25                                             NaN   \n",
       "26                                             NaN   \n",
       "27                                             NaN   \n",
       "28                                             NaN   \n",
       "29                                       96.525000   \n",
       "...                                            ...   \n",
       "44216                                          NaN   \n",
       "44217                                    98.475000   \n",
       "44218                                    96.275000   \n",
       "44219                                          NaN   \n",
       "44220                                          NaN   \n",
       "44221                                          NaN   \n",
       "44222                                    99.240000   \n",
       "44223                                    98.450000   \n",
       "44224                                          NaN   \n",
       "44225                                          NaN   \n",
       "44226                                    99.753333   \n",
       "44227                                          NaN   \n",
       "44228                                          NaN   \n",
       "44229                                    97.580000   \n",
       "44230                                          NaN   \n",
       "44231                                          NaN   \n",
       "44232                                    98.484375   \n",
       "44233                                          NaN   \n",
       "44234                                          NaN   \n",
       "44235                                          NaN   \n",
       "44236                                    97.977778   \n",
       "44237                                          NaN   \n",
       "44238                                          NaN   \n",
       "44239                                          NaN   \n",
       "44240                                    95.800000   \n",
       "44241                                          NaN   \n",
       "44242                                          NaN   \n",
       "44243                                          NaN   \n",
       "44244                                          NaN   \n",
       "44245                                    98.655000   \n",
       "\n",
       "       temperature body_known_qn_degF_678_COUNT  \\\n",
       "0                                           NaN   \n",
       "1                                           NaN   \n",
       "2                                     97.533334   \n",
       "3                                     98.591667   \n",
       "4                                           NaN   \n",
       "5                                           NaN   \n",
       "6                                           NaN   \n",
       "7                                    100.649998   \n",
       "8                                           NaN   \n",
       "9                                           NaN   \n",
       "10                                    98.536364   \n",
       "11                                          NaN   \n",
       "12                                          NaN   \n",
       "13                                    99.099998   \n",
       "14                                    95.300003   \n",
       "15                                          NaN   \n",
       "16                                          NaN   \n",
       "17                                          NaN   \n",
       "18                                          NaN   \n",
       "19                                    98.800003   \n",
       "20                                          NaN   \n",
       "21                                          NaN   \n",
       "22                                    98.445455   \n",
       "23                                          NaN   \n",
       "24                                          NaN   \n",
       "25                                    97.320000   \n",
       "26                                    96.700000   \n",
       "27                                    96.920000   \n",
       "28                                          NaN   \n",
       "29                                          NaN   \n",
       "...                                         ...   \n",
       "44216                                 98.302703   \n",
       "44217                                       NaN   \n",
       "44218                                       NaN   \n",
       "44219                                 97.800003   \n",
       "44220                                 95.740001   \n",
       "44221                                 97.920000   \n",
       "44222                                       NaN   \n",
       "44223                                       NaN   \n",
       "44224                                 98.130000   \n",
       "44225                                 97.900002   \n",
       "44226                                       NaN   \n",
       "44227                                 98.071875   \n",
       "44228                                100.526316   \n",
       "44229                                       NaN   \n",
       "44230                                       NaN   \n",
       "44231                                       NaN   \n",
       "44232                                       NaN   \n",
       "44233                                100.278462   \n",
       "44234                                       NaN   \n",
       "44235                                 97.074999   \n",
       "44236                                       NaN   \n",
       "44237                                 97.485713   \n",
       "44238                                 97.264285   \n",
       "44239                                 95.400002   \n",
       "44240                                       NaN   \n",
       "44241                                 97.670000   \n",
       "44242                                 98.980953   \n",
       "44243                                       NaN   \n",
       "44244                                       NaN   \n",
       "44245                                       NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223762(degC)_COUNT  \\\n",
       "0                                                    NaN   \n",
       "1                                                    NaN   \n",
       "2                                                    NaN   \n",
       "3                                                    NaN   \n",
       "4                                                    NaN   \n",
       "5                                                    NaN   \n",
       "6                                              97.721177   \n",
       "7                                                    NaN   \n",
       "8                                                    NaN   \n",
       "9                                                    NaN   \n",
       "10                                                   NaN   \n",
       "11                                             97.481429   \n",
       "12                                                   NaN   \n",
       "13                                                   NaN   \n",
       "14                                                   NaN   \n",
       "15                                                   NaN   \n",
       "16                                             99.895122   \n",
       "17                                                   NaN   \n",
       "18                                                   NaN   \n",
       "19                                                   NaN   \n",
       "20                                                   NaN   \n",
       "21                                                   NaN   \n",
       "22                                                   NaN   \n",
       "23                                             94.983637   \n",
       "24                                                   NaN   \n",
       "25                                                   NaN   \n",
       "26                                                   NaN   \n",
       "27                                                   NaN   \n",
       "28                                                   NaN   \n",
       "29                                                   NaN   \n",
       "...                                                  ...   \n",
       "44216                                                NaN   \n",
       "44217                                                NaN   \n",
       "44218                                                NaN   \n",
       "44219                                                NaN   \n",
       "44220                                                NaN   \n",
       "44221                                                NaN   \n",
       "44222                                                NaN   \n",
       "44223                                                NaN   \n",
       "44224                                                NaN   \n",
       "44225                                                NaN   \n",
       "44226                                                NaN   \n",
       "44227                                                NaN   \n",
       "44228                                                NaN   \n",
       "44229                                                NaN   \n",
       "44230                                                NaN   \n",
       "44231                                                NaN   \n",
       "44232                                                NaN   \n",
       "44233                                                NaN   \n",
       "44234                                          97.112000   \n",
       "44235                                                NaN   \n",
       "44236                                                NaN   \n",
       "44237                                                NaN   \n",
       "44238                                                NaN   \n",
       "44239                                                NaN   \n",
       "44240                                          94.640000   \n",
       "44241                                                NaN   \n",
       "44242                                                NaN   \n",
       "44243                                                NaN   \n",
       "44244                                                NaN   \n",
       "44245                                                NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_676(degC)_COUNT  \\\n",
       "0                                                 NaN   \n",
       "1                                                 NaN   \n",
       "2                                                 NaN   \n",
       "3                                                 NaN   \n",
       "4                                                 NaN   \n",
       "5                                                 NaN   \n",
       "6                                                 NaN   \n",
       "7                                                 NaN   \n",
       "8                                                 NaN   \n",
       "9                                                 NaN   \n",
       "10                                                NaN   \n",
       "11                                                NaN   \n",
       "12                                                NaN   \n",
       "13                                          98.178868   \n",
       "14                                                NaN   \n",
       "15                                                NaN   \n",
       "16                                                NaN   \n",
       "17                                          98.948750   \n",
       "18                                                NaN   \n",
       "19                                                NaN   \n",
       "20                                                NaN   \n",
       "21                                                NaN   \n",
       "22                                                NaN   \n",
       "23                                                NaN   \n",
       "24                                          98.239999   \n",
       "25                                                NaN   \n",
       "26                                                NaN   \n",
       "27                                                NaN   \n",
       "28                                          98.325263   \n",
       "29                                                NaN   \n",
       "...                                               ...   \n",
       "44216                                       98.639131   \n",
       "44217                                             NaN   \n",
       "44218                                             NaN   \n",
       "44219                                             NaN   \n",
       "44220                                             NaN   \n",
       "44221                                             NaN   \n",
       "44222                                             NaN   \n",
       "44223                                             NaN   \n",
       "44224                                             NaN   \n",
       "44225                                             NaN   \n",
       "44226                                             NaN   \n",
       "44227                                       99.408525   \n",
       "44228                                             NaN   \n",
       "44229                                             NaN   \n",
       "44230                                       99.398333   \n",
       "44231                                       96.928572   \n",
       "44232                                             NaN   \n",
       "44233                                             NaN   \n",
       "44234                                             NaN   \n",
       "44235                                             NaN   \n",
       "44236                                             NaN   \n",
       "44237                                             NaN   \n",
       "44238                                             NaN   \n",
       "44239                                             NaN   \n",
       "44240                                             NaN   \n",
       "44241                                       99.357306   \n",
       "44242                                             NaN   \n",
       "44243                                       96.350000   \n",
       "44244                                       97.117143   \n",
       "44245                                             NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223761_LAST  \\\n",
       "0                                       98.537500   \n",
       "1                                       97.166667   \n",
       "2                                             NaN   \n",
       "3                                             NaN   \n",
       "4                                       98.100000   \n",
       "5                                       98.550000   \n",
       "6                                       98.380000   \n",
       "7                                             NaN   \n",
       "8                                       97.975000   \n",
       "9                                       96.050000   \n",
       "10                                            NaN   \n",
       "11                                            NaN   \n",
       "12                                      99.466667   \n",
       "13                                            NaN   \n",
       "14                                            NaN   \n",
       "15                                      98.500000   \n",
       "16                                     100.772222   \n",
       "17                                            NaN   \n",
       "18                                      99.345455   \n",
       "19                                            NaN   \n",
       "20                                      98.312500   \n",
       "21                                      97.760000   \n",
       "22                                            NaN   \n",
       "23                                            NaN   \n",
       "24                                            NaN   \n",
       "25                                            NaN   \n",
       "26                                            NaN   \n",
       "27                                            NaN   \n",
       "28                                            NaN   \n",
       "29                                      96.525000   \n",
       "...                                           ...   \n",
       "44216                                         NaN   \n",
       "44217                                   98.475000   \n",
       "44218                                   96.275000   \n",
       "44219                                         NaN   \n",
       "44220                                         NaN   \n",
       "44221                                         NaN   \n",
       "44222                                   99.240000   \n",
       "44223                                   98.450000   \n",
       "44224                                         NaN   \n",
       "44225                                         NaN   \n",
       "44226                                   99.753333   \n",
       "44227                                         NaN   \n",
       "44228                                         NaN   \n",
       "44229                                   97.580000   \n",
       "44230                                         NaN   \n",
       "44231                                         NaN   \n",
       "44232                                   98.484375   \n",
       "44233                                         NaN   \n",
       "44234                                         NaN   \n",
       "44235                                         NaN   \n",
       "44236                                   97.977778   \n",
       "44237                                         NaN   \n",
       "44238                                         NaN   \n",
       "44239                                         NaN   \n",
       "44240                                   95.800000   \n",
       "44241                                         NaN   \n",
       "44242                                         NaN   \n",
       "44243                                         NaN   \n",
       "44244                                         NaN   \n",
       "44245                                   98.655000   \n",
       "\n",
       "       temperature body_known_qn_degF_678_LAST  \\\n",
       "0                                          NaN   \n",
       "1                                          NaN   \n",
       "2                                    97.533334   \n",
       "3                                    98.591667   \n",
       "4                                          NaN   \n",
       "5                                          NaN   \n",
       "6                                          NaN   \n",
       "7                                   100.649998   \n",
       "8                                          NaN   \n",
       "9                                          NaN   \n",
       "10                                   98.536364   \n",
       "11                                         NaN   \n",
       "12                                         NaN   \n",
       "13                                   99.099998   \n",
       "14                                   95.300003   \n",
       "15                                         NaN   \n",
       "16                                         NaN   \n",
       "17                                         NaN   \n",
       "18                                         NaN   \n",
       "19                                   98.800003   \n",
       "20                                         NaN   \n",
       "21                                         NaN   \n",
       "22                                   98.445455   \n",
       "23                                         NaN   \n",
       "24                                         NaN   \n",
       "25                                   97.320000   \n",
       "26                                   96.700000   \n",
       "27                                   96.920000   \n",
       "28                                         NaN   \n",
       "29                                         NaN   \n",
       "...                                        ...   \n",
       "44216                                98.302703   \n",
       "44217                                      NaN   \n",
       "44218                                      NaN   \n",
       "44219                                97.800003   \n",
       "44220                                95.740001   \n",
       "44221                                97.920000   \n",
       "44222                                      NaN   \n",
       "44223                                      NaN   \n",
       "44224                                98.130000   \n",
       "44225                                97.900002   \n",
       "44226                                      NaN   \n",
       "44227                                98.071875   \n",
       "44228                               100.526316   \n",
       "44229                                      NaN   \n",
       "44230                                      NaN   \n",
       "44231                                      NaN   \n",
       "44232                                      NaN   \n",
       "44233                               100.278462   \n",
       "44234                                      NaN   \n",
       "44235                                97.074999   \n",
       "44236                                      NaN   \n",
       "44237                                97.485713   \n",
       "44238                                97.264285   \n",
       "44239                                95.400002   \n",
       "44240                                      NaN   \n",
       "44241                                97.670000   \n",
       "44242                                98.980953   \n",
       "44243                                      NaN   \n",
       "44244                                      NaN   \n",
       "44245                                      NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_223762(degC)_LAST  \\\n",
       "0                                                   NaN   \n",
       "1                                                   NaN   \n",
       "2                                                   NaN   \n",
       "3                                                   NaN   \n",
       "4                                                   NaN   \n",
       "5                                                   NaN   \n",
       "6                                             97.721177   \n",
       "7                                                   NaN   \n",
       "8                                                   NaN   \n",
       "9                                                   NaN   \n",
       "10                                                  NaN   \n",
       "11                                            97.481429   \n",
       "12                                                  NaN   \n",
       "13                                                  NaN   \n",
       "14                                                  NaN   \n",
       "15                                                  NaN   \n",
       "16                                            99.895122   \n",
       "17                                                  NaN   \n",
       "18                                                  NaN   \n",
       "19                                                  NaN   \n",
       "20                                                  NaN   \n",
       "21                                                  NaN   \n",
       "22                                                  NaN   \n",
       "23                                            94.983637   \n",
       "24                                                  NaN   \n",
       "25                                                  NaN   \n",
       "26                                                  NaN   \n",
       "27                                                  NaN   \n",
       "28                                                  NaN   \n",
       "29                                                  NaN   \n",
       "...                                                 ...   \n",
       "44216                                               NaN   \n",
       "44217                                               NaN   \n",
       "44218                                               NaN   \n",
       "44219                                               NaN   \n",
       "44220                                               NaN   \n",
       "44221                                               NaN   \n",
       "44222                                               NaN   \n",
       "44223                                               NaN   \n",
       "44224                                               NaN   \n",
       "44225                                               NaN   \n",
       "44226                                               NaN   \n",
       "44227                                               NaN   \n",
       "44228                                               NaN   \n",
       "44229                                               NaN   \n",
       "44230                                               NaN   \n",
       "44231                                               NaN   \n",
       "44232                                               NaN   \n",
       "44233                                               NaN   \n",
       "44234                                         97.112000   \n",
       "44235                                               NaN   \n",
       "44236                                               NaN   \n",
       "44237                                               NaN   \n",
       "44238                                               NaN   \n",
       "44239                                               NaN   \n",
       "44240                                         94.640000   \n",
       "44241                                               NaN   \n",
       "44242                                               NaN   \n",
       "44243                                               NaN   \n",
       "44244                                               NaN   \n",
       "44245                                               NaN   \n",
       "\n",
       "       temperature body_known_qn_degF_676(degC)_LAST  \n",
       "0                                                NaN  \n",
       "1                                                NaN  \n",
       "2                                                NaN  \n",
       "3                                                NaN  \n",
       "4                                                NaN  \n",
       "5                                                NaN  \n",
       "6                                                NaN  \n",
       "7                                                NaN  \n",
       "8                                                NaN  \n",
       "9                                                NaN  \n",
       "10                                               NaN  \n",
       "11                                               NaN  \n",
       "12                                               NaN  \n",
       "13                                         98.178868  \n",
       "14                                               NaN  \n",
       "15                                               NaN  \n",
       "16                                               NaN  \n",
       "17                                         98.948750  \n",
       "18                                               NaN  \n",
       "19                                               NaN  \n",
       "20                                               NaN  \n",
       "21                                               NaN  \n",
       "22                                               NaN  \n",
       "23                                               NaN  \n",
       "24                                         98.239999  \n",
       "25                                               NaN  \n",
       "26                                               NaN  \n",
       "27                                               NaN  \n",
       "28                                         98.325263  \n",
       "29                                               NaN  \n",
       "...                                              ...  \n",
       "44216                                      98.639131  \n",
       "44217                                            NaN  \n",
       "44218                                            NaN  \n",
       "44219                                            NaN  \n",
       "44220                                            NaN  \n",
       "44221                                            NaN  \n",
       "44222                                            NaN  \n",
       "44223                                            NaN  \n",
       "44224                                            NaN  \n",
       "44225                                            NaN  \n",
       "44226                                            NaN  \n",
       "44227                                      99.408525  \n",
       "44228                                            NaN  \n",
       "44229                                            NaN  \n",
       "44230                                      99.398333  \n",
       "44231                                      96.928572  \n",
       "44232                                            NaN  \n",
       "44233                                            NaN  \n",
       "44234                                            NaN  \n",
       "44235                                            NaN  \n",
       "44236                                            NaN  \n",
       "44237                                            NaN  \n",
       "44238                                            NaN  \n",
       "44239                                            NaN  \n",
       "44240                                            NaN  \n",
       "44241                                      99.357306  \n",
       "44242                                            NaN  \n",
       "44243                                      96.350000  \n",
       "44244                                      97.117143  \n",
       "44245                                            NaN  \n",
       "\n",
       "[44246 rows x 16 columns]"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_mapper_pipeline.transform(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 152,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<units.MedicalUreg at 0x31082eb8>"
      ]
     },
     "execution_count": 152,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ureg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "mass = ureg.parse_units('degF')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<UnitsContainer({'[temperature]': 1.0})>"
      ]
     },
     "execution_count": 198,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mass.dimensionality"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pint.unit import UnitsContainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 160,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "units.dimensionality == UnitsContainer({'[length]':3.0})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "1e-06 kilogram"
      ],
      "text/latex": [
       "$1e-06\\ \\mathrm{kilogram}$"
      ],
      "text/plain": [
       "<Quantity(1e-06, 'kilogram')>"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(1*units).to_base_units()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "uc = UnitsContainer({'[length]':3.0})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "uc.dim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import constants\n",
    "import mimic\n",
    "import utils\n",
    "import pandas as pd\n",
    "import icu_data_defs\n",
    "import units\n",
    "from sklearn.pipeline import Pipeline\n",
    "import transformers\n",
    "import logger\n",
    "import features\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(mimic)\n",
    "reload(units)\n",
    "reload(utils)\n",
    "reload(transformers)\n",
    "def mimic_ETL(components,data_dict,hdf5_fname,\n",
    "              hadm_ids=constants.ALL,\n",
    "              agg_func=lambda x:x.iloc[0]):\n",
    "    \n",
    "    logger.log('BEGIN ETL for {} admissions'.format(hadm_ids if hadm_ids == constants.ALL else len(hadm_ids)),new_level=True)\n",
    "    category_map = mimic.mimic_category_map(data_dict)\n",
    "    ureg = units.MedicalUreg()\n",
    "    extractor = mimic.mimic_extractor('config/mimic_item_map.csv',data_dict)\n",
    "    \n",
    "\n",
    "    transform_pipeline = mimic.transform_pipeline()\n",
    "\n",
    "    standard_clean_pipeline = Pipeline([\n",
    "        ('aggregate_same_datetime',transformers.same_index_aggregator(agg_func)),\n",
    "        ('split_dtype',transformers.split_dtype()),\n",
    "        ('standardize_columns',transformers.column_standardizer(data_dict,ureg)),\n",
    "        ('standardize_categories',transformers.standardize_categories(data_dict,category_map)),\n",
    "        ('split_bad_categories',transformers.split_bad_categories(data_dict)),\n",
    "        ('one_hotter',transformers.nominal_to_onehot()),\n",
    "        ('drop_oob_values',transformers.oob_value_remover(data_dict))\n",
    "    ])\n",
    "    \n",
    "\n",
    "    logger.log('Extract CONTEXT...')\n",
    "    df_context = mimic.get_context_data(hadm_ids)\n",
    "    utils.save_df(df_context,hdf5_fname,'context')\n",
    "\n",
    "    \n",
    "    for component in components:\n",
    "        logger.log(component.upper(),new_level=True)\n",
    "        \n",
    "        logger.log(\"Extracting...\",new_level=True)\n",
    "        df_extracted = extractor.extract_component(component,hadm_ids)\n",
    "        utils.save_df(df_extracted,hdf5_fname,'extracted/{}'.format(component))\n",
    "        logger.end_log_level()\n",
    "        \n",
    "        display(df_extracted.head())\n",
    "        \n",
    "        logger.log(\"Transforming... {}\".format(df_extracted.shape))\n",
    "        transform_pipeline.set_params(add_level__level_val=component)\n",
    "        df_transformed = transform_pipeline.transform(df_extracted)\n",
    "        utils.save_df(df_transformed,hdf5_fname,'transformed/{}'.format(component))\n",
    "\n",
    "        display(df_transformed.head())\n",
    "\n",
    "        display(df_transformed.describe())\n",
    "\n",
    "        print utils.data_loss(df_extracted.set_index('id').value.to_frame(),df_transformed)\n",
    "        \n",
    "        logger.log(\"Cleaning... {}\".format(df_transformed.shape))       \n",
    "        df_cleaned = standard_clean_pipeline.transform(df_transformed)\n",
    "        utils.save_df(df_cleaned,hdf5_fname,'cleaned/{}'.format(component))\n",
    "        \n",
    "        display(df_cleaned.head())\n",
    "\n",
    "        display(df_cleaned.describe())\n",
    "\n",
    "        print utils.data_loss(df_extracted.set_index('id').value.to_frame(),df_cleaned)\n",
    "        \n",
    "        del df_cleaned,df_transformed,df_extracted\n",
    "        logger.end_log_level()\n",
    "        \n",
    "        \n",
    "    \n",
    "    logger.end_log()\n",
    "    \n",
    "    return\n",
    "           \n",
    "\n",
    "def mimic_features(hdf5_fname,specific_path,labels,\n",
    "                   custom_cleaners,segmenter,feature_tuples):\n",
    "    \n",
    "    \n",
    "    df_all = None\n",
    "    \n",
    "    for label in labels:\n",
    "        df_base = utils.open_df(hdf5_fname,'cleaned/{}'.format(label))\n",
    "        \n",
    "        df_cleaned = custom_cleaners.transform(df_base)\n",
    "        utils.save_df(df_cleaned,hdf5_fname,'{}/cleaned/{}'.format(specific_path,label))\n",
    "            \n",
    "        if df_all is None:\n",
    "            df_all = df_cleaned\n",
    "        else:\n",
    "            df_all = df_all.join(df_cleaned,how='outer')\n",
    "            del df_cleaned\n",
    "        \n",
    "    utils.save_df(df_all,hdf5_fname,'{}/cleaned/all')\n",
    "            \n",
    "    df_segmented = segmenter.transform(df_all)\n",
    "    utils.save_df(df_segmented,hdf5_fname_target,'{}/segmented'.format(specific_path))\n",
    "    del df_all\n",
    "    \n",
    "    mapped_ft = features.make_mapper(feature_tuples,df_segmented)\n",
    "    df_features = mapped_ft.transform(df_segmented)\n",
    "    utils.save_df(df_features,hdf5_fname_target,'{}/features'.format(specific_path))\n",
    "    del df_segmented\n",
    "    \n",
    "    return df_features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def do_ETL(data_dict,components,tag,n,seed=42):\n",
    "    hdf5_fname = 'data/mimic_{}_{}'.format(tag,n)\n",
    "    hadm_ids = n if n == constants.ALL else mimic.sample_hadm_ids(n,seed) \n",
    "    mimic_ETL(components,data_dict,hdf5_fname,hadm_ids=hadm_ids)\n",
    "    return hdf5_fname"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "reload(logger)\n",
    "\n",
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')\n",
    "components = data_dict.get_panel_defintions(12).component.unique().tolist() #12 is \"simple data\"\n",
    "tag = 'simple'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-20 10:03:09) BEGIN ETL for 1000 admissions\n",
      "(2017-06-20 10:03:09)>> Extract CONTEXT...\n",
      "(2017-06-20 10:03:17)<< DONE (8.0s)\n",
      "(2017-06-20 10:03:17)>> BLOOD PRESSURE SYSTOLIC\n",
      "(2017-06-20 10:03:17)>>>> Extracting...\n",
      "(2017-06-20 10:03:17)>>>>>> Extracting 14 items from chartevents\n",
      "51        35823\n",
      "455       26680\n",
      "220179    21957\n",
      "220050    15186\n",
      "3313       2250\n",
      "225309     1251\n",
      "3315         50\n",
      "3317         24\n",
      "3323         21\n",
      "442          19\n",
      "3321         19\n",
      "224167       12\n",
      "227243        8\n",
      "Name: itemid, dtype: int64\n",
      "Empty DataFrame\n",
      "Columns: [id, datetime, value, units, itemid]\n",
      "Index: []\n",
      "(2017-06-20 10:04:09)<<<<<< DONE (52.0s)\n",
      "(2017-06-20 10:04:09)>>>>>> Combine DF\n",
      "(2017-06-20 10:04:09)<<<<<< DONE (0.0s)\n",
      "(2017-06-20 10:04:09)>>>>>> Clean UOM\n",
      "(2017-06-20 10:04:10)<<<<<< DONE (1.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th>value</th>\n",
       "      <th>units</th>\n",
       "      <th>itemid</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>107880</td>\n",
       "      <td>2106-06-19 22:00:00</td>\n",
       "      <td>99</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>107880</td>\n",
       "      <td>2106-06-19 23:00:00</td>\n",
       "      <td>98</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>107880</td>\n",
       "      <td>2106-06-20 00:00:00</td>\n",
       "      <td>122</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>107880</td>\n",
       "      <td>2106-06-20 01:00:00</td>\n",
       "      <td>150</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>107880</td>\n",
       "      <td>2106-06-20 02:00:00</td>\n",
       "      <td>133</td>\n",
       "      <td>mmHg</td>\n",
       "      <td>51</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       id            datetime value units  itemid\n",
       "0  107880 2106-06-19 22:00:00    99  mmHg      51\n",
       "1  107880 2106-06-19 23:00:00    98  mmHg      51\n",
       "2  107880 2106-06-20 00:00:00   122  mmHg      51\n",
       "3  107880 2106-06-20 01:00:00   150  mmHg      51\n",
       "4  107880 2106-06-20 02:00:00   133  mmHg      51"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-20 10:04:10)<<<< DONE (53.0s)\n",
      "(2017-06-20 10:04:10)>>>> Transforming... (103300, 5)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>component</th>\n",
       "      <th colspan=\"13\" halign=\"left\">blood pressure systolic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th>cc/min</th>\n",
       "      <th colspan=\"5\" halign=\"left\">mmHg</th>\n",
       "      <th colspan=\"4\" halign=\"left\">cc/min</th>\n",
       "      <th colspan=\"3\" halign=\"left\">mmHg</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>3313</th>\n",
       "      <th>455</th>\n",
       "      <th>51</th>\n",
       "      <th>220179</th>\n",
       "      <th>442</th>\n",
       "      <th>220050</th>\n",
       "      <th>3315</th>\n",
       "      <th>3317</th>\n",
       "      <th>3321</th>\n",
       "      <th>3323</th>\n",
       "      <th>224167</th>\n",
       "      <th>227243</th>\n",
       "      <th>225309</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100304</th>\n",
       "      <th>2161-10-01 22:00:00</th>\n",
       "      <td>67</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161-10-01 23:00:00</th>\n",
       "      <td>68</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100788</th>\n",
       "      <th>2120-04-10 14:00:00</th>\n",
       "      <td>None</td>\n",
       "      <td>127</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2120-04-10 15:00:00</th>\n",
       "      <td>None</td>\n",
       "      <td>127</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2120-04-10 16:00:00</th>\n",
       "      <td>None</td>\n",
       "      <td>123</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "component                  blood pressure systolic                       \\\n",
       "units                                       cc/min   mmHg                 \n",
       "description                                 3313   455    51     220179   \n",
       "id     datetime                                                           \n",
       "100304 2161-10-01 22:00:00                      67   None   None   None   \n",
       "       2161-10-01 23:00:00                      68   None   None   None   \n",
       "100788 2120-04-10 14:00:00                    None    127   None   None   \n",
       "       2120-04-10 15:00:00                    None    127   None   None   \n",
       "       2120-04-10 16:00:00                    None    123   None   None   \n",
       "\n",
       "component                                                                    \\\n",
       "units                                    cc/min                        mmHg   \n",
       "description                442    220050 3315   3317   3321   3323   224167   \n",
       "id     datetime                                                               \n",
       "100304 2161-10-01 22:00:00   None   None   None   None   None   None   None   \n",
       "       2161-10-01 23:00:00   None   None   None   None   None   None   None   \n",
       "100788 2120-04-10 14:00:00   None   None   None   None   None   None   None   \n",
       "       2120-04-10 15:00:00   None   None   None   None   None   None   None   \n",
       "       2120-04-10 16:00:00   None   None   None   None   None   None   None   \n",
       "\n",
       "component                                 \n",
       "units                                     \n",
       "description                227243 225309  \n",
       "id     datetime                           \n",
       "100304 2161-10-01 22:00:00   None   None  \n",
       "       2161-10-01 23:00:00   None   None  \n",
       "100788 2120-04-10 14:00:00   None   None  \n",
       "       2120-04-10 15:00:00   None   None  \n",
       "       2120-04-10 16:00:00   None   None  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>component</th>\n",
       "      <th colspan=\"13\" halign=\"left\">blood pressure systolic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th>cc/min</th>\n",
       "      <th colspan=\"5\" halign=\"left\">mmHg</th>\n",
       "      <th colspan=\"4\" halign=\"left\">cc/min</th>\n",
       "      <th colspan=\"3\" halign=\"left\">mmHg</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>3313</th>\n",
       "      <th>455</th>\n",
       "      <th>51</th>\n",
       "      <th>220179</th>\n",
       "      <th>442</th>\n",
       "      <th>220050</th>\n",
       "      <th>3315</th>\n",
       "      <th>3317</th>\n",
       "      <th>3321</th>\n",
       "      <th>3323</th>\n",
       "      <th>224167</th>\n",
       "      <th>227243</th>\n",
       "      <th>225309</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>2241</td>\n",
       "      <td>26480</td>\n",
       "      <td>35622</td>\n",
       "      <td>21957</td>\n",
       "      <td>17</td>\n",
       "      <td>15186</td>\n",
       "      <td>22</td>\n",
       "      <td>16</td>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "      <td>12</td>\n",
       "      <td>8</td>\n",
       "      <td>1251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unique</th>\n",
       "      <td>81</td>\n",
       "      <td>181</td>\n",
       "      <td>208</td>\n",
       "      <td>176</td>\n",
       "      <td>14</td>\n",
       "      <td>172</td>\n",
       "      <td>21</td>\n",
       "      <td>13</td>\n",
       "      <td>12</td>\n",
       "      <td>11</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>126</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>top</th>\n",
       "      <td>72</td>\n",
       "      <td>106</td>\n",
       "      <td>108</td>\n",
       "      <td>112</td>\n",
       "      <td>122</td>\n",
       "      <td>120</td>\n",
       "      <td>89</td>\n",
       "      <td>74</td>\n",
       "      <td>66</td>\n",
       "      <td>80</td>\n",
       "      <td>102</td>\n",
       "      <td>110</td>\n",
       "      <td>107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>freq</th>\n",
       "      <td>91</td>\n",
       "      <td>494</td>\n",
       "      <td>641</td>\n",
       "      <td>457</td>\n",
       "      <td>2</td>\n",
       "      <td>296</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>41</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "component   blood pressure systolic                                            \\\n",
       "units                        cc/min   mmHg                             cc/min   \n",
       "description                  3313   455    51     220179 442    220050 3315     \n",
       "count                          2241  26480  35622  21957     17  15186     22   \n",
       "unique                           81    181    208    176     14    172     21   \n",
       "top                              72    106    108    112    122    120     89   \n",
       "freq                             91    494    641    457      2    296      2   \n",
       "\n",
       "component                                              \n",
       "units                              mmHg                \n",
       "description 3317   3321   3323   224167 227243 225309  \n",
       "count           16     13     14     12      8   1251  \n",
       "unique          13     12     11      9      8    126  \n",
       "top             74     66     80    102    110    107  \n",
       "freq             2      2      2      4      1     41  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((103300, 1), (97535, 13), 461L, 2, '0.2094% records')\n",
      "(2017-06-20 10:04:12)<<<< DONE (2.0s)\n",
      "(2017-06-20 10:04:12)>>>> Cleaning... (97535, 13)\n",
      "(2017-06-20 10:04:14)<<<< DONE (2.0s)\n",
      "(2017-06-20 10:04:14)>>>> Nominal to OneHot\n",
      "(2017-06-20 10:04:14)<<<< DONE (0.0s)\n",
      "(2017-06-20 10:04:14)>>>> Drop OOB data | (97527, 13)\n",
      "(2017-06-20 10:04:14)>>>>>> blood pressure systolic, mmHg, 100525\n",
      "(2017-06-20 10:04:19)<<<<<< DONE (5.0s)\n",
      "(2017-06-20 10:04:19)>>>>>> blood pressure systolic, cc/min, 2306\n",
      "(2017-06-20 10:04:19)<<<<<< DONE (0.0s)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>component</th>\n",
       "      <th colspan=\"13\" halign=\"left\">blood pressure systolic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"8\" halign=\"left\">known</th>\n",
       "      <th colspan=\"5\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"8\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"5\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"8\" halign=\"left\">mmHg</th>\n",
       "      <th colspan=\"5\" halign=\"left\">cc/min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>description</th>\n",
       "      <th>220050</th>\n",
       "      <th>220179</th>\n",
       "      <th>224167</th>\n",
       "      <th>225309</th>\n",
       "      <th>227243</th>\n",
       "      <th>442</th>\n",
       "      <th>455</th>\n",
       "      <th>51</th>\n",
       "      <th>3313</th>\n",
       "      <th>3315</th>\n",
       "      <th>3317</th>\n",
       "      <th>3321</th>\n",
       "      <th>3323</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>id</th>\n",
       "      <th>datetime</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"2\" valign=\"top\">100304</th>\n",
       "      <th>2161-10-01 22:00:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>67.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2161-10-01 23:00:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>68.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">100788</th>\n",
       "      <th>2120-04-10 14:00:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>127.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2120-04-10 15:00:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>127.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2120-04-10 16:00:00</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>123.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "component                  blood pressure systolic                       \\\n",
       "status                                       known                        \n",
       "variable_type                                   qn                        \n",
       "units                                         mmHg                        \n",
       "description                                 220050 220179 224167 225309   \n",
       "id     datetime                                                           \n",
       "100304 2161-10-01 22:00:00                     NaN    NaN    NaN    NaN   \n",
       "       2161-10-01 23:00:00                     NaN    NaN    NaN    NaN   \n",
       "100788 2120-04-10 14:00:00                     NaN    NaN    NaN    NaN   \n",
       "       2120-04-10 15:00:00                     NaN    NaN    NaN    NaN   \n",
       "       2120-04-10 16:00:00                     NaN    NaN    NaN    NaN   \n",
       "\n",
       "component                                                                     \n",
       "status                                           unknown                      \n",
       "variable_type                                         qn                      \n",
       "units                                             cc/min                      \n",
       "description                227243 442    455  51    3313 3315 3317 3321 3323  \n",
       "id     datetime                                                               \n",
       "100304 2161-10-01 22:00:00    NaN NaN    NaN NaN    67.0  NaN  NaN  NaN  NaN  \n",
       "       2161-10-01 23:00:00    NaN NaN    NaN NaN    68.0  NaN  NaN  NaN  NaN  \n",
       "100788 2120-04-10 14:00:00    NaN NaN  127.0 NaN     NaN  NaN  NaN  NaN  NaN  \n",
       "       2120-04-10 15:00:00    NaN NaN  127.0 NaN     NaN  NaN  NaN  NaN  NaN  \n",
       "       2120-04-10 16:00:00    NaN NaN  123.0 NaN     NaN  NaN  NaN  NaN  NaN  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>component</th>\n",
       "      <th colspan=\"13\" halign=\"left\">blood pressure systolic</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>status</th>\n",
       "      <th colspan=\"8\" halign=\"left\">known</th>\n",
       "      <th colspan=\"5\" halign=\"left\">unknown</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>variable_type</th>\n",
       "      <th colspan=\"8\" halign=\"left\">qn</th>\n",
       "      <th colspan=\"5\" halign=\"left\">qn</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>units</th>\n",
       "      <th colspan=\"8\" halign=\"left\">mmHg</th>\n",
       "      <th colspan=\"5\" halign=\"left\">cc/min</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>description</th>\n",
       "      <th>220050</th>\n",
       "      <th>220179</th>\n",
       "      <th>224167</th>\n",
       "      <th>225309</th>\n",
       "      <th>227243</th>\n",
       "      <th>442</th>\n",
       "      <th>455</th>\n",
       "      <th>51</th>\n",
       "      <th>3313</th>\n",
       "      <th>3315</th>\n",
       "      <th>3317</th>\n",
       "      <th>3321</th>\n",
       "      <th>3323</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>15186.000000</td>\n",
       "      <td>21957.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>1251.000000</td>\n",
       "      <td>8.00000</td>\n",
       "      <td>17.000000</td>\n",
       "      <td>26478.000000</td>\n",
       "      <td>35616.000000</td>\n",
       "      <td>2241.000000</td>\n",
       "      <td>22.000000</td>\n",
       "      <td>16.00000</td>\n",
       "      <td>13.000000</td>\n",
       "      <td>14.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>118.988213</td>\n",
       "      <td>119.200483</td>\n",
       "      <td>104.416667</td>\n",
       "      <td>110.457234</td>\n",
       "      <td>111.87500</td>\n",
       "      <td>122.882353</td>\n",
       "      <td>119.150623</td>\n",
       "      <td>121.264628</td>\n",
       "      <td>70.371709</td>\n",
       "      <td>77.727273</td>\n",
       "      <td>76.68750</td>\n",
       "      <td>77.846154</td>\n",
       "      <td>75.714286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>23.366007</td>\n",
       "      <td>21.757748</td>\n",
       "      <td>15.559027</td>\n",
       "      <td>20.465707</td>\n",
       "      <td>19.05209</td>\n",
       "      <td>31.262762</td>\n",
       "      <td>23.206573</td>\n",
       "      <td>25.927916</td>\n",
       "      <td>12.969975</td>\n",
       "      <td>15.495566</td>\n",
       "      <td>17.72651</td>\n",
       "      <td>14.932326</td>\n",
       "      <td>14.514865</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>33.000000</td>\n",
       "      <td>86.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>84.00000</td>\n",
       "      <td>70.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>38.000000</td>\n",
       "      <td>54.000000</td>\n",
       "      <td>50.00000</td>\n",
       "      <td>58.000000</td>\n",
       "      <td>55.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>103.000000</td>\n",
       "      <td>104.000000</td>\n",
       "      <td>97.500000</td>\n",
       "      <td>99.000000</td>\n",
       "      <td>97.25000</td>\n",
       "      <td>96.000000</td>\n",
       "      <td>102.000000</td>\n",
       "      <td>104.000000</td>\n",
       "      <td>61.000000</td>\n",
       "      <td>66.250000</td>\n",
       "      <td>62.75000</td>\n",
       "      <td>67.000000</td>\n",
       "      <td>66.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>117.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>102.000000</td>\n",
       "      <td>108.000000</td>\n",
       "      <td>116.00000</td>\n",
       "      <td>125.000000</td>\n",
       "      <td>117.000000</td>\n",
       "      <td>119.000000</td>\n",
       "      <td>70.000000</td>\n",
       "      <td>76.000000</td>\n",
       "      <td>76.00000</td>\n",
       "      <td>75.000000</td>\n",
       "      <td>75.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>133.000000</td>\n",
       "      <td>133.000000</td>\n",
       "      <td>104.000000</td>\n",
       "      <td>120.000000</td>\n",
       "      <td>128.50000</td>\n",
       "      <td>138.000000</td>\n",
       "      <td>135.000000</td>\n",
       "      <td>137.000000</td>\n",
       "      <td>77.000000</td>\n",
       "      <td>86.750000</td>\n",
       "      <td>86.00000</td>\n",
       "      <td>84.000000</td>\n",
       "      <td>80.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>244.000000</td>\n",
       "      <td>247.000000</td>\n",
       "      <td>144.000000</td>\n",
       "      <td>273.000000</td>\n",
       "      <td>132.00000</td>\n",
       "      <td>174.000000</td>\n",
       "      <td>234.000000</td>\n",
       "      <td>284.000000</td>\n",
       "      <td>123.000000</td>\n",
       "      <td>112.000000</td>\n",
       "      <td>120.00000</td>\n",
       "      <td>108.000000</td>\n",
       "      <td>99.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "component     blood pressure systolic                                         \\\n",
       "status                          known                                          \n",
       "variable_type                      qn                                          \n",
       "units                            mmHg                                          \n",
       "description                    220050        220179      224167       225309   \n",
       "count                    15186.000000  21957.000000   12.000000  1251.000000   \n",
       "mean                       118.988213    119.200483  104.416667   110.457234   \n",
       "std                         23.366007     21.757748   15.559027    20.465707   \n",
       "min                          0.000000     33.000000   86.000000    23.000000   \n",
       "25%                        103.000000    104.000000   97.500000    99.000000   \n",
       "50%                        117.000000    117.000000  102.000000   108.000000   \n",
       "75%                        133.000000    133.000000  104.000000   120.000000   \n",
       "max                        244.000000    247.000000  144.000000   273.000000   \n",
       "\n",
       "component                                                                      \\\n",
       "status                                                                unknown   \n",
       "variable_type                                                              qn   \n",
       "units                                                                  cc/min   \n",
       "description       227243         442           455            51         3313   \n",
       "count            8.00000   17.000000  26478.000000  35616.000000  2241.000000   \n",
       "mean           111.87500  122.882353    119.150623    121.264628    70.371709   \n",
       "std             19.05209   31.262762     23.206573     25.927916    12.969975   \n",
       "min             84.00000   70.000000      0.000000      0.000000    38.000000   \n",
       "25%             97.25000   96.000000    102.000000    104.000000    61.000000   \n",
       "50%            116.00000  125.000000    117.000000    119.000000    70.000000   \n",
       "75%            128.50000  138.000000    135.000000    137.000000    77.000000   \n",
       "max            132.00000  174.000000    234.000000    284.000000   123.000000   \n",
       "\n",
       "component                                                    \n",
       "status                                                       \n",
       "variable_type                                                \n",
       "units                                                        \n",
       "description          3315       3317        3321       3323  \n",
       "count           22.000000   16.00000   13.000000  14.000000  \n",
       "mean            77.727273   76.68750   77.846154  75.714286  \n",
       "std             15.495566   17.72651   14.932326  14.514865  \n",
       "min             54.000000   50.00000   58.000000  55.000000  \n",
       "25%             66.250000   62.75000   67.000000  66.000000  \n",
       "50%             76.000000   76.00000   75.000000  75.500000  \n",
       "75%             86.750000   86.00000   84.000000  80.000000  \n",
       "max            112.000000  120.00000  108.000000  99.000000  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((103300, 1), (97527, 13), 469L, 2, '0.2094% records')\n",
      "(2017-06-20 10:04:19)<<<< DONE (5.0s)\n",
      "(2017-06-20 10:04:19)<< DONE (62.0s)\n",
      "(2017-06-20 10:04:19) DONE (70.0s)\n"
     ]
    }
   ],
   "source": [
    "# ETL for systolic blood pressure on a sample of n admissions (mimic.sample_hadm_ids).\n",
    "# do_ETL returns the HDF5 path it wrote, formatted as 'data/mimic_<tag>_<n>'.\n",
    "n = 100\n",
    "hdf5_fname = do_ETL(\n",
    "    data_dict,\n",
    "    components=[data_dict.components.BLOOD_PRESSURE_SYSTOLIC],\n",
    "    tag=tag,\n",
    "    n=n\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-06-20 10:09:14) BEGIN ETL for all admissions\n",
      "(2017-06-20 10:09:15)>> Extract CONTEXT...\n",
      "(2017-06-20 10:09:27)<< DONE (12.0s)\n",
      "(2017-06-20 10:09:27)>> BLOOD PRESSURE SYSTOLIC\n",
      "(2017-06-20 10:09:27)>>>> Extracting...\n",
      "(2017-06-20 10:09:28)>>>>>> Extracting 14 items from chartevents\n",
      "51        2099353\n",
      "455       1586769\n",
      "220179    1290488\n",
      "220050    1149788\n",
      "3313       148105\n",
      "225309      86557\n",
      "3315         3762\n",
      "442          2565\n",
      "3317         2052\n",
      "3323         2039\n",
      "3321         2032\n",
      "224167        740\n",
      "227243        541\n",
      "6              33\n",
      "Name: itemid, dtype: int64\n",
      "              id            datetime   value units  itemid\n",
      "963709  136796.0 2139-10-06 21:00:00  113/55             6\n",
      "963711  136796.0 2139-10-06 22:00:00  126/58             6\n",
      "963713  136796.0 2139-10-06 23:00:00  139/64             6\n",
      "964245  136796.0 2139-10-07 07:00:00  132/59             6\n",
      "964247  136796.0 2139-10-07 08:00:00  143/63             6\n",
      "964301  136796.0 2139-10-07 16:00:00  152/68             6\n",
      "964304  136796.0 2139-10-07 18:00:00  141/59             6\n",
      "964710  136796.0 2139-10-07 09:00:00  133/59             6\n",
      "964743  136796.0 2139-10-07 10:00:00  131/60             6\n",
      "965088  136796.0 2139-10-07 11:00:00  123/54             6\n",
      "965090  136796.0 2139-10-07 12:00:00  185/79             6\n",
      "965092  136796.0 2139-10-07 13:00:00  116/54             6\n",
      "965094  136796.0 2139-10-07 14:00:00  122/57             6\n",
      "965096  136796.0 2139-10-07 15:00:00  120/56             6\n",
      "965438  136796.0 2139-10-06 13:00:00  122/55             6\n",
      "965440  136796.0 2139-10-06 14:00:00  110/56             6\n",
      "965854  136796.0 2139-10-06 18:00:00  108/56             6\n",
      "965856  136796.0 2139-10-06 20:00:00  115/56             6\n",
      "966407  136796.0 2139-10-07 03:00:00  129/57             6\n",
      "966409  136796.0 2139-10-07 04:00:00  121/55             6\n",
      "966411  136796.0 2139-10-07 05:00:00  125/56             6\n",
      "968122  136796.0 2139-10-06 09:00:00  119/55             6\n",
      "968125  136796.0 2139-10-06 12:00:00  113/60             6\n",
      "968615  136796.0 2139-10-07 00:00:00  120/54             6\n",
      "968617  136796.0 2139-10-07 01:00:00  130/58             6\n",
      "968619  136796.0 2139-10-07 02:00:00  124/56             6\n"
     ]
    },
    {
     "ename": "KeyError",
     "evalue": "0L",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-44-88e79271be02>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconstants\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mALL\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mhdf5_fname\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdo_ETL\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcomponents\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBLOOD_PRESSURE_SYSTOLIC\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'bp'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-37-093e315c4cfe>\u001b[0m in \u001b[0;36mdo_ETL\u001b[1;34m(data_dict, components, tag, n, seed)\u001b[0m\n\u001b[0;32m      2\u001b[0m     \u001b[0mhdf5_fname\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m'data/mimic_{}_{}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtag\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mhadm_ids\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mn\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mn\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mconstants\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mALL\u001b[0m \u001b[1;32melse\u001b[0m \u001b[0mmimic\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msample_hadm_ids\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mn\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mseed\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[0mmimic_ETL\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcomponents\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhdf5_fname\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhadm_ids\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mhadm_ids\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      5\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mhdf5_fname\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-36-1619d6fa19c9>\u001b[0m in \u001b[0;36mmimic_ETL\u001b[1;34m(components, data_dict, hdf5_fname, hadm_ids, agg_func)\u001b[0m\n\u001b[0;32m     35\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     36\u001b[0m         \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlog\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Extracting...\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mnew_level\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 37\u001b[1;33m         \u001b[0mdf_extracted\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mextractor\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mextract_component\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhadm_ids\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     38\u001b[0m         \u001b[0mutils\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave_df\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_extracted\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhdf5_fname\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'extracted/{}'\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     39\u001b[0m         \u001b[0mlogger\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mend_log_level\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\icu_ml_project\\v5\\mimic.py\u001b[0m in \u001b[0;36mextract_component\u001b[1;34m(self, component, hadm_ids)\u001b[0m\n\u001b[0;32m     95\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     96\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mextract_component\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhadm_ids\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mALL\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 97\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mextract_component\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconn\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcomponent\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitem_map\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mhadm_ids\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     98\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     99\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\icu_ml_project\\v5\\mimic.py\u001b[0m in \u001b[0;36mextract_component\u001b[1;34m(mimic_conn, component, item_map, data_dict, hadm_ids)\u001b[0m\n\u001b[0;32m    132\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mbp_slice\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    133\u001b[0m                     \u001b[1;32mif\u001b[0m \u001b[0mcomponent\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdata_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcomponents\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBLOOD_PRESSURE_SYSTOLIC\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 134\u001b[1;33m                         \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mis_item_6\u001b[0m \u001b[1;33m&\u001b[0m \u001b[0mhas_slash\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'value'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mis_item_6\u001b[0m \u001b[1;33m&\u001b[0m \u001b[0mhas_slash\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'value'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'/'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    135\u001b[0m                     \u001b[1;32melif\u001b[0m \u001b[0mcomponent\u001b[0m \u001b[1;33m==\u001b[0m 
\u001b[0mdata_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcomponents\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBLOOD_PRESSURE_DIASTOLIC\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    136\u001b[0m                         \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mhas_slash\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'value'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mhas_slash\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'value'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'/'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\core\\series.pyc\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m    599\u001b[0m         \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_apply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    600\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 601\u001b[1;33m             \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    602\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    603\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mC:\\Users\\genkinjz\\AppData\\Local\\Continuum\\Anaconda2\\lib\\site-packages\\pandas\\indexes\\base.pyc\u001b[0m in \u001b[0;36mget_value\u001b[1;34m(self, series, key)\u001b[0m\n\u001b[0;32m   2137\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2138\u001b[0m             return self._engine.get_value(s, k,\n\u001b[1;32m-> 2139\u001b[1;33m                                           tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[0;32m   2140\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   2141\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minferred_type\u001b[0m \u001b[1;32min\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'integer'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'boolean'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mpandas\\index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_value (pandas\\index.c:3338)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_value (pandas\\index.c:3041)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\index.pyx\u001b[0m in \u001b[0;36mpandas.index.IndexEngine.get_loc (pandas\\index.c:4024)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\src\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas.hashtable.Int64HashTable.get_item (pandas\\hashtable.c:8141)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;32mpandas\\src\\hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas.hashtable.Int64HashTable.get_item (pandas\\hashtable.c:8085)\u001b[1;34m()\u001b[0m\n",
      "\u001b[1;31mKeyError\u001b[0m: 0L"
     ]
    }
   ],
   "source": [
    "# Same systolic blood pressure ETL, now over all admissions (n = constants.ALL), tagged 'bp'.\n",
    "# NOTE(review): the saved output of this cell ends in `KeyError: 0L`, raised inside\n",
    "# mimic.extract_component while it handles the itemid 6 'systolic/diastolic' string values.\n",
    "n = constants.ALL\n",
    "hdf5_fname = do_ETL(\n",
    "    data_dict,\n",
    "    components=[data_dict.components.BLOOD_PRESSURE_SYSTOLIC],\n",
    "    tag='bp',\n",
    "    n=n\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<class 'pandas.io.pytables.HDFStore'>\n",
       "File path: data/mimic_simple_all\n",
       "/cleaned/blood pressure diastolic                        frame                            \n",
       "/cleaned/blood pressure mean                             frame                            \n",
       "/cleaned/blood pressure systolic                         frame                            \n",
       "/cleaned/glasgow coma scale eye opening                  frame                            \n",
       "/cleaned/glasgow coma scale motor                        frame                            \n",
       "/cleaned/glasgow coma scale verbal                       frame                            \n",
       "/cleaned/heart rate                                      frame                            \n",
       "/cleaned/hemoglobin                                      frame                            \n",
       "/cleaned/lactate                                         frame                            \n",
       "/cleaned/lactated ringers                                frame                            \n",
       "/cleaned/norepinephrine                                  frame                            \n",
       "/cleaned/normal saline                                   frame                            \n",
       "/cleaned/output urine                                    frame                            \n",
       "/cleaned/oxygen saturation pulse oximetry                frame                            \n",
       "/cleaned/respiratory rate                                frame                            \n",
       "/cleaned/temperature body                                frame                            \n",
       "/cleaned/vasopressin                                     frame                            \n",
       "/cleaned/weight body                                     frame                            \n",
       "/context                                                 frame        (shape->[62722,22]) \n",
       "/extracted/blood pressure diastolic                      frame        (shape->[6371249,5])\n",
       "/extracted/blood pressure mean                           frame        (shape->[2536271,5])\n",
       "/extracted/blood pressure systolic                       frame        (shape->[6374824,5])\n",
       "/extracted/glasgow coma scale eye opening                frame        (shape->[956672,5]) \n",
       "/extracted/glasgow coma scale motor                      frame        (shape->[952565,5]) \n",
       "/extracted/glasgow coma scale verbal                     frame        (shape->[954700,5]) \n",
       "/extracted/heart rate                                    frame        (shape->[7952939,5])\n",
       "/extracted/hemoglobin                                    frame        (shape->[1167921,5])\n",
       "/extracted/lactate                                       frame        (shape->[393608,5]) \n",
       "/extracted/lactated ringers                              frame        (shape->[504306,5]) \n",
       "/extracted/norepinephrine                                frame        (shape->[1136938,5])\n",
       "/extracted/normal saline                                 frame        (shape->[817373,5]) \n",
       "/extracted/output urine                                  frame        (shape->[3644639,5])\n",
       "/extracted/oxygen saturation pulse oximetry              frame        (shape->[6099827,5])\n",
       "/extracted/respiratory rate                              frame        (shape->[7810019,5])\n",
       "/extracted/temperature body                              frame        (shape->[1751447,5])\n",
       "/extracted/vasopressin                                   frame        (shape->[339184,5]) \n",
       "/extracted/weight body                                   frame        (shape->[95425,5])  \n",
       "/transformed/blood pressure diastolic                    frame                            \n",
       "/transformed/blood pressure mean                         frame                            \n",
       "/transformed/blood pressure systolic                     frame                            \n",
       "/transformed/glasgow coma scale eye opening              frame                            \n",
       "/transformed/glasgow coma scale motor                    frame                            \n",
       "/transformed/glasgow coma scale verbal                   frame                            \n",
       "/transformed/heart rate                                  frame                            \n",
       "/transformed/hemoglobin                                  frame                            \n",
       "/transformed/lactate                                     frame                            \n",
       "/transformed/lactated ringers                            frame                            \n",
       "/transformed/norepinephrine                              frame                            \n",
       "/transformed/normal saline                               frame                            \n",
       "/transformed/output urine                                frame                            \n",
       "/transformed/oxygen saturation pulse oximetry            frame                            \n",
       "/transformed/respiratory rate                            frame                            \n",
       "/transformed/temperature body                            frame                            \n",
       "/transformed/vasopressin                                 frame                            \n",
       "/transformed/weight body                                 frame                            "
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "store = pd.HDFStore(hdf5_fname)\n",
    "store"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "store.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import utils\n",
    "import mimic\n",
    "import icu_data_defs\n",
    "import transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')\n",
    "components = data_dict.get_panel_defintions(1).component.unique().tolist() #1 is vitals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[u'heart rate',\n",
       " u'blood pressure systolic',\n",
       " u'blood pressure diastolic',\n",
       " u'blood pressure mean',\n",
       " u'respiratory rate',\n",
       " u'temperature body',\n",
       " u'oxygen saturation pulse oximetry',\n",
       " u'weight body']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "components"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2017-07-04 22:28:15)<<<<<<<< --- (25.0s)\n",
      "(2017-07-04 22:28:15)>>>>>>>> ***ETL***\n",
      "(2017-07-04 22:28:15)>>>>>>>>>> SETUP\n",
      "(2017-07-04 22:28:15)<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:28:15)>>>>>>>>>> BEGIN ETL for all admissions and 8 components: [u'heart rate', u'blood pressure systolic', u'blood pressure diastolic', u'blood pressure mean', u'respiratory rate', u'temperature body', u'oxygen saturation pulse oximetry', u'weight body']\n",
      "(2017-07-04 22:28:15)>>>>>>>>>>>> HEART RATE: 1/8\n",
      "(2017-07-04 22:28:15)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 22:28:16)>>>>>>>>>>>>>>>> Extracting 5 items from chartevents\n",
      "(2017-07-04 22:29:12)<<<<<<<<<<<<<<<< --- (56.0s)\n",
      "(2017-07-04 22:29:12)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 22:29:13)<<<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 22:29:13)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 22:29:33)<<<<<<<<<<<<<<<< --- (20.0s)\n",
      "(2017-07-04 22:29:33)<<<<<<<<<<<<<< --- (78.0s)\n",
      "(2017-07-04 22:29:33)>>>>>>>>>>>>>> Transforming... (7952939, 5)\n",
      "Data Loss (Extract > Transformed): ((7952939, 1), (7923711, 6), 29066L, 171, '0.3015% records')\n",
      "(2017-07-04 22:31:25)<<<<<<<<<<<<<< --- (112.0s)\n",
      "(2017-07-04 22:31:25)>>>>>>>>>>>>>> Cleaning... (7923711, 6)\n",
      "(2017-07-04 22:32:14)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 22:32:14)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:32:14)>>>>>>>>>>>>>>>> Drop OOB data | (7922986, 6)\n",
      "(2017-07-04 22:32:18)>>>>>>>>>>>>>>>>>> heart rate, beats/min, 7923117\n",
      "(2017-07-04 22:33:29)<<<<<<<<<<<<<<<<<< --- (71.0s)\n",
      "(2017-07-04 22:33:29)>>>>>>>>>>>>>>>>>> heart rate, no_units, 31\n",
      "(2017-07-04 22:33:29)<<<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:33:29)<<<<<<<<<<<<<<<< --- (75.0s)\n",
      "Data Loss (Extract > Cleaned): ((7952939, 1), (7922986, 6), 29804L, 171, '0.3015% records')\n",
      "(2017-07-04 22:33:31)<<<<<<<<<<<<<< --- (126.0s)\n",
      "(2017-07-04 22:33:31)>>>>>>>>>>>>>> Filter & sort - (7922986, 6)\n",
      "(2017-07-04 22:33:35)<<<<<<<<<<<<<< --- (4.0s)\n",
      "(2017-07-04 22:33:35)>>>>>>>>>>>>>> Convert to dask - (7922986, 6)\n",
      "(2017-07-04 22:33:36)<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 22:33:36)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 22:33:36)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:33:36)<<<<<<<<<<<< --- (321.0s)\n",
      "(2017-07-04 22:33:36)>>>>>>>>>>>> BLOOD PRESSURE SYSTOLIC: 2/8\n",
      "(2017-07-04 22:33:36)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 22:33:36)>>>>>>>>>>>>>>>> Extracting 14 items from chartevents\n",
      "(2017-07-04 22:35:06)<<<<<<<<<<<<<<<< --- (90.0s)\n",
      "(2017-07-04 22:35:06)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 22:35:06)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:35:06)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 22:35:10)<<<<<<<<<<<<<<<< --- (4.0s)\n",
      "(2017-07-04 22:35:10)<<<<<<<<<<<<<< --- (94.0s)\n",
      "(2017-07-04 22:35:10)>>>>>>>>>>>>>> Transforming... (6374824, 5)\n",
      "Data Loss (Extract > Transformed): ((6374824, 1), (5974719, 15), 43236L, 174, '0.307% records')\n",
      "(2017-07-04 22:36:52)<<<<<<<<<<<<<< --- (102.0s)\n",
      "(2017-07-04 22:36:52)>>>>>>>>>>>>>> Cleaning... (5974719, 15)\n",
      "(2017-07-04 22:42:30)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 22:42:31)<<<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 22:42:31)>>>>>>>>>>>>>>>> Drop OOB data | (5974186, 14)\n",
      "(2017-07-04 22:42:34)>>>>>>>>>>>>>>>>>> blood pressure systolic, mmHg, 6177439\n",
      "(2017-07-04 22:45:18)<<<<<<<<<<<<<<<<<< --- (164.0s)\n",
      "(2017-07-04 22:45:18)>>>>>>>>>>>>>>>>>> blood pressure systolic, cc/min, 153573\n",
      "(2017-07-04 22:45:19)<<<<<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 22:45:19)<<<<<<<<<<<<<<<< --- (168.0s)\n",
      "Data Loss (Extract > Cleaned): ((6374824, 1), (5974186, 14), 43848L, 174, '0.307% records')\n",
      "(2017-07-04 22:45:20)<<<<<<<<<<<<<< --- (508.0s)\n",
      "(2017-07-04 22:45:20)>>>>>>>>>>>>>> Filter & sort - (5974186, 14)\n",
      "(2017-07-04 22:45:23)<<<<<<<<<<<<<< --- (3.0s)\n",
      "(2017-07-04 22:45:23)>>>>>>>>>>>>>> Convert to dask - (5974186, 14)\n",
      "(2017-07-04 22:45:25)<<<<<<<<<<<<<< --- (2.0s)\n",
      "(2017-07-04 22:45:25)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 22:45:25)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:45:25)<<<<<<<<<<<< --- (709.0s)\n",
      "(2017-07-04 22:45:25)>>>>>>>>>>>> BLOOD PRESSURE DIASTOLIC: 3/8\n",
      "(2017-07-04 22:45:25)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 22:45:25)>>>>>>>>>>>>>>>> Extracting 16 items from chartevents\n",
      "(2017-07-04 22:48:00)<<<<<<<<<<<<<<<< --- (155.0s)\n",
      "(2017-07-04 22:48:00)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 22:48:00)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:48:00)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 22:48:06)<<<<<<<<<<<<<<<< --- (6.0s)\n",
      "(2017-07-04 22:48:06)<<<<<<<<<<<<<< --- (161.0s)\n",
      "(2017-07-04 22:48:06)>>>>>>>>>>>>>> Transforming... (6371282, 5)\n",
      "Data Loss (Extract > Transformed): ((6371282, 1), (5976845, 17), 24410L, 170, '0.2999% records')\n",
      "(2017-07-04 22:49:52)<<<<<<<<<<<<<< --- (106.0s)\n",
      "(2017-07-04 22:49:52)>>>>>>>>>>>>>> Cleaning... (5976845, 17)\n",
      "(2017-07-04 22:56:05)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 22:56:06)<<<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 22:56:06)>>>>>>>>>>>>>>>> Drop OOB data | (5976313, 16)\n",
      "(2017-07-04 22:56:10)>>>>>>>>>>>>>>>>>> blood pressure diastolic, mmHg, 6194656\n",
      "(2017-07-04 22:59:39)<<<<<<<<<<<<<<<<<< --- (209.0s)\n",
      "(2017-07-04 22:59:39)>>>>>>>>>>>>>>>>>> blood pressure diastolic, cc/min, 151640\n",
      "(2017-07-04 22:59:39)<<<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:59:39)<<<<<<<<<<<<<<<< --- (213.0s)\n",
      "Data Loss (Extract > Cleaned): ((6371282, 1), (5976313, 16), 25238L, 170, '0.2999% records')\n",
      "(2017-07-04 22:59:41)<<<<<<<<<<<<<< --- (589.0s)\n",
      "(2017-07-04 22:59:41)>>>>>>>>>>>>>> Filter & sort - (5976313, 16)\n",
      "(2017-07-04 22:59:45)<<<<<<<<<<<<<< --- (4.0s)\n",
      "(2017-07-04 22:59:45)>>>>>>>>>>>>>> Convert to dask - (5976313, 16)\n",
      "(2017-07-04 22:59:46)<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 22:59:46)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 22:59:46)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 22:59:46)<<<<<<<<<<<< --- (861.0s)\n",
      "(2017-07-04 22:59:46)>>>>>>>>>>>> BLOOD PRESSURE MEAN: 4/8\n",
      "(2017-07-04 22:59:46)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 22:59:46)>>>>>>>>>>>>>>>> Extracting 3 items from chartevents\n",
      "(2017-07-04 23:00:18)<<<<<<<<<<<<<<<< --- (32.0s)\n",
      "(2017-07-04 23:00:18)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 23:00:19)<<<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 23:00:19)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 23:00:20)<<<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 23:00:20)<<<<<<<<<<<<<< --- (34.0s)\n",
      "(2017-07-04 23:00:20)>>>>>>>>>>>>>> Transforming... (2536271, 5)\n",
      "Data Loss (Extract > Transformed): ((2536271, 1), (2416029, 3), 0L, 0, '0.0% records')\n",
      "(2017-07-04 23:01:04)<<<<<<<<<<<<<< --- (44.0s)\n",
      "(2017-07-04 23:01:04)>>>>>>>>>>>>>> Cleaning... (2416029, 3)\n",
      "(2017-07-04 23:01:19)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 23:01:19)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:01:19)>>>>>>>>>>>>>>>> Drop OOB data | (2415995, 3)\n",
      "(2017-07-04 23:01:20)>>>>>>>>>>>>>>>>>> blood pressure mean, mmHg, 2536236\n",
      "(2017-07-04 23:01:55)<<<<<<<<<<<<<<<<<< --- (35.0s)\n",
      "(2017-07-04 23:01:55)<<<<<<<<<<<<<<<< --- (36.0s)\n",
      "Data Loss (Extract > Cleaned): ((2536271, 1), (2415995, 3), 1873L, 0, '0.0% records')\n",
      "(2017-07-04 23:01:55)<<<<<<<<<<<<<< --- (51.0s)\n",
      "(2017-07-04 23:01:55)>>>>>>>>>>>>>> Filter & sort - (2415995, 3)\n",
      "(2017-07-04 23:01:56)<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 23:01:56)>>>>>>>>>>>>>> Convert to dask - (2415995, 3)\n",
      "(2017-07-04 23:01:57)<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 23:01:57)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 23:01:57)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:01:57)<<<<<<<<<<<< --- (131.0s)\n",
      "(2017-07-04 23:01:57)>>>>>>>>>>>> RESPIRATORY RATE: 5/8\n",
      "(2017-07-04 23:01:57)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 23:01:57)>>>>>>>>>>>>>>>> Extracting 4 items from chartevents\n",
      "(2017-07-04 23:06:09)<<<<<<<<<<<<<<<< --- (252.0s)\n",
      "(2017-07-04 23:06:09)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 23:06:09)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:06:09)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 23:06:24)<<<<<<<<<<<<<<<< --- (15.0s)\n",
      "(2017-07-04 23:06:24)<<<<<<<<<<<<<< --- (267.0s)\n",
      "(2017-07-04 23:06:24)>>>>>>>>>>>>>> Transforming... (7810019, 5)\n",
      "Data Loss (Extract > Transformed): ((7810019, 1), (7780717, 5), 28707L, 172, '0.3035% records')\n",
      "(2017-07-04 23:08:13)<<<<<<<<<<<<<< --- (109.0s)\n",
      "(2017-07-04 23:08:13)>>>>>>>>>>>>>> Cleaning... (7780717, 5)\n",
      "(2017-07-04 23:11:07)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 23:11:07)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:11:07)>>>>>>>>>>>>>>>> Drop OOB data | (7780015, 4)\n",
      "(2017-07-04 23:11:11)>>>>>>>>>>>>>>>>>> respiratory rate, insp/min, 6108262\n",
      "(2017-07-04 23:12:08)<<<<<<<<<<<<<<<<<< --- (57.0s)\n",
      "(2017-07-04 23:12:08)>>>>>>>>>>>>>>>>>> respiratory rate, Breath, 1671901\n",
      "(2017-07-04 23:12:08)<<<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:12:08)>>>>>>>>>>>>>>>>>> respiratory rate, no_units, 2\n",
      "(2017-07-04 23:12:08)<<<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:12:08)<<<<<<<<<<<<<<<< --- (61.0s)\n",
      "Data Loss (Extract > Cleaned): ((7810019, 1), (7780015, 4), 29907L, 172, '0.3035% records')\n",
      "(2017-07-04 23:12:09)<<<<<<<<<<<<<< --- (236.0s)\n",
      "(2017-07-04 23:12:10)>>>>>>>>>>>>>> Filter & sort - (7780015, 4)\n",
      "(2017-07-04 23:12:14)<<<<<<<<<<<<<< --- (4.0s)\n",
      "(2017-07-04 23:12:14)>>>>>>>>>>>>>> Convert to dask - (7780015, 4)\n",
      "(2017-07-04 23:12:14)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:12:14)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 23:12:14)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:12:14)<<<<<<<<<<<< --- (617.0s)\n",
      "(2017-07-04 23:12:14)>>>>>>>>>>>> TEMPERATURE BODY: 6/8\n",
      "(2017-07-04 23:12:15)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 23:12:15)>>>>>>>>>>>>>>>> Extracting 4 items from chartevents\n",
      "(2017-07-04 23:13:06)<<<<<<<<<<<<<<<< --- (51.0s)\n",
      "(2017-07-04 23:13:06)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 23:13:06)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:13:06)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 23:13:11)<<<<<<<<<<<<<<<< --- (5.0s)\n",
      "(2017-07-04 23:13:11)<<<<<<<<<<<<<< --- (56.0s)\n",
      "(2017-07-04 23:13:11)>>>>>>>>>>>>>> Transforming... (1751447, 5)\n",
      "Data Loss (Extract > Transformed): ((1751447, 1), (1731875, 4), 16612L, 156, '0.3189% records')\n",
      "(2017-07-04 23:13:42)<<<<<<<<<<<<<< --- (31.0s)\n",
      "(2017-07-04 23:13:42)>>>>>>>>>>>>>> Cleaning... (1731875, 4)\n",
      "(2017-07-04 23:13:54)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 23:13:54)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:13:54)>>>>>>>>>>>>>>>> Drop OOB data | (1731794, 4)\n",
      "(2017-07-04 23:13:55)>>>>>>>>>>>>>>>>>> temperature body, degF, 1734754\n",
      "(2017-07-04 23:14:24)<<<<<<<<<<<<<<<<<< --- (29.0s)\n",
      "(2017-07-04 23:14:24)<<<<<<<<<<<<<<<< --- (30.0s)\n",
      "Data Loss (Extract > Cleaned): ((1751447, 1), (1731794, 4), 17226L, 156, '0.3189% records')\n",
      "(2017-07-04 23:14:24)<<<<<<<<<<<<<< --- (42.0s)\n",
      "(2017-07-04 23:14:24)>>>>>>>>>>>>>> Filter & sort - (1731794, 4)\n",
      "(2017-07-04 23:14:25)<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 23:14:25)>>>>>>>>>>>>>> Convert to dask - (1731794, 4)\n",
      "(2017-07-04 23:14:25)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:14:25)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 23:14:25)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:14:25)<<<<<<<<<<<< --- (131.0s)\n",
      "(2017-07-04 23:14:25)>>>>>>>>>>>> OXYGEN SATURATION PULSE OXIMETRY: 7/8\n",
      "(2017-07-04 23:14:25)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 23:14:25)>>>>>>>>>>>>>>>> Extracting 2 items from chartevents\n",
      "(2017-07-04 23:15:41)<<<<<<<<<<<<<<<< --- (76.0s)\n",
      "(2017-07-04 23:15:41)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 23:15:41)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:15:41)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 23:15:58)<<<<<<<<<<<<<<<< --- (17.0s)\n",
      "(2017-07-04 23:15:58)<<<<<<<<<<<<<< --- (93.0s)\n",
      "(2017-07-04 23:15:58)>>>>>>>>>>>>>> Transforming... (6099827, 5)\n",
      "Data Loss (Extract > Transformed): ((6099827, 1), (6073540, 2), 26134L, 163, '0.3326% records')\n",
      "(2017-07-04 23:17:28)<<<<<<<<<<<<<< --- (90.0s)\n",
      "(2017-07-04 23:17:28)>>>>>>>>>>>>>> Cleaning... (6073540, 2)\n",
      "(2017-07-04 23:18:01)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 23:18:01)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:18:01)>>>>>>>>>>>>>>>> Drop OOB data | (6073019, 2)\n",
      "(2017-07-04 23:18:03)>>>>>>>>>>>>>>>>>> oxygen saturation pulse oximetry, percent, 6073172\n",
      "(2017-07-04 23:18:54)<<<<<<<<<<<<<<<<<< --- (51.0s)\n",
      "(2017-07-04 23:18:54)<<<<<<<<<<<<<<<< --- (53.0s)\n",
      "Data Loss (Extract > Cleaned): ((6099827, 1), (6073019, 2), 26707L, 163, '0.3326% records')\n",
      "(2017-07-04 23:18:55)<<<<<<<<<<<<<< --- (87.0s)\n",
      "(2017-07-04 23:18:56)>>>>>>>>>>>>>> Filter & sort - (6073019, 2)\n",
      "(2017-07-04 23:18:58)<<<<<<<<<<<<<< --- (2.0s)\n",
      "(2017-07-04 23:18:58)>>>>>>>>>>>>>> Convert to dask - (6073019, 2)\n",
      "(2017-07-04 23:18:59)<<<<<<<<<<<<<< --- (1.0s)\n",
      "(2017-07-04 23:18:59)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 23:18:59)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:18:59)<<<<<<<<<<<< --- (274.0s)\n",
      "(2017-07-04 23:18:59)>>>>>>>>>>>> WEIGHT BODY: 8/8\n",
      "(2017-07-04 23:18:59)>>>>>>>>>>>>>> Extracting...\n",
      "(2017-07-04 23:18:59)>>>>>>>>>>>>>>>> Extracting 3 items from chartevents\n",
      "(2017-07-04 23:19:34)<<<<<<<<<<<<<<<< --- (35.0s)\n",
      "(2017-07-04 23:19:34)>>>>>>>>>>>>>>>> Combine DF\n",
      "(2017-07-04 23:19:34)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:19:34)>>>>>>>>>>>>>>>> Clean UOM\n",
      "(2017-07-04 23:19:34)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:19:34)<<<<<<<<<<<<<< --- (35.0s)\n",
      "(2017-07-04 23:19:34)>>>>>>>>>>>>>> Transforming... (95425, 5)\n",
      "Data Loss (Extract > Transformed): ((95425, 1), (94484, 3), 941L, 158, '0.4958% records')\n",
      "(2017-07-04 23:19:36)<<<<<<<<<<<<<< --- (2.0s)\n",
      "(2017-07-04 23:19:36)>>>>>>>>>>>>>> Cleaning... (94484, 3)\n",
      "(2017-07-04 23:19:37)>>>>>>>>>>>>>>>> Nominal to OneHot\n",
      "(2017-07-04 23:19:37)<<<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:19:37)>>>>>>>>>>>>>>>> Drop OOB data | (94457, 3)\n",
      "(2017-07-04 23:19:37)>>>>>>>>>>>>>>>>>> weight body, kg, 94457\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<<<<<<<< --- (5.0s)\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<<<<<< --- (5.0s)\n",
      "Data Loss (Extract > Cleaned): ((95425, 1), (94457, 3), 979L, 158, '0.4958% records')\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<<<< --- (6.0s)\n",
      "(2017-07-04 23:19:42)>>>>>>>>>>>>>> Filter & sort - (94457, 3)\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:19:42)>>>>>>>>>>>>>> Convert to dask - (94457, 3)\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:19:42)>>>>>>>>>>>>>> Join to big DF\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<<<< --- (0.0s)\n",
      "(2017-07-04 23:19:42)<<<<<<<<<<<< --- (43.0s)\n",
      "(2017-07-04 23:19:42)<<<<<<<<<< --- (3087.0s)\n",
      "(2017-07-04 23:19:42)<<<<<<<< --- (3087.0s)\n"
     ]
    }
   ],
   "source": [
    "df_all = mimic.ETL(mimic.mimic_extractor('config/mimic_item_map.csv',data_dict),\n",
    "                   components,\n",
    "                   data_dict,\n",
    "                   transformers.same_index_aggregator(agg_func=lambda x:x.iloc[0]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get all of the data we want and join into a single DF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import utils\n",
    "import logger\n",
    "import transformers\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "def get_big_df(hdf5_fname,components):\n",
    "    \"\"\"Outer-join the cleaned frame of every component into one DataFrame.\n",
    "\n",
    "    For each name in `components`, the frame stored under\n",
    "    'cleaned/<component>' in `hdf5_fname` is opened, sorted on both axes,\n",
    "    filtered with `transformers.remove_small_columns(threshold=5)` and\n",
    "    outer-joined onto the running result. `describe()` output before and\n",
    "    after the filter and the `utils.data_loss` report are shown per component.\n",
    "\n",
    "    Returns the joined DataFrame, or None when `components` is empty.\n",
    "    \"\"\"\n",
    "    n_components = len(components)\n",
    "    df_all = None\n",
    "    logger.log('Make DF for {} components...\\n{}'.format(n_components,'\\n'.join(components)),new_level=True)\n",
    "    # enumerate() supplies the progress counter directly; looking the name up\n",
    "    # with components.index() rescans the list and misreports duplicate names.\n",
    "    for position, component in enumerate(components, 1):\n",
    "        logger.log('{}: {}/{}'.format(component.upper(),position,n_components),new_level=True)\n",
    "\n",
    "        logger.log('Opening...')\n",
    "        df = utils.open_df(hdf5_fname,'cleaned/{}'.format(component)).sort_index(axis=1).sort_index()\n",
    "        display(df.describe(include='all'))\n",
    "\n",
    "        df_cleaned = transformers.remove_small_columns(threshold=5).fit_transform(df)\n",
    "        display(df_cleaned.describe(include='all'))\n",
    "\n",
    "        # A parenthesised single-argument print behaves the same on Python 2 and 3.\n",
    "        print(utils.data_loss(df,df_cleaned))\n",
    "\n",
    "        logger.log('Join {} to {}'.format(df_cleaned.shape, None if df_all is None else df_all.shape))\n",
    "        if df_all is None:\n",
    "            df_all = df_cleaned\n",
    "        else:\n",
    "            df_all = df_all.join(df_cleaned,how='outer')\n",
    "        # Release the per-component names on every pass; df_all keeps its own\n",
    "        # reference to whatever it still needs.\n",
    "        del df,df_cleaned\n",
    "\n",
    "        logger.end_log_level()\n",
    "    logger.end_log()\n",
    "\n",
    "    return df_all"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import icu_data_defs\n",
    "data_dict = icu_data_defs.data_dictionary('config/data_definitions.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "components = data_dict.get_panel_defintions(12).component.unique().tolist()\n",
    "components"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#everything up to urine output\n",
    "df1 = get_big_df('data/mimic_data',components[:8])\n",
    "utils.save_df(df1,'data/mimic_data','cleaned/part1')\n",
    "\n",
    "#urine output and forward\n",
    "df2 = get_big_df('data/mimic_data',components[8:])\n",
    "utils.save_df(df2,'data/mimic_data','cleaned/part2')\n",
    "\n",
    "df_combined = df1.join(df2,how='outer')\n",
    "\n",
    "del df1,df2\n",
    "# Show the frame built in this cell. `df_all` is not assigned here, so it\n",
    "# would be a stale frame from the earlier ETL cell or a NameError.\n",
    "df_combined.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "utils.save_df(df_combined,'data/mimic_data','cleaned/all')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Start here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "def summable_filter(df):\n",
    "    \"\"\"Return only the columns of `df` that the SUM feature should use.\n",
    "\n",
    "    A column is kept when the second-to-last entry of its column key is\n",
    "    recognised by `units.MedicalUreg` as a volume or a mass, and the first\n",
    "    entry is not `data_dict.labels.WEIGHT_BODY`.\n",
    "    \"\"\"\n",
    "    medical_units = units.MedicalUreg()\n",
    "\n",
    "    def is_summable(col_key):\n",
    "        # presumably col_key[-2] is the unit level of the column index -- confirm\n",
    "        unit_name = str(col_key[-2])\n",
    "        has_summable_unit = medical_units.is_volume(unit_name) or medical_units.is_mass(unit_name)\n",
    "        return has_summable_unit and (col_key[0] != data_dict.labels.WEIGHT_BODY)\n",
    "\n",
    "    keep_mask = df.columns.map(is_summable)\n",
    "    return df.loc[:,keep_mask]\n",
    "\n",
    "def lactate_filter_admissions(hdf5_fname):\n",
    "    \"\"\"Placeholder for the lactate-based admission filter.\n",
    "\n",
    "    TODO: implement. Until then a call fails loudly instead of the whole\n",
    "    cell failing to parse on a definition without a body.\n",
    "    \"\"\"\n",
    "    raise NotImplementedError('lactate_filter_admissions is not implemented yet')\n",
    "\n",
    "def make_lactate_labels(hdf5_fname,custom_cleaners,data_dict,agg_func=None):\n",
    "    \"\"\"Aggregate the cleaned lactate frame to one row per ID.\n",
    "\n",
    "    Opens 'cleaned/<data_dict.labels.LACTATE>' from `hdf5_fname`, applies\n",
    "    `custom_cleaners.transform` and `transformers.max_col_only().transform`,\n",
    "    groups on the `constants.column_names.ID` index level and reduces each\n",
    "    group with `agg_func`.\n",
    "\n",
    "    agg_func -- callable applied to every group; defaults to taking the\n",
    "                first row of the group.\n",
    "\n",
    "    Returns the aggregated frame.\n",
    "    \"\"\"\n",
    "    if agg_func is None:\n",
    "        # NOTE(review): the position inside iloc[] was never filled in. The\n",
    "        # first row mirrors the aggregator passed to mimic.ETL -- confirm that\n",
    "        # this is the intended label.\n",
    "        agg_func = lambda x: x.iloc[0]\n",
    "    df = utils.open_df(hdf5_fname,'cleaned/{}'.format(data_dict.labels.LACTATE))\n",
    "    df_cleaned = custom_cleaners.transform(df)\n",
    "    max_col_cleaner = transformers.max_col_only()\n",
    "    df_cleaned = max_col_cleaner.transform(df_cleaned)\n",
    "    # Hand the aggregate back so that callers assigning the result get the labels.\n",
    "    return df_cleaned.groupby(level=constants.column_names.ID).agg(agg_func)\n",
    "    \n",
    "    \n",
    "\n",
    "basic_feature_tuples = [\n",
    "    ('MEAN',features.segment_mean(),constants.ALL),\n",
    "    ('STD',features.segment_std(),constants.ALL),\n",
    "    ('COUNT',features.segment_count(),constants.ALL),\n",
    "    ('LAST',features.segment_last(),constants.ALL),\n",
    "    ('SUM',features.segment_sum(),{constants.CUSTOM_FILTER:summable_filter})\n",
    "]\n",
    "\n",
    "custom_cleaners = Pipeline([\n",
    "        ('drop_oob_values',transformers.oob_value_remover(data_dict)),\n",
    "        ('drop_small_columns',transformers.remove_small_columns(threshold=50)),\n",
    "        ('combine_like_columns',transformers.combine_like_cols()),\n",
    "#         ('quantitative_only',transformers.filter_var_type(var_types_to_keep)),\n",
    "#         ('known_col_only',transformers.known_col_only()),\n",
    "    ])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# data_dict is the required third argument of make_lactate_labels.\n",
    "lactate_labels = make_lactate_labels(hdf5_fname,custom_cleaners,data_dict)\n",
    "\n",
    "# NOTE(review): `labels` is not assigned in the nearby cells -- presumably\n",
    "# `lactate_labels` was intended; confirm before running.\n",
    "df_features = mimic_features(hdf5_fname,'basic_all_before',labels,custom_cleaners)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}