imminent-adm-mimic / Git / [3f1788] /analysis/data

Models:
RaymondKing/
imminent-adm-mimic
Downloads: 1
[3f1788]: / analysis / data_analysis.ipynb
History
Download this file
1423 lines (1422 with data), 41.0 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Preparation for Prediction of Imminent ICU Admission and Prolonged Stay"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports & Inits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-26T18:54:33.858025Z",
     "start_time": "2019-08-26T18:54:33.683791Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-26T18:54:33.919510Z",
     "start_time": "2019-08-26T18:54:33.887213Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'workdir': PosixPath('../data/workdir'),\n",
       " 'figdir': PosixPath('../data/results/figures'),\n",
       " 'resultdir': PosixPath('../data/results'),\n",
       " 'dataset_csv': PosixPath('../data/proc_dataset.csv'),\n",
       " 'imminent_adm_cols': ['hadm_id', 'processed_note', 'imminent_adm_label'],\n",
       " 'prolonged_stay_cols': ['hadm_id', 'processed_note', 'prolonged_stay_label'],\n",
       " 'cols': ['hadm_id',\n",
       "  'imminent_adm_label',\n",
       "  'prolonged_stay_label',\n",
       "  'processed_note',\n",
       "  'charttime',\n",
       "  'intime',\n",
       "  'chartinterval'],\n",
       " 'dates': ['charttime', 'intime'],\n",
       " 'ia_thresh': {'lr': 0.45, 'rf': 0.27, 'gbm': 0.435, 'mlp': 0.2},\n",
       " 'ps_thresh': {'lr': 0.39, 'rf': 0.36, 'gbm': 0.324, 'mlp': 0.27}}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sys\n",
    "sys.path.append('../')\n",
    "\n",
    "import math\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import spacy\n",
    "\n",
    "import seaborn as sns\n",
    "sns.set(style = 'darkgrid')\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "from scipy import stats\n",
    "from pathlib import Path\n",
    "\n",
    "from utils.splits import set_group_splits\n",
    "from args import args\n",
    "vars(args)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "heading_collapsed": true
   },
   "source": [
    "## Stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:48:22.330446Z",
     "start_time": "2019-07-17T18:48:17.056668Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(args.dataset_csv)\n",
    "ia_df = df.loc[(df['imminent_adm_label'] != -1)][args.imminent_adm_cols].reset_index(drop=True)\n",
    "ps_df = ori_df.loc[(ori_df['chartinterval'] != 0)][args.prolonged_stay_cols].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:48:22.402048Z",
     "start_time": "2019-07-17T18:48:22.333813Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df['subject_id'].nunique(), df['hadm_id'].nunique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:48:29.519915Z",
     "start_time": "2019-07-17T18:48:29.443575Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "ages = df.groupby(['subject_id'])[['admission_age']].first().to_numpy().reshape(-1)\n",
    "ages[ages>100] = 100\n",
    "print(f\"Median age: {ages.mean():0.1f}\")\n",
    "print(f\"IQR: {np.percentile(ages, 25):0.1f} - {np.percentile(ages, 75):0.1f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:48:53.210075Z",
     "start_time": "2019-07-17T18:48:53.002656Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df['adm_to_icu_period'].describe().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:21.918238Z",
     "start_time": "2019-07-17T18:49:21.653850Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.groupby(df['admission_type'])['hadm_id'].nunique().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:22.411510Z",
     "start_time": "2019-07-17T18:49:22.249868Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.groupby(df['ethnicity'])['subject_id'].nunique().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Make sure average prevalence of random test sets is approximately same as real prevalence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:27:25.090684Z",
     "start_time": "2019-07-17T18:27:05.095345Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "ia_p = []\n",
    "ps_p = []\n",
    "\n",
    "for seed in range(127, 227):\n",
    "  sdf = set_group_splits(ia_df.copy(), group_col='hadm_id', seed=seed)\n",
    "  test_size = len(sdf.loc[(sdf['split'] == 'test')])\n",
    "  test_pos = len(sdf.loc[(sdf['split'] == 'test') & (sdf['imminent_adm_label'] == 1)])\n",
    "  ia_p.append(test_pos/test_size)  \n",
    "  \n",
    "  sdf = set_group_splits(ps_df.copy(), group_col='hadm_id', seed=seed)\n",
    "  test_size = len(sdf.loc[(sdf['split'] == 'test')])\n",
    "  test_pos = len(sdf.loc[(sdf['split'] == 'test') & (sdf['prolonged_stay_label'] == 1)])\n",
    "  ps_p.append(test_pos/test_size)  \n",
    "  \n",
    "\n",
    "ia_p = np.array(ia_p)\n",
    "ps_p = np.array(ps_p)\n",
    "\n",
    "print(f\"Prevalence of Imminent Admission: {(len(ia_df.loc[ia_df['imminent_adm_label'] == 1])/len(ia_df)):0.3f}\")\n",
    "print(f\"Average of test set = {(ia_p.mean()):0.3f}, std = {(ia_p.std()):0.3f}\")\n",
    "print(f\"Prevalence of Prolonged Stay: {(len(ps_df.loc[ps_df['prolonged_stay_label'] == 1])/len(ps_df)):0.3f}\")\n",
    "print(f\"Average of test set = {(ps_p.mean()):0.3f}, std = {(ps_p.std()):0.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:26.743454Z",
     "start_time": "2019-07-17T18:49:26.652037Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "print(f\"Average number of notes per admission for imminent admission: {ia_df.groupby('hadm_id').size().mean():0.2f}\")\n",
    "print(f\"Average number of notes per admission for prolonged stay (and entire dataset): {ps_df.groupby('hadm_id').size().mean():0.2f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:48.377622Z",
     "start_time": "2019-07-17T18:49:48.020027Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.groupby(df['deathtime'].apply(lambda x: True if pd.notnull(x) else False))['subject_id'].nunique().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:49.030699Z",
     "start_time": "2019-07-17T18:49:48.901279Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.groupby(df['gender'])['subject_id'].nunique().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Distribution of notes by category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:51.015549Z",
     "start_time": "2019-07-17T18:49:50.883550Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.groupby(df['category']).size().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Distribution of notes by category for imminent admissions and delayed admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:40:35.638983Z",
     "start_time": "2019-07-17T18:40:35.428118Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.loc[(df['imminent_adm_label'] == 1)].groupby('category').size().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:29.970948Z",
     "start_time": "2019-07-17T18:42:29.704004Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.loc[(df['imminent_adm_label'] == 0)].groupby('category').size().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "hidden": true
   },
   "source": [
    "Distribution of notes for prolonged stay and short stay"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:30.199045Z",
     "start_time": "2019-07-17T18:42:29.974531Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.loc[(df['prolonged_stay_label'] == 1)].groupby('category').size().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:30.334632Z",
     "start_time": "2019-07-17T18:42:30.202847Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df.loc[(df['prolonged_stay_label'] == 0)].groupby('category').size().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:49:58.769470Z",
     "start_time": "2019-07-17T18:49:58.658678Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df['icu_los'].describe().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:52:01.571726Z",
     "start_time": "2019-07-17T18:52:01.262084Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df['note'].apply(len).describe().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:23.454748Z",
     "start_time": "2019-07-17T18:42:23.043855Z"
    },
    "hidden": true
   },
   "outputs": [],
   "source": [
    "df['charttime_to_icu_period'].describe().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Plots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-26T18:54:53.518650Z",
     "start_time": "2019-08-26T18:54:50.602313Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['subject_id', 'hadm_id', 'icustay_id', 'admission_type', 'admittime',\n",
       "       'dischtime', 'intime', 'outtime', 'charttime', 'icu_los', 'deathtime',\n",
       "       'adm_to_icu_period', 'charttime_to_icu_period', 'chartinterval',\n",
       "       'ethnicity', 'dob', 'gender', 'admission_age', 'category',\n",
       "       'imminent_adm_label', 'prolonged_stay_label', 'note', 'processed_note'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(args.dataset_csv)\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-26T18:56:35.688519Z",
     "start_time": "2019-08-26T18:56:35.448942Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Radiology\n",
      "VEN DUP EXTEXT BIL (MAP/DVT)\n",
      "[**2162-5-17**] 8:12 AM\n",
      " [**Last Name (un) 1296**] DUP EXTEXT BIL (MAP/DVT)                                    Clip # [**Clip Number (Radiology) 18833**]\n",
      " Reason: eval for vein harvesting*****OR second case on [**2162-5-17**]******\n",
      " Admitting Diagnosis: CORONARY ARTERY DISEASE\n",
      " ______________________________________________________________________________\n",
      " [**Hospital 2**] MEDICAL CONDITION:\n",
      "  60 year old man pre-op for CABG\n",
      " REASON FOR THIS EXAMINATION:\n",
      "  eval for vein harvesting*****OR second case on [**2162-5-17**]********\n",
      " ______________________________________________________________________________\n",
      "                                 FINAL REPORT\n",
      " HISTORY:  A 60-year-old gentleman, preop for CABG.  Search for conduit.\n",
      "\n",
      " TECHNIQUE:  Venous mapping of the superficial veins in the lower extremities\n",
      " was performed with [**Doctor Last Name 37**]-scale and Doppler ultrasound.\n",
      "\n",
      " FINDINGS:  Right great saphenous vein is patent and compressible with\n",
      " diameters ranging between 0.15 and 0.21 cm.  The right small saphenous vein is\n",
      " patent and compressible with diameters ranging between 0.14 and 0.20 cm.\n",
      "\n",
      " The left great saphenous vein presented with diameters ranging between 0.06\n",
      " and 0.49 cm.  It was not visualized below the calf.  The left small saphenous\n",
      " vein presented thick walled and calcified.\n",
      "\n",
      " COMPARISON:  None available.\n",
      "\n",
      " IMPRESSION:  Patent right great and small saphenous veins, with diameters\n",
      " described above.  Left great saphenous vein with small diameters below the mid\n",
      " thigh and not visualized below the calf.  The left small saphenous vein\n",
      " presented with thick walls and calcifications.\n",
      "\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(df.iloc[0]['note'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-08-26T18:56:41.114437Z",
     "start_time": "2019-08-26T18:56:41.086224Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Radiology \n",
      " VEN DUP EXTEXT BIL ( MAP/DVT ) \n",
      " [ * * 2162 - 5 - 17 * * ] 8:12 AM \n",
      "  [ * * Last Name ( un ) 1296 * * ] DUP EXTEXT BIL ( MAP/DVT )                                     Clip # [ * * Clip Number ( Radiology ) 18833 * * ] \n",
      "  Reason : eval for vein harvesting*****OR second case on [ * * 2162 - 5 - 17 * * ] * * * * * * \n",
      "  Admitting Diagnosis : CORONARY ARTERY DISEASE \n",
      "  _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n",
      "  [ * * Hospital 2 * * ] MEDICAL CONDITION : \n",
      "   60 year old man pre-op for CABG \n",
      "  REASON FOR THIS EXAMINATION : \n",
      "   eval for vein harvesting*****OR second case on [ * * 2162 - 5 - 17 * * ] * * * * * * * * \n",
      "  _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ \n",
      "                                  FINAL REPORT \n",
      "  HISTORY :   A 60-year-old gentleman , preop for CABG .   Search for conduit . \n",
      "\n",
      "  TECHNIQUE :   Venous mapping of the superficial veins in the lower extremities \n",
      "  was performed with [ * * Doctor Last Name 37**]-scale and Doppler ultrasound . \n",
      "\n",
      "  FINDINGS :   Right great saphenous vein is patent and compressible with \n",
      "  diameters ranging between 0.15 and 0.21 cm .   The right small saphenous vein is \n",
      "  patent and compressible with diameters ranging between 0.14 and 0.20 cm . \n",
      "\n",
      "  The left great saphenous vein presented with diameters ranging between 0.06 \n",
      "  and 0.49 cm .   It was not visualized below the calf .   The left small saphenous \n",
      "  vein presented thick walled and calcified . \n",
      "\n",
      "  COMPARISON :   None available . \n",
      "\n",
      "  IMPRESSION :   Patent right great and small saphenous veins , with diameters \n",
      "  described above .   Left great saphenous vein with small diameters below the mid \n",
      "  thigh and not visualized below the calf .   The left small saphenous vein \n",
      "  presented with thick walls and calcifications . \n",
      "\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(df.iloc[0]['processed_note'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:36.032162Z",
     "start_time": "2019-07-17T18:42:36.008573Z"
    }
   },
   "outputs": [],
   "source": [
    "intervals = ['-1 ≤ t ≤ 0']\n",
    "intervals += [f'-{i+1} ≤ t ≤ -{i}' for i in range(1, 15)]\n",
    "intervals.append('t ≤ -15')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Bar Plot of Notes Over Days"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### All Notes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:38.585605Z",
     "start_time": "2019-07-17T18:42:38.475792Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = pd.DataFrame(df.groupby(['chartinterval']).size(), columns=['n_notes'])\n",
    "plot_df.reset_index(inplace=True, drop=True)\n",
    "plot_df['days'] = intervals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:42:41.669474Z",
     "start_time": "2019-07-17T18:42:40.376671Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(15, 8))\n",
    "sns.barplot(x='days', y='n_notes', data=plot_df, ax=ax)\n",
    "ax.set_xticklabels(ax.get_xticklabels(),rotation=45, ha='right')\n",
    "ax.set_xlabel('Time to ICU Admission (days)')\n",
    "ax.set_ylabel('# notes')\n",
    "for index, row in plot_df.iterrows():\n",
    "    ax.text(index, row['n_notes'], str(row['n_notes']), color='black', ha='center', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-27T00:38:13.014421Z",
     "start_time": "2019-06-27T00:38:12.991010Z"
    }
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'note_bp.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### By Category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:43:10.933200Z",
     "start_time": "2019-07-17T18:43:10.805412Z"
    }
   },
   "outputs": [],
   "source": [
    "def plot_intervals(ax, df, cat):\n",
    "  sns.barplot(x='days', y='n_notes', data=df, ax=ax)\n",
    "  ax.set_xticklabels(ax.get_xticklabels(),rotation=45, ha='right')\n",
    "  ax.set_xlabel('')\n",
    "  ax.set_ylabel('')\n",
    "  ax.set_title(f\"Note Category: {cat}\\n# notes: {df['n_notes'].sum()}\")   \n",
    "\n",
    "  for index, (_, row) in enumerate(df.iterrows()):\n",
    "      ax.text(index, row['n_notes'], str(row['n_notes']), color='black', ha='center', va='bottom')    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:43:12.728434Z",
     "start_time": "2019-07-17T18:43:12.610095Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = pd.DataFrame(df.groupby(['category', 'chartinterval']).size(), columns=['n_notes'])\n",
    "plot_df.reset_index(inplace=True)\n",
    "plot_df['days'] = plot_df['chartinterval'].apply(lambda x: intervals[x])\n",
    "plot_df.drop(['chartinterval'], inplace=True, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:43:26.011143Z",
     "start_time": "2019-07-17T18:43:15.024678Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(6, 2, figsize=(20, 50))\n",
    "plot_intervals(ax[0][0], plot_df.loc[plot_df['category'] == 'Case Management ', ['n_notes', 'days']], 'Case Management')\n",
    "plot_intervals(ax[0][1], plot_df.loc[plot_df['category'] == 'Consult', ['n_notes', 'days']], 'Consult')\n",
    "\n",
    "plot_intervals(ax[1][0], plot_df.loc[plot_df['category'] == 'General', ['n_notes', 'days']], 'General')\n",
    "plot_intervals(ax[1][1], plot_df.loc[plot_df['category'] == 'Nursing', ['n_notes', 'days']], 'Nursing')\n",
    "\n",
    "plot_intervals(ax[2][0], plot_df.loc[plot_df['category'] == 'Nursing/other', ['n_notes', 'days']], 'Nursing/other')\n",
    "plot_intervals(ax[2][1], plot_df.loc[plot_df['category'] == 'Nutrition', ['n_notes', 'days']], 'Nutrition')\n",
    "\n",
    "plot_intervals(ax[3][0], plot_df.loc[plot_df['category'] == 'Pharmacy', ['n_notes', 'days']], 'Pharmacy')\n",
    "plot_intervals(ax[3][1], plot_df.loc[plot_df['category'] == 'Physician ', ['n_notes', 'days',]], 'Physician')\n",
    "\n",
    "plot_intervals(ax[4][0], plot_df.loc[plot_df['category'] == 'Radiology', ['n_notes', 'days']], 'Radiology')\n",
    "plot_intervals(ax[4][1], plot_df.loc[plot_df['category'] == 'Rehab Services', ['n_notes', 'days']], 'Rehab Services')\n",
    "\n",
    "plot_intervals(ax[5][0], plot_df.loc[plot_df['category'] == 'Respiratory ', ['n_notes', 'days']], 'Respiratory')\n",
    "plot_intervals(ax[5][1], plot_df.loc[plot_df['category'] == 'Social Work', ['n_notes', 'days']], 'Social Work')\n",
    "\n",
    "fig.text(0.5, 0.1, 'Time to ICU Admission (days)', ha='center')\n",
    "fig.text(0.08, 0.5, '# notes', va='center', rotation='vertical')\n",
    "\n",
    "plt.subplots_adjust(hspace = 0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-27T00:42:13.420913Z",
     "start_time": "2019-06-27T00:42:13.395654Z"
    }
   },
   "outputs": [],
   "source": [
    "# cats = sorted(list(df['category'].unique()))\n",
    "\n",
    "# n = 0\n",
    "# fig, ax = plt.subplots(1, 1, figsize=(10, 8))\n",
    "# plot_intervals(ax, plot_df.loc[plot_df['category'] == cats[n], ['n_notes', 'days']], cats[n])\n",
    "# ax.set_xlabel('Time to ICU Admission (days)')\n",
    "# ax.set_ylabel('# notes')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-26T19:55:34.228896Z",
     "start_time": "2019-06-26T19:55:34.204962Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'note_cats_bp.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Note Chart Time to ICU Admission Period Histogram"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### All Notes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:43:56.456943Z",
     "start_time": "2019-07-17T18:43:56.330361Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = df[['category', 'charttime_to_icu_period']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:44:05.284861Z",
     "start_time": "2019-07-17T18:44:03.949948Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "\n",
    "sns.distplot(plot_df['charttime_to_icu_period'], kde=False, ax=ax, bins=80)\n",
    "ax.set_xlabel('Period between Note Chart Time and ICU Admission Time (days)')\n",
    "ax.set_ylabel('# notes')\n",
    "ax.set_xlim(0, 60)\n",
    "\n",
    "# ax.text(ax.get_xlim()[1]*0.50, ax.get_ylim()[1]*0.80, f\"Min: {mdf['chart_icu_period'].min()}, Avg: {mdf['chart_icu_period'].mean(): 0.2f}, Max: {mdf['chart_icu_period'].max()}\", fontweight='bold', fontsize=15, ha='center', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-26T19:55:34.712151Z",
     "start_time": "2019-06-26T19:55:34.686551Z"
    }
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'note_icu_period_hist.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### By Category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:44:13.676427Z",
     "start_time": "2019-07-17T18:44:13.571725Z"
    }
   },
   "outputs": [],
   "source": [
    "def plot_period(ax, df, cat):\n",
    "  sns.distplot(df, kde=False, ax=ax, bins=10)\n",
    "  ax.set_xlabel('')\n",
    "  ax.set_ylabel('')\n",
    "  ax.set_title(f\"Note Category: {cat}\")   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:45:21.689010Z",
     "start_time": "2019-07-17T18:45:12.353337Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(6, 2, figsize=(20, 50))\n",
    "plot_period(ax[0][0], plot_df.loc[plot_df['category'] == 'Case Management ', ['charttime_to_icu_period']], 'Case Management')\n",
    "plot_period(ax[0][1], plot_df.loc[plot_df['category'] == 'Consult', ['charttime_to_icu_period']], 'Consult')\n",
    "\n",
    "plot_period(ax[1][0], plot_df.loc[plot_df['category'] == 'General', ['charttime_to_icu_period']], 'General')\n",
    "plot_period(ax[1][1], plot_df.loc[plot_df['category'] == 'Nursing', ['charttime_to_icu_period']], 'Nursing')\n",
    "\n",
    "plot_period(ax[2][0], plot_df.loc[plot_df['category'] == 'Nursing/other', ['charttime_to_icu_period']], 'Nursing/other')\n",
    "plot_period(ax[2][1], plot_df.loc[plot_df['category'] == 'Nutrition', ['charttime_to_icu_period']], 'Nutrition')\n",
    "\n",
    "plot_period(ax[3][0], plot_df.loc[plot_df['category'] == 'Pharmacy', ['charttime_to_icu_period']], 'Pharmacy')\n",
    "plot_period(ax[3][1], plot_df.loc[plot_df['category'] == 'Physician ', ['charttime_to_icu_period',]], 'Physician')\n",
    "\n",
    "plot_period(ax[4][0], plot_df.loc[plot_df['category'] == 'Radiology', ['charttime_to_icu_period']], 'Radiology')\n",
    "plot_period(ax[4][1], plot_df.loc[plot_df['category'] == 'Rehab Services', ['charttime_to_icu_period']], 'Rehab Services')\n",
    "\n",
    "plot_period(ax[5][0], plot_df.loc[plot_df['category'] == 'Respiratory ', ['charttime_to_icu_period']], 'Respiratory')\n",
    "plot_period(ax[5][1], plot_df.loc[plot_df['category'] == 'Social Work', ['charttime_to_icu_period']], 'Social Work')\n",
    "\n",
    "fig.text(0.5, 0.11, 'Period between Note Chart Time and ICU Admission Time (days)', ha='center')\n",
    "fig.text(0.08, 0.5, '# notes', va='center', rotation='vertical')\n",
    "\n",
    "plt.subplots_adjust(hspace = 0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-27T00:43:24.745337Z",
     "start_time": "2019-06-27T00:43:24.720208Z"
    }
   },
   "outputs": [],
   "source": [
    "# cats = sorted(list(df['category'].unique()))\n",
    "\n",
    "# n = 0\n",
    "# fig, ax = plt.subplots(1, 1, figsize=(10, 8))\n",
    "# plot_period(ax, plot_df.loc[plot_df['category'] == cats[n], ['chart_icu_period']], cats[n])\n",
    "# ax.set_xlabel('Time to ICU Admission (days)')\n",
    "# ax.set_ylabel('# notes')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-26T19:35:38.476961Z",
     "start_time": "2019-06-26T19:35:38.451886Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'note_cat_icu_period_hist.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Hospital Admission to ICU Admission Period Histogram"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:45:44.547021Z",
     "start_time": "2019-07-17T18:45:44.519812Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = df[['adm_to_icu_period']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:45:46.580796Z",
     "start_time": "2019-07-17T18:45:45.217784Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "\n",
    "sns.distplot(plot_df, kde=False, ax=ax, bins=80)\n",
    "ax.set_xlabel('Time between hospital admission and ICU admission (days)')\n",
    "ax.set_ylabel('# notes')\n",
    "ax.set_xlim(0, 70)\n",
    "# ax.text(ax.get_xlim()[1]*0.50, ax.get_ylim()[1]*0.80, f\"Min: {mdf['adm_icu_period'].min()}, Avg: {mdf['adm_icu_period'].mean(): 0.2f}, Max: {mdf['adm_icu_period'].max()}\", fontweight='bold', fontsize=15, ha='center', va='bottom')    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'adm_icu_period_hist.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Note Length Histogram"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:45:50.860829Z",
     "start_time": "2019-07-17T18:45:49.137114Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "sns.distplot(df['note'].apply(len), kde=False, ax=ax, bins=100)\n",
    "ax.set_xlabel('Length of Note (characters)')\n",
    "ax.set_ylabel('# notes')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-26T19:59:38.291139Z",
     "start_time": "2019-06-26T19:59:38.267860Z"
    }
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'note_len_hist.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Imminent ICU Prediction Class Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:53:53.526861Z",
     "start_time": "2019-07-17T18:53:53.429558Z"
    }
   },
   "outputs": [],
   "source": [
    "desc = ['Unused', 'Delayed Admission', 'Imminent Admission']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Without Admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:53:54.551036Z",
     "start_time": "2019-07-17T18:53:54.423540Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = pd.DataFrame(df.groupby(['imminent_adm_label']).size(), columns=['n_notes']).reset_index()\n",
    "plot_df['imminent_adm_label'] = desc\n",
    "plot_df = plot_df.reindex([2, 1, 0])\n",
    "plot_df.reset_index(inplace=True, drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:53:55.840770Z",
     "start_time": "2019-07-17T18:53:54.913513Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "sns.barplot(x='imminent_adm_label', y='n_notes', data=plot_df, ax=ax)\n",
    "ax.set_xlabel('Imminent Class Label')\n",
    "ax.set_ylabel('# notes')\n",
    "for index, row in plot_df.iterrows():\n",
    "  ax.text(index+0.05, row['n_notes']+50, str(row['n_notes']), color='black', ha='right', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-27T01:07:18.818779Z",
     "start_time": "2019-06-27T01:07:18.795768Z"
    }
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'imminent_label_bp.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### With Admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:54:20.657113Z",
     "start_time": "2019-07-17T18:54:20.298763Z"
    }
   },
   "outputs": [],
   "source": [
    "p1 = pd.DataFrame(df.groupby(['imminent_adm_label']).size(), columns=['n_notes']).reset_index()\n",
    "p2 = df.groupby(['imminent_adm_label'])['hadm_id'].nunique().reset_index()\n",
    "p = p1.merge(p2, on=['imminent_adm_label'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:54:20.757964Z",
     "start_time": "2019-07-17T18:54:20.660979Z"
    }
   },
   "outputs": [],
   "source": [
    "p['imminent_adm_label'] = desc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:54:21.287840Z",
     "start_time": "2019-07-17T18:54:21.204792Z"
    }
   },
   "outputs": [],
   "source": [
    "p = p.reindex([2,1,0])\n",
    "p.reset_index(inplace=True, drop=True)\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:54:29.367296Z",
     "start_time": "2019-07-17T18:54:29.263198Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = p.copy()\n",
    "plot_df.rename(columns={'hadm_id':'# Admissions', 'n_notes':'# Notes'}, inplace=True)\n",
    "plot_df = pd.melt(plot_df, id_vars='imminent_adm_label', var_name='Legend', value_name='counts')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T18:54:35.592328Z",
     "start_time": "2019-07-17T18:54:34.576044Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "\n",
    "sns.barplot(x='imminent_adm_label', y='counts', hue='Legend', data=plot_df, ax=ax)\n",
    "ax.set_xticklabels(ax.get_xticklabels(), ha='right')\n",
    "ax.set_xlabel('Imminent Class Label')\n",
    "ax.set_ylabel('# notes')\n",
    "\n",
    "for index, row in plot_df.iterrows():\n",
    "    if index < len(plot_df)//2:\n",
    "        ax.text(index-0.13, row['counts']+50, str(row['counts']), color='black', ha='right', va='bottom')\n",
    "    else:\n",
    "        ax.text(index % (len(plot_df)//2)+0.25, row['counts']+50, str(row['counts']), color='black', ha='right', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'imminent_label_adms_bp.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prolonged Stay Class Distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T19:00:53.843117Z",
     "start_time": "2019-07-17T19:00:53.541066Z"
    }
   },
   "outputs": [],
   "source": [
    "desc = ['Short Stay', 'Prolonged Stay']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Without Admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T19:01:08.738416Z",
     "start_time": "2019-07-17T19:01:08.586921Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = pd.DataFrame(df.groupby(['prolonged_stay_label']).size(), columns=['n_notes']).reset_index()\n",
    "plot_df['prolonged_stay_label'] = desc\n",
    "plot_df = plot_df.reindex([1, 0])\n",
    "plot_df.reset_index(inplace=True, drop=True)\n",
    "plot_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T19:01:18.296482Z",
     "start_time": "2019-07-17T19:01:17.775519Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "sns.barplot(x='prolonged_stay_label', y='n_notes', data=plot_df, ax=ax)\n",
    "ax.set_xlabel('5 Day Discharge Class Label')\n",
    "ax.set_ylabel('# notes')\n",
    "for index, row in plot_df.iterrows():\n",
    "  ax.text(index+0.05, row['n_notes']+50, str(row['n_notes']), color='black', ha='right', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-06-30T21:09:10.237355Z",
     "start_time": "2019-06-30T21:09:10.163Z"
    }
   },
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'discharge_label_bp.tif', dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### With Admissions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T19:01:34.791633Z",
     "start_time": "2019-07-17T19:01:34.568783Z"
    }
   },
   "outputs": [],
   "source": [
    "p1 = pd.DataFrame(df.groupby(['prolonged_stay_label']).size(), columns=['n_notes']).reset_index()\n",
    "p2 = df.groupby(['prolonged_stay_label'])['hadm_id'].nunique().reset_index()\n",
    "p = p1.merge(p2, on=['prolonged_stay_label'])\n",
    "p['prolonged_stay_label'] = desc\n",
    "p = p.reindex([1,0])\n",
    "p.reset_index(inplace=True, drop=True)\n",
    "p"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T19:01:42.249351Z",
     "start_time": "2019-07-17T19:01:42.137270Z"
    }
   },
   "outputs": [],
   "source": [
    "plot_df = p.copy()\n",
    "plot_df.rename(columns={'hadm_id':'# Admissions', 'n_notes':'# Notes'}, inplace=True)\n",
    "plot_df = pd.melt(plot_df, id_vars='prolonged_stay_label', var_name='Legend', value_name='counts')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-17T19:01:47.756030Z",
     "start_time": "2019-07-17T19:01:47.553253Z"
    }
   },
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots(figsize=(10, 8))\n",
    "\n",
    "sns.barplot(x='prolonged_stay_label', y='counts', hue='Legend', data=plot_df, ax=ax)\n",
    "ax.set_xticklabels(ax.get_xticklabels(), ha='right')\n",
    "ax.set_xlabel('5 Day Discharge Class Label')\n",
    "ax.set_ylabel('# notes')\n",
    "\n",
    "for index, row in plot_df.iterrows():\n",
    "    if index < len(plot_df)//2:\n",
    "        ax.text(index-0.13, row['counts']+50, str(row['counts']), color='black', ha='right', va='bottom')\n",
    "    else:\n",
    "        ax.text(index % (len(plot_df)//2)+0.25, row['counts']+50, str(row['counts']), color='black', ha='right', va='bottom')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fig.savefig(args.figdir/'discharge_label_adms_bp.tif', dpi=300)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "165px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}