--- a
+++ b/2-preprocess-pickle.ipynb
@@ -0,0 +1,1276 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from pathlib import Path\n",
+    "from collections import defaultdict\n",
+    "import pickle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Stage prefix interpolated into the CSV, pickle and image-directory paths used in the cells below\n",
+    "stage = \"stage_2\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Train dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the train table (DICOM header fields plus the six label columns, per the preview below).\n",
+    "# NOTE(review): the 'Unnamed: 0' column in the preview suggests the CSV was saved with its index;\n",
+    "# index_col=0 would drop it - confirm nothing reads that column before changing this.\n",
+    "df = pd.read_csv(f\"data/{stage}_train_dicom_diags.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>BitsAllocated</th>\n",
+       "      <th>BitsStored</th>\n",
+       "      <th>Columns</th>\n",
+       "      <th>HighBit</th>\n",
+       "      <th>ImageOrientationPatient_0</th>\n",
+       "      <th>ImageOrientationPatient_1</th>\n",
+       "      <th>ImageOrientationPatient_2</th>\n",
+       "      <th>ImageOrientationPatient_3</th>\n",
+       "      <th>ImageOrientationPatient_4</th>\n",
+       "      <th>...</th>\n",
+       "      <th>WindowWidth</th>\n",
+       "      <th>WindowWidth_0</th>\n",
+       "      <th>WindowWidth_1</th>\n",
+       "      <th>fid</th>\n",
+       "      <th>any</th>\n",
+       "      <th>epidural</th>\n",
+       "      <th>intraparenchymal</th>\n",
+       "      <th>intraventricular</th>\n",
+       "      <th>subarachnoid</th>\n",
+       "      <th>subdural</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.927184</td>\n",
+       "      <td>...</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_000012eaf</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.968148</td>\n",
+       "      <td>...</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_000039fa0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>100.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_00005679d</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>16</td>\n",
+       "      <td>12</td>\n",
+       "      <td>512</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.994522</td>\n",
+       "      <td>...</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>ID_00008ce3c</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_0000950d7</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 41 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0  BitsAllocated  BitsStored  Columns  HighBit  \\\n",
+       "0           0             16          16      512       15   \n",
+       "1           1             16          16      512       15   \n",
+       "2           2             16          16      512       15   \n",
+       "3           3             16          12      512       11   \n",
+       "4           4             16          16      512       15   \n",
+       "\n",
+       "   ImageOrientationPatient_0  ImageOrientationPatient_1  \\\n",
+       "0                        1.0                        0.0   \n",
+       "1                        1.0                        0.0   \n",
+       "2                        1.0                        0.0   \n",
+       "3                        1.0                        0.0   \n",
+       "4                        1.0                        0.0   \n",
+       "\n",
+       "   ImageOrientationPatient_2  ImageOrientationPatient_3  \\\n",
+       "0                        0.0                        0.0   \n",
+       "1                        0.0                        0.0   \n",
+       "2                        0.0                        0.0   \n",
+       "3                        0.0                        0.0   \n",
+       "4                        0.0                        0.0   \n",
+       "\n",
+       "   ImageOrientationPatient_4  ...  WindowWidth  WindowWidth_0  WindowWidth_1  \\\n",
+       "0                   0.927184  ...         80.0            NaN            NaN   \n",
+       "1                   0.968148  ...         80.0            NaN            NaN   \n",
+       "2                   1.000000  ...        100.0            NaN            NaN   \n",
+       "3                   0.994522  ...          NaN           80.0           80.0   \n",
+       "4                   1.000000  ...        135.0            NaN            NaN   \n",
+       "\n",
+       "            fid any epidural intraparenchymal  intraventricular  subarachnoid  \\\n",
+       "0  ID_000012eaf   0        0                0                 0             0   \n",
+       "1  ID_000039fa0   0        0                0                 0             0   \n",
+       "2  ID_00005679d   0        0                0                 0             0   \n",
+       "3  ID_00008ce3c   0        0                0                 0             0   \n",
+       "4  ID_0000950d7   0        0                0                 0             0   \n",
+       "\n",
+       "   subdural  \n",
+       "0         0  \n",
+       "1         0  \n",
+       "2         0  \n",
+       "3         0  \n",
+       "4         0  \n",
+       "\n",
+       "[5 rows x 41 columns]"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "21744"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Order every slice by ImagePositionPatient_2 first; groupby keeps that row order inside each series\n",
+    "slices_by_z = df.sort_values(by='ImagePositionPatient_2')\n",
+    "gs = slices_by_z.groupby(by='SeriesInstanceUID')\n",
+    "len(gs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ImagePositionPatient_2</th>\n",
+       "      <th>fid</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>577964</th>\n",
+       "      <td>193.542489</td>\n",
+       "      <td>ID_c45659d3d</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>229790</th>\n",
+       "      <td>198.214051</td>\n",
+       "      <td>ID_4e0bdd2ba</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22395</th>\n",
+       "      <td>202.885613</td>\n",
+       "      <td>ID_079945c27</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>746126</th>\n",
+       "      <td>207.557174</td>\n",
+       "      <td>ID_fdbfb2c17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>253266</th>\n",
+       "      <td>212.228736</td>\n",
+       "      <td>ID_55f7bbbf2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        ImagePositionPatient_2           fid\n",
+       "577964              193.542489  ID_c45659d3d\n",
+       "229790              198.214051  ID_4e0bdd2ba\n",
+       "22395               202.885613  ID_079945c27\n",
+       "746126              207.557174  ID_fdbfb2c17\n",
+       "253266              212.228736  ID_55f7bbbf2"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Sanity check on one series: ImagePositionPatient_2 should increase from one row to the next\n",
+    "gs.get_group('ID_fa19cd5ea9')[['ImagePositionPatient_2', 'fid']].head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep the rows of that example series for the inspection cells that follow\n",
+    "g = gs.get_group('ID_fa19cd5ea9')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Narrow the example series to its identifier columns and the six label columns\n",
+    "subg = g[['SeriesInstanceUID', 'fid', 'any', 'epidural', \n",
+    "          'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>SeriesInstanceUID</th>\n",
+       "      <th>fid</th>\n",
+       "      <th>any</th>\n",
+       "      <th>epidural</th>\n",
+       "      <th>intraparenchymal</th>\n",
+       "      <th>intraventricular</th>\n",
+       "      <th>subarachnoid</th>\n",
+       "      <th>subdural</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>577964</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_c45659d3d</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>229790</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_4e0bdd2ba</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22395</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_079945c27</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>746126</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_fdbfb2c17</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>253266</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_55f7bbbf2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>549211</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_ba7080372</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>592856</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_c964e4096</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>183149</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_3e31d57d0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>306771</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_680b2194c</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>540358</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_b76b13444</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>645217</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_db48a633d</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>270974</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_5bf2ca43f</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>672814</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_e4b636907</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>350834</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_7714ead69</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>749886</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_ff012ee5b</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>523978</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_b1cea5abb</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>464942</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_9dad2eb09</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>229881</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_4e14d0fe8</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>186237</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_3f422852d</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>599624</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_cbbb50e6d</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>347055</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_75cbdae68</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>359450</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_7a02fdbea</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>127205</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_2b3671dd9</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>148587</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_3274f5977</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>413641</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_8c5fc9e44</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>688538</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_ea2861e9a</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>318670</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_6c19c9f7b</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>630472</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_d6435f3bf</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>202656</th>\n",
+       "      <td>ID_fa19cd5ea9</td>\n",
+       "      <td>ID_44d57858e</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       SeriesInstanceUID           fid  any  epidural  intraparenchymal  \\\n",
+       "577964     ID_fa19cd5ea9  ID_c45659d3d    0         0                 0   \n",
+       "229790     ID_fa19cd5ea9  ID_4e0bdd2ba    0         0                 0   \n",
+       "22395      ID_fa19cd5ea9  ID_079945c27    1         0                 0   \n",
+       "746126     ID_fa19cd5ea9  ID_fdbfb2c17    1         0                 0   \n",
+       "253266     ID_fa19cd5ea9  ID_55f7bbbf2    1         0                 0   \n",
+       "549211     ID_fa19cd5ea9  ID_ba7080372    1         0                 0   \n",
+       "592856     ID_fa19cd5ea9  ID_c964e4096    1         0                 0   \n",
+       "183149     ID_fa19cd5ea9  ID_3e31d57d0    1         0                 0   \n",
+       "306771     ID_fa19cd5ea9  ID_680b2194c    1         0                 0   \n",
+       "540358     ID_fa19cd5ea9  ID_b76b13444    1         0                 0   \n",
+       "645217     ID_fa19cd5ea9  ID_db48a633d    1         0                 0   \n",
+       "270974     ID_fa19cd5ea9  ID_5bf2ca43f    1         0                 0   \n",
+       "672814     ID_fa19cd5ea9  ID_e4b636907    1         0                 0   \n",
+       "350834     ID_fa19cd5ea9  ID_7714ead69    1         0                 0   \n",
+       "749886     ID_fa19cd5ea9  ID_ff012ee5b    1         0                 0   \n",
+       "523978     ID_fa19cd5ea9  ID_b1cea5abb    1         0                 0   \n",
+       "464942     ID_fa19cd5ea9  ID_9dad2eb09    1         0                 0   \n",
+       "229881     ID_fa19cd5ea9  ID_4e14d0fe8    1         0                 0   \n",
+       "186237     ID_fa19cd5ea9  ID_3f422852d    1         0                 0   \n",
+       "599624     ID_fa19cd5ea9  ID_cbbb50e6d    1         0                 0   \n",
+       "347055     ID_fa19cd5ea9  ID_75cbdae68    1         0                 0   \n",
+       "359450     ID_fa19cd5ea9  ID_7a02fdbea    1         0                 0   \n",
+       "127205     ID_fa19cd5ea9  ID_2b3671dd9    1         0                 0   \n",
+       "148587     ID_fa19cd5ea9  ID_3274f5977    0         0                 0   \n",
+       "413641     ID_fa19cd5ea9  ID_8c5fc9e44    0         0                 0   \n",
+       "688538     ID_fa19cd5ea9  ID_ea2861e9a    0         0                 0   \n",
+       "318670     ID_fa19cd5ea9  ID_6c19c9f7b    0         0                 0   \n",
+       "630472     ID_fa19cd5ea9  ID_d6435f3bf    0         0                 0   \n",
+       "202656     ID_fa19cd5ea9  ID_44d57858e    0         0                 0   \n",
+       "\n",
+       "        intraventricular  subarachnoid  subdural  \n",
+       "577964                 0             0         0  \n",
+       "229790                 0             0         0  \n",
+       "22395                  0             0         1  \n",
+       "746126                 0             0         1  \n",
+       "253266                 0             0         1  \n",
+       "549211                 0             0         1  \n",
+       "592856                 0             0         1  \n",
+       "183149                 0             0         1  \n",
+       "306771                 0             0         1  \n",
+       "540358                 0             0         1  \n",
+       "645217                 0             0         1  \n",
+       "270974                 0             0         1  \n",
+       "672814                 0             0         1  \n",
+       "350834                 0             0         1  \n",
+       "749886                 0             0         1  \n",
+       "523978                 0             0         1  \n",
+       "464942                 0             0         1  \n",
+       "229881                 0             0         1  \n",
+       "186237                 0             0         1  \n",
+       "599624                 0             0         1  \n",
+       "347055                 0             0         1  \n",
+       "359450                 0             0         1  \n",
+       "127205                 0             0         1  \n",
+       "148587                 0             0         0  \n",
+       "413641                 0             0         0  \n",
+       "688538                 0             0         0  \n",
+       "318670                 0             0         0  \n",
+       "630472                 0             0         0  \n",
+       "202656                 0             0         0  "
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "subg"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can use a btrfs snapshot and rename files by study_id and z-pos through the brain\n",
+    "def rename_train_group(subg):\n",
+    "    \"\"\"Rename each .dcm file of one series so its name encodes series, position and labels.\n",
+    "\n",
+    "    The new file stem is\n",
+    "    <SeriesInstanceUID>_<position>_<total>_<any>_<epidural>_<intraparenchymal>_<intraventricular>_<subarachnoid>_<subdural>_<fid>\n",
+    "    with position and total zero-padded to three digits. Files are renamed in place\n",
+    "    inside data/unzip_renamed/<stage>_train_images/ (reads the notebook-level `stage`).\n",
+    "\n",
+    "    Args:\n",
+    "        subg: rows of a single series, already in the desired slice order; needs the\n",
+    "            columns 'fid', 'SeriesInstanceUID' and the six label columns.\n",
+    "\n",
+    "    Raises:\n",
+    "        FileNotFoundError: if a source file is missing and its renamed target does not exist either.\n",
+    "    \"\"\"\n",
+    "    image_dir = Path(f'data/unzip_renamed/{stage}_train_images')\n",
+    "    label_cols = ('any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural')\n",
+    "    total = len(subg)\n",
+    "    for ix, (_, row) in enumerate(subg.iterrows()):\n",
+    "        cur_fn = row['fid']\n",
+    "        label_part = '_'.join(str(row[col]) for col in label_cols)\n",
+    "        new_fn = f\"{row['SeriesInstanceUID']}_{ix:03}_{total:03}_{label_part}_{cur_fn}\"\n",
+    "        src = image_dir / f'{cur_fn}.dcm'\n",
+    "        dst = image_dir / f'{new_fn}.dcm'\n",
+    "        # A previous (possibly interrupted) run may already have renamed this slice;\n",
+    "        # skip it so the cell can be re-run instead of failing on the missing source.\n",
+    "        if dst.exists() and not src.exists():\n",
+    "            continue\n",
+    "        src.rename(dst)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def index_group(subg, study_ix_to_fn, fn_to_study_ix):\n",
+    "    \"\"\"Record the slice order of one series and each slice's position within it.\n",
+    "\n",
+    "    Args:\n",
+    "        subg: rows of a single series in slice order; needs the columns\n",
+    "            'SOPInstanceUID' and 'SeriesInstanceUID'.\n",
+    "        study_ix_to_fn: mapping series id -> list of image ids, appended to in row order.\n",
+    "            Missing keys must create a list on access (e.g. defaultdict(list)).\n",
+    "        fn_to_study_ix: mapping image id -> (series id, position within subg), updated in place.\n",
+    "    \"\"\"\n",
+    "    # Walk only the two columns that are needed; iterrows() would build a full Series per row.\n",
+    "    id_pairs = zip(subg['SOPInstanceUID'], subg['SeriesInstanceUID'])\n",
+    "    for ix, (fn, study) in enumerate(id_pairs):\n",
+    "        study_ix_to_fn[study].append(fn)\n",
+    "        fn_to_study_ix[fn] = (study, ix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Label columns, in the order their names are reported for each image\n",
+    "labels = ['any', 'epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']\n",
+    "\n",
+    "def label_group(subg, fn_to_labels):\n",
+    "    \"\"\"Store, for every row of subg, the names of the label columns whose value is 1.\n",
+    "\n",
+    "    Args:\n",
+    "        subg: rows to process; needs 'SOPInstanceUID' and every column named in `labels`.\n",
+    "        fn_to_labels: mapping image id -> list of label names, updated in place.\n",
+    "            Rows with no label set get an empty list.\n",
+    "    \"\"\"\n",
+    "    # Plain tuples of just the label columns avoid building a full Series per row via iterrows().\n",
+    "    flag_rows = subg[labels].itertuples(index=False, name=None)\n",
+    "    for fn, flags in zip(subg['SOPInstanceUID'], flag_rows):\n",
+    "        fn_to_labels[fn] = [label for label, flag in zip(labels, flags) if flag == 1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Accumulators filled series by series; defaultdict(list) lets index_group append without pre-creating keys\n",
+    "train_study_ix_to_fn = defaultdict(list)\n",
+    "train_fn_to_study_ix = {}\n",
+    "train_fn_to_labels = {}\n",
+    "\n",
+    "# NOTE: the loop variable rebinds the notebook-level `subg` assigned in an earlier cell\n",
+    "for name, subg in gs:\n",
+    "    # Renaming is left disabled: it moves files on disk and is not needed to build the maps\n",
+    "    #rename_train_group(subg)\n",
+    "    index_group(subg, train_study_ix_to_fn, train_fn_to_study_ix)\n",
+    "    label_group(subg, train_fn_to_labels)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Do not pickle yet, merge with test\n",
+    "# Write the train-only index maps to disk; the with-blocks make sure each file is flushed and closed.\n",
+    "# NOTE(review): the previous comment here read 'Do not pickle yet, merge with test' although the cell\n",
+    "# pickles anyway - confirm whether a merged train+test file should be written instead.\n",
+    "with open(f\"data/{stage}_train_study_ix_to_fn.pickle\", \"wb\") as fh:\n",
+    "    pickle.dump(train_study_ix_to_fn, fh)\n",
+    "with open(f\"data/{stage}_train_fn_to_study_ix.pickle\", \"wb\") as fh:\n",
+    "    pickle.dump(train_fn_to_study_ix, fh)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['any', 'subdural']"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "train_fn_to_labels['ID_079945c27']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Close the file deterministically instead of leaving the open handle to the garbage collector\n",
+    "with open(f\"data/{stage}_train_fn_to_labels.pickle\", 'wb') as fh:\n",
+    "    pickle.dump(train_fn_to_labels, fh)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Test dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# NOTE: rebinds `df`; the train table loaded earlier is no longer reachable under this name\n",
+    "df = pd.read_csv(f\"data/{stage}_test_dicom.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Unnamed: 0</th>\n",
+       "      <th>BitsAllocated</th>\n",
+       "      <th>BitsStored</th>\n",
+       "      <th>Columns</th>\n",
+       "      <th>HighBit</th>\n",
+       "      <th>ImageOrientationPatient_0</th>\n",
+       "      <th>ImageOrientationPatient_1</th>\n",
+       "      <th>ImageOrientationPatient_2</th>\n",
+       "      <th>ImageOrientationPatient_3</th>\n",
+       "      <th>ImageOrientationPatient_4</th>\n",
+       "      <th>...</th>\n",
+       "      <th>SamplesPerPixel</th>\n",
+       "      <th>SeriesInstanceUID</th>\n",
+       "      <th>StudyID</th>\n",
+       "      <th>StudyInstanceUID</th>\n",
+       "      <th>WindowCenter</th>\n",
+       "      <th>WindowCenter_0</th>\n",
+       "      <th>WindowCenter_1</th>\n",
+       "      <th>WindowWidth</th>\n",
+       "      <th>WindowWidth_0</th>\n",
+       "      <th>WindowWidth_1</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>16</td>\n",
+       "      <td>12</td>\n",
+       "      <td>512</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.981627</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>ID_4d28912ba6</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_1f6d1e8aeb</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>40.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>80.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.987688</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>ID_acabdeee86</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_4a8d7ec19f</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.927184</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>ID_d00cee7f0c</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_a6ca244172</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>16</td>\n",
+       "      <td>16</td>\n",
+       "      <td>512</td>\n",
+       "      <td>15</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.986286</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>ID_a52a0112d5</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_fa950a03af</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>16</td>\n",
+       "      <td>12</td>\n",
+       "      <td>512</td>\n",
+       "      <td>11</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>ID_f552d3b922</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>ID_965d8b3d8e</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>36.0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>80.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 34 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Unnamed: 0  BitsAllocated  BitsStored  Columns  HighBit  \\\n",
+       "0           0             16          12      512       11   \n",
+       "1           1             16          16      512       15   \n",
+       "2           2             16          16      512       15   \n",
+       "3           3             16          16      512       15   \n",
+       "4           4             16          12      512       11   \n",
+       "\n",
+       "   ImageOrientationPatient_0  ImageOrientationPatient_1  \\\n",
+       "0                        1.0                        0.0   \n",
+       "1                        1.0                        0.0   \n",
+       "2                        1.0                        0.0   \n",
+       "3                        1.0                        0.0   \n",
+       "4                        1.0                        0.0   \n",
+       "\n",
+       "   ImageOrientationPatient_2  ImageOrientationPatient_3  \\\n",
+       "0                        0.0                        0.0   \n",
+       "1                        0.0                        0.0   \n",
+       "2                        0.0                        0.0   \n",
+       "3                        0.0                        0.0   \n",
+       "4                        0.0                        0.0   \n",
+       "\n",
+       "   ImageOrientationPatient_4  ...  SamplesPerPixel  SeriesInstanceUID  \\\n",
+       "0                   0.981627  ...                1      ID_4d28912ba6   \n",
+       "1                   0.987688  ...                1      ID_acabdeee86   \n",
+       "2                   0.927184  ...                1      ID_d00cee7f0c   \n",
+       "3                   0.986286  ...                1      ID_a52a0112d5   \n",
+       "4                   1.000000  ...                1      ID_f552d3b922   \n",
+       "\n",
+       "   StudyID  StudyInstanceUID WindowCenter WindowCenter_0 WindowCenter_1  \\\n",
+       "0      NaN     ID_1f6d1e8aeb          NaN           40.0           40.0   \n",
+       "1      NaN     ID_4a8d7ec19f         30.0            NaN            NaN   \n",
+       "2      NaN     ID_a6ca244172         30.0            NaN            NaN   \n",
+       "3      NaN     ID_fa950a03af         30.0            NaN            NaN   \n",
+       "4      NaN     ID_965d8b3d8e          NaN           36.0           36.0   \n",
+       "\n",
+       "   WindowWidth  WindowWidth_0  WindowWidth_1  \n",
+       "0          NaN           80.0           80.0  \n",
+       "1         80.0            NaN            NaN  \n",
+       "2         80.0            NaN            NaN  \n",
+       "3         80.0            NaN            NaN  \n",
+       "4          NaN           80.0           80.0  \n",
+       "\n",
+       "[5 rows x 34 columns]"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3518"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# sort, then group by (order is preserved within groups)\n",
+    "gs = df.sort_values('ImagePositionPatient_2').groupby('SeriesInstanceUID')\n",
+    "len(gs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def rename_test_group(subg):\n",
+    "    \"\"\"Rename the .dcm files of one test group so each name encodes its position in the group.\n",
+    "\n",
+    "    Every file {SOPInstanceUID}.dcm inside data/unzip_renamed/{stage}_test_images is\n",
+    "    renamed to {SeriesInstanceUID}_{position}_{total}_{SOPInstanceUID}.dcm, where\n",
+    "    position is the zero-based row position within subg and total is len(subg),\n",
+    "    both zero-padded to at least three digits.\n",
+    "\n",
+    "    Args:\n",
+    "        subg: DataFrame with 'SOPInstanceUID' and 'SeriesInstanceUID' columns,\n",
+    "            already in the order the positions should follow.\n",
+    "\n",
+    "    Raises:\n",
+    "        FileNotFoundError: if a source .dcm file does not exist.\n",
+    "    \"\"\"\n",
+    "    image_dir = Path(f'data/unzip_renamed/{stage}_test_images')\n",
+    "    total = len(subg)\n",
+    "    # enumerate supplies the running position instead of a hand-maintained counter.\n",
+    "    for ix, (_, row) in enumerate(subg.iterrows()):\n",
+    "        cur_fn = row['SOPInstanceUID']\n",
+    "        new_fn = f\"{row['SeriesInstanceUID']}_{ix:03}_{total:03}_{cur_fn}\"\n",
+    "        (image_dir / f'{cur_fn}.dcm').rename(image_dir / f'{new_fn}.dcm')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Accumulators handed to index_group for every group in gs.\n",
+    "test_study_ix_to_fn, test_fn_to_study_ix = defaultdict(list), {}\n",
+    "\n",
+    "for group_key, group_df in gs:\n",
+    "    # The file-renaming step (rename_test_group) is left commented out.\n",
+    "    index_group(group_df, test_study_ix_to_fn, test_fn_to_study_ix)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Persist the test-only index maps; the with-blocks close each file once written.\n",
+    "with open(f\"data/{stage}_test_study_ix_to_fn.pickle\", \"wb\") as out_file:\n",
+    "    pickle.dump(test_study_ix_to_fn, out_file)\n",
+    "with open(f\"data/{stage}_test_fn_to_study_ix.pickle\", \"wb\") as out_file:\n",
+    "    pickle.dump(test_fn_to_study_ix, out_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Combine the train and test maps into plain dicts; test entries are applied last.\n",
+    "# NOTE(review): a key present in both sets keeps only the test value - confirm keys never collide.\n",
+    "study_ix_to_fn = dict(train_study_ix_to_fn)\n",
+    "study_ix_to_fn.update(test_study_ix_to_fn)\n",
+    "fn_to_study_ix = dict(train_fn_to_study_ix)\n",
+    "fn_to_study_ix.update(test_fn_to_study_ix)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Persist the merged train+test index maps; the with-blocks close each file once written.\n",
+    "with open(f\"data/{stage}_study_ix_to_fn.pickle\", \"wb\") as out_file:\n",
+    "    pickle.dump(study_ix_to_fn, out_file)\n",
+    "with open(f\"data/{stage}_fn_to_study_ix.pickle\", \"wb\") as out_file:\n",
+    "    pickle.dump(fn_to_study_ix, out_file)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}