[fc9ccf]: / 1-preprocess-brain_norm.ipynb

Download this file

2077 lines (2076 with data), 77.2 kB

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Brain translational and rotational normalization\n",
    "* Zero z at the slice with max area\n",
    "* Use analytic minimization of moment of inertia to find orientation angle\n",
    "\n",
    "Generates:\n",
    "* `train_dicom_diags_norm.csv` and `test_dicom_norm.csv` with 5 new columns added: \n",
    "  * `normz` normalized z\n",
    "  * `xcm`, `ycm` center of mass\n",
    "  * `theta` (rads) the CW angle the slice must be rotated to straighten it\n",
    "  * `pct_tissue` percentage of brain tissue pixels in the radiography"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "stage = \"stage_2\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pydicom\n",
    "import math\n",
    "from pathlib import Path\n",
    "from fastai.vision import *\n",
    "from matplotlib import pyplot as plt\n",
    "from scipy import ndimage, misc\n",
    "from itertools import repeat\n",
    "import pandas as pd\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "tqdm().pandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def dcm_to_np(dcm):\n",
    "    ''' Dicom to numpy array\n",
    "    :param dcm: dicom object\n",
    "    '''\n",
    "    rescale_slope, rescale_intercept = float(dcm.RescaleSlope), float(dcm.RescaleIntercept)\n",
    "    t = dcm.pixel_array.astype(np.float)\n",
    "    t = t * rescale_slope + rescale_intercept # rescale\n",
    "    return t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_fid_with_max_area(dir, dfs):\n",
    "    ''' Given a single study return the fid (fileid) with biggest brain area\n",
    "    :param dir: datset directory\n",
    "    :param dfs: pandas dataframe with 1 study\n",
    "    '''\n",
    "    brain_area_vs_z = []\n",
    "    pcts_tissue = []\n",
    "    max_area = 0\n",
    "    for index, row in dfs.iterrows():\n",
    "        fid = row['SOPInstanceUID'] # don't do this inside f'' array or jupyter bugs out\n",
    "        try:\n",
    "            dcm = pydicom.dcmread(f\"{dir}/{fid}.dcm\")\n",
    "            t = dcm_to_np(dcm)\n",
    "        except Exception as e:\n",
    "            print(e,fid)\n",
    "        pts = np.argwhere(np.logical_and(0 < t, t < 100)) # select only brain matter\n",
    "        if len(pts) > max_area:\n",
    "            max_area = len(pts)\n",
    "            fid_max_area = row['SOPInstanceUID']\n",
    "        brain_area_vs_z.append([float(row['ImagePositionPatient_2']), len(pts)])\n",
    "        pcts_tissue.append(len(pts) / t.size)\n",
    "    return fid_max_area, pcts_tissue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_axis(brain):\n",
    "    ''' \n",
    "    Get orientation axis.\n",
    "    :param brain: numpy array (brain)\n",
    "    :return: center of masses (xcm, ycm), coefficients (m,  n) of the axis equation y=mx+n and angle (th)\n",
    "        the brain is rotated counter-clockwise (rotate clockwise to normalize)\n",
    "    '''\n",
    "    pts_org = np.argwhere(np.logical_and(0 < brain, brain < 100))\n",
    "\n",
    "    # Center of masses\n",
    "    xcm = pts_org[:,1].mean()\n",
    "    ycm = pts_org[:,0].mean()\n",
    "\n",
    "    # shift brain to center\n",
    "    xs = pts_org[:,1] - xcm\n",
    "    ys = pts_org[:,0] - ycm\n",
    "    \n",
    "    # coefficients of quad eq am^2+bm+c=0\n",
    "    a = (xs * ys).sum()\n",
    "    b = (xs ** 2 - ys ** 2).sum()\n",
    "    c = -a\n",
    "\n",
    "    # solve for m1 (max I), m2 (min I)\n",
    "    m1 = (-b + (b**2-4*a*c) ** (.5)) / (2 * a)\n",
    "    m2 = (-b - (b**2-4*a*c) ** (.5)) / (2 * a)\n",
    "    \n",
    "    # y-ycm = m*x - m*xcm -> y = m*x - m*xcm + ycm\n",
    "    n1 = ycm - m1 * xcm\n",
    "    n2 = ycm - m2 * xcm\n",
    "    \n",
    "    th = math.atan(m2)\n",
    "    \n",
    "    return xcm, ycm, m2, n2, th"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def norm_study(dir, group_name):\n",
    "    ''' Normalize study given by pandas DataFrame g\n",
    "    :param dir: dataset directory\n",
    "    :param g: single study to normalize\n",
    "    '''\n",
    "    global dfg\n",
    "    g = dfg.get_group(group_name)\n",
    "    fid_max_area, pcts_tissue = get_fid_with_max_area(dir, g)\n",
    "    f = f'{dir}/{fid_max_area}.dcm'\n",
    "    try:\n",
    "        dcm = pydicom.dcmread(str(f))\n",
    "    except Exceptione as e:\n",
    "        print(e,f)\n",
    "    brain = dcm_to_np(dcm)\n",
    "    xcm, ycm, m, n, th = get_axis(brain)\n",
    "    eg = g.copy()\n",
    "    z0 = g[g['SOPInstanceUID'] == fid_max_area].iloc[0]['ImagePositionPatient_2']\n",
    "    eg['normz'] = eg['ImagePositionPatient_2'] - z0\n",
    "    eg['xcm'] = xcm\n",
    "    eg['ycm'] = ycm\n",
    "    eg['theta'] = th\n",
    "    eg['pct_tissue'] = pcts_tissue\n",
    "    return eg"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(f'data/{stage}_train_dicom_diags.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfg = df.groupby('SeriesInstanceUID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "group_names = list(dfg.groups.keys())\n",
    "#group_names = group_names[:64] # test with small subset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4bf5a2291e32453ca38813214dbe1a11",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=21744), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "with ProcessPoolExecutor(max_workers=32) as e:\n",
    "     extended_df = pd.concat(\n",
    "         tqdm(e.map(norm_study, repeat(f'data/unzip/{stage}_train_images'), group_names), total=len(group_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>556142</th>\n",
       "      <th>561465</th>\n",
       "      <th>580918</th>\n",
       "      <th>584578</th>\n",
       "      <th>617029</th>\n",
       "      <th>617400</th>\n",
       "      <th>645424</th>\n",
       "      <th>661661</th>\n",
       "      <th>665938</th>\n",
       "      <th>748282</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <td>556142</td>\n",
       "      <td>561465</td>\n",
       "      <td>580918</td>\n",
       "      <td>584578</td>\n",
       "      <td>617029</td>\n",
       "      <td>617400</td>\n",
       "      <td>645424</td>\n",
       "      <td>661661</td>\n",
       "      <td>665938</td>\n",
       "      <td>748282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BitsAllocated</th>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BitsStored</th>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Columns</th>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HighBit</th>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_4</th>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "      <td>0.979925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_5</th>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "      <td>-0.199368</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_0</th>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_1</th>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "      <td>-122.491</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_2</th>\n",
       "      <td>166.528</td>\n",
       "      <td>156.323</td>\n",
       "      <td>176.733</td>\n",
       "      <td>49.171</td>\n",
       "      <td>89.991</td>\n",
       "      <td>186.938</td>\n",
       "      <td>125.708</td>\n",
       "      <td>146.118</td>\n",
       "      <td>74.6834</td>\n",
       "      <td>141.016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Modality</th>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PatientID</th>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "      <td>ID_160aea75</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PhotometricInterpretation</th>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelRepresentation</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelSpacing_0</th>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelSpacing_1</th>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RescaleIntercept</th>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RescaleSlope</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rows</th>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SOPInstanceUID</th>\n",
       "      <td>ID_bcdecac1b</td>\n",
       "      <td>ID_beb4aacbe</td>\n",
       "      <td>ID_c554939b0</td>\n",
       "      <td>ID_c6909d6fa</td>\n",
       "      <td>ID_d1ad467b0</td>\n",
       "      <td>ID_d1d25ea8c</td>\n",
       "      <td>ID_db59bb95e</td>\n",
       "      <td>ID_e0ed751c2</td>\n",
       "      <td>ID_e262d76e2</td>\n",
       "      <td>ID_fe77ec61b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SamplesPerPixel</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SeriesInstanceUID</th>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "      <td>ID_000a935543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>StudyID</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>StudyInstanceUID</th>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "      <td>ID_41d976e5d3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter</th>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter_0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter_1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth</th>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth_0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth_1</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fid</th>\n",
       "      <td>ID_bcdecac1b</td>\n",
       "      <td>ID_beb4aacbe</td>\n",
       "      <td>ID_c554939b0</td>\n",
       "      <td>ID_c6909d6fa</td>\n",
       "      <td>ID_d1ad467b0</td>\n",
       "      <td>ID_d1d25ea8c</td>\n",
       "      <td>ID_db59bb95e</td>\n",
       "      <td>ID_e0ed751c2</td>\n",
       "      <td>ID_e262d76e2</td>\n",
       "      <td>ID_fe77ec61b</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>any</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>epidural</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>intraparenchymal</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>intraventricular</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>subarachnoid</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>subdural</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>normz</th>\n",
       "      <td>61.23</td>\n",
       "      <td>51.0251</td>\n",
       "      <td>71.4351</td>\n",
       "      <td>-56.1273</td>\n",
       "      <td>-15.3073</td>\n",
       "      <td>81.64</td>\n",
       "      <td>20.41</td>\n",
       "      <td>40.82</td>\n",
       "      <td>-30.6149</td>\n",
       "      <td>35.7176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>xcm</th>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "      <td>249.719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ycm</th>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "      <td>249.632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>theta</th>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "      <td>-0.0280377</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pct_tissue</th>\n",
       "      <td>0.140537</td>\n",
       "      <td>0.194378</td>\n",
       "      <td>0.0751762</td>\n",
       "      <td>0.170967</td>\n",
       "      <td>0.320599</td>\n",
       "      <td>0.0209999</td>\n",
       "      <td>0.305298</td>\n",
       "      <td>0.242409</td>\n",
       "      <td>0.247257</td>\n",
       "      <td>0.262268</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  556142         561465         580918  \\\n",
       "Unnamed: 0                        556142         561465         580918   \n",
       "BitsAllocated                         16             16             16   \n",
       "BitsStored                            16             16             16   \n",
       "Columns                              512            512            512   \n",
       "HighBit                               15             15             15   \n",
       "ImageOrientationPatient_0              1              1              1   \n",
       "ImageOrientationPatient_1              0              0              0   \n",
       "ImageOrientationPatient_2              0              0              0   \n",
       "ImageOrientationPatient_3              0              0              0   \n",
       "ImageOrientationPatient_4       0.979925       0.979925       0.979925   \n",
       "ImageOrientationPatient_5      -0.199368      -0.199368      -0.199368   \n",
       "ImagePositionPatient_0              -125           -125           -125   \n",
       "ImagePositionPatient_1          -122.491       -122.491       -122.491   \n",
       "ImagePositionPatient_2           166.528        156.323        176.733   \n",
       "Modality                              CT             CT             CT   \n",
       "PatientID                    ID_160aea75    ID_160aea75    ID_160aea75   \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
       "PixelRepresentation                    1              1              1   \n",
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
       "RescaleIntercept                   -1024          -1024          -1024   \n",
       "RescaleSlope                           1              1              1   \n",
       "Rows                                 512            512            512   \n",
       "SOPInstanceUID              ID_bcdecac1b   ID_beb4aacbe   ID_c554939b0   \n",
       "SamplesPerPixel                        1              1              1   \n",
       "SeriesInstanceUID          ID_000a935543  ID_000a935543  ID_000a935543   \n",
       "StudyID                              NaN            NaN            NaN   \n",
       "StudyInstanceUID           ID_41d976e5d3  ID_41d976e5d3  ID_41d976e5d3   \n",
       "WindowCenter                          30             30             30   \n",
       "WindowCenter_0                       NaN            NaN            NaN   \n",
       "WindowCenter_1                       NaN            NaN            NaN   \n",
       "WindowWidth                           80             80             80   \n",
       "WindowWidth_0                        NaN            NaN            NaN   \n",
       "WindowWidth_1                        NaN            NaN            NaN   \n",
       "fid                         ID_bcdecac1b   ID_beb4aacbe   ID_c554939b0   \n",
       "any                                    0              0              0   \n",
       "epidural                               0              0              0   \n",
       "intraparenchymal                       0              0              0   \n",
       "intraventricular                       0              0              0   \n",
       "subarachnoid                           0              0              0   \n",
       "subdural                               0              0              0   \n",
       "normz                              61.23        51.0251        71.4351   \n",
       "xcm                              249.719        249.719        249.719   \n",
       "ycm                              249.632        249.632        249.632   \n",
       "theta                         -0.0280377     -0.0280377     -0.0280377   \n",
       "pct_tissue                      0.140537       0.194378      0.0751762   \n",
       "\n",
       "                                  584578         617029         617400  \\\n",
       "Unnamed: 0                        584578         617029         617400   \n",
       "BitsAllocated                         16             16             16   \n",
       "BitsStored                            16             16             16   \n",
       "Columns                              512            512            512   \n",
       "HighBit                               15             15             15   \n",
       "ImageOrientationPatient_0              1              1              1   \n",
       "ImageOrientationPatient_1              0              0              0   \n",
       "ImageOrientationPatient_2              0              0              0   \n",
       "ImageOrientationPatient_3              0              0              0   \n",
       "ImageOrientationPatient_4       0.979925       0.979925       0.979925   \n",
       "ImageOrientationPatient_5      -0.199368      -0.199368      -0.199368   \n",
       "ImagePositionPatient_0              -125           -125           -125   \n",
       "ImagePositionPatient_1          -122.491       -122.491       -122.491   \n",
       "ImagePositionPatient_2            49.171         89.991        186.938   \n",
       "Modality                              CT             CT             CT   \n",
       "PatientID                    ID_160aea75    ID_160aea75    ID_160aea75   \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
       "PixelRepresentation                    1              1              1   \n",
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
       "RescaleIntercept                   -1024          -1024          -1024   \n",
       "RescaleSlope                           1              1              1   \n",
       "Rows                                 512            512            512   \n",
       "SOPInstanceUID              ID_c6909d6fa   ID_d1ad467b0   ID_d1d25ea8c   \n",
       "SamplesPerPixel                        1              1              1   \n",
       "SeriesInstanceUID          ID_000a935543  ID_000a935543  ID_000a935543   \n",
       "StudyID                              NaN            NaN            NaN   \n",
       "StudyInstanceUID           ID_41d976e5d3  ID_41d976e5d3  ID_41d976e5d3   \n",
       "WindowCenter                          30             30             30   \n",
       "WindowCenter_0                       NaN            NaN            NaN   \n",
       "WindowCenter_1                       NaN            NaN            NaN   \n",
       "WindowWidth                           80             80             80   \n",
       "WindowWidth_0                        NaN            NaN            NaN   \n",
       "WindowWidth_1                        NaN            NaN            NaN   \n",
       "fid                         ID_c6909d6fa   ID_d1ad467b0   ID_d1d25ea8c   \n",
       "any                                    0              0              0   \n",
       "epidural                               0              0              0   \n",
       "intraparenchymal                       0              0              0   \n",
       "intraventricular                       0              0              0   \n",
       "subarachnoid                           0              0              0   \n",
       "subdural                               0              0              0   \n",
       "normz                           -56.1273       -15.3073          81.64   \n",
       "xcm                              249.719        249.719        249.719   \n",
       "ycm                              249.632        249.632        249.632   \n",
       "theta                         -0.0280377     -0.0280377     -0.0280377   \n",
       "pct_tissue                      0.170967       0.320599      0.0209999   \n",
       "\n",
       "                                  645424         661661         665938  \\\n",
       "Unnamed: 0                        645424         661661         665938   \n",
       "BitsAllocated                         16             16             16   \n",
       "BitsStored                            16             16             16   \n",
       "Columns                              512            512            512   \n",
       "HighBit                               15             15             15   \n",
       "ImageOrientationPatient_0              1              1              1   \n",
       "ImageOrientationPatient_1              0              0              0   \n",
       "ImageOrientationPatient_2              0              0              0   \n",
       "ImageOrientationPatient_3              0              0              0   \n",
       "ImageOrientationPatient_4       0.979925       0.979925       0.979925   \n",
       "ImageOrientationPatient_5      -0.199368      -0.199368      -0.199368   \n",
       "ImagePositionPatient_0              -125           -125           -125   \n",
       "ImagePositionPatient_1          -122.491       -122.491       -122.491   \n",
       "ImagePositionPatient_2           125.708        146.118        74.6834   \n",
       "Modality                              CT             CT             CT   \n",
       "PatientID                    ID_160aea75    ID_160aea75    ID_160aea75   \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
       "PixelRepresentation                    1              1              1   \n",
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
       "RescaleIntercept                   -1024          -1024          -1024   \n",
       "RescaleSlope                           1              1              1   \n",
       "Rows                                 512            512            512   \n",
       "SOPInstanceUID              ID_db59bb95e   ID_e0ed751c2   ID_e262d76e2   \n",
       "SamplesPerPixel                        1              1              1   \n",
       "SeriesInstanceUID          ID_000a935543  ID_000a935543  ID_000a935543   \n",
       "StudyID                              NaN            NaN            NaN   \n",
       "StudyInstanceUID           ID_41d976e5d3  ID_41d976e5d3  ID_41d976e5d3   \n",
       "WindowCenter                          30             30             30   \n",
       "WindowCenter_0                       NaN            NaN            NaN   \n",
       "WindowCenter_1                       NaN            NaN            NaN   \n",
       "WindowWidth                           80             80             80   \n",
       "WindowWidth_0                        NaN            NaN            NaN   \n",
       "WindowWidth_1                        NaN            NaN            NaN   \n",
       "fid                         ID_db59bb95e   ID_e0ed751c2   ID_e262d76e2   \n",
       "any                                    0              0              0   \n",
       "epidural                               0              0              0   \n",
       "intraparenchymal                       0              0              0   \n",
       "intraventricular                       0              0              0   \n",
       "subarachnoid                           0              0              0   \n",
       "subdural                               0              0              0   \n",
       "normz                              20.41          40.82       -30.6149   \n",
       "xcm                              249.719        249.719        249.719   \n",
       "ycm                              249.632        249.632        249.632   \n",
       "theta                         -0.0280377     -0.0280377     -0.0280377   \n",
       "pct_tissue                      0.305298       0.242409       0.247257   \n",
       "\n",
       "                                  748282  \n",
       "Unnamed: 0                        748282  \n",
       "BitsAllocated                         16  \n",
       "BitsStored                            16  \n",
       "Columns                              512  \n",
       "HighBit                               15  \n",
       "ImageOrientationPatient_0              1  \n",
       "ImageOrientationPatient_1              0  \n",
       "ImageOrientationPatient_2              0  \n",
       "ImageOrientationPatient_3              0  \n",
       "ImageOrientationPatient_4       0.979925  \n",
       "ImageOrientationPatient_5      -0.199368  \n",
       "ImagePositionPatient_0              -125  \n",
       "ImagePositionPatient_1          -122.491  \n",
       "ImagePositionPatient_2           141.016  \n",
       "Modality                              CT  \n",
       "PatientID                    ID_160aea75  \n",
       "PhotometricInterpretation    MONOCHROME2  \n",
       "PixelRepresentation                    1  \n",
       "PixelSpacing_0                  0.488281  \n",
       "PixelSpacing_1                  0.488281  \n",
       "RescaleIntercept                   -1024  \n",
       "RescaleSlope                           1  \n",
       "Rows                                 512  \n",
       "SOPInstanceUID              ID_fe77ec61b  \n",
       "SamplesPerPixel                        1  \n",
       "SeriesInstanceUID          ID_000a935543  \n",
       "StudyID                              NaN  \n",
       "StudyInstanceUID           ID_41d976e5d3  \n",
       "WindowCenter                          30  \n",
       "WindowCenter_0                       NaN  \n",
       "WindowCenter_1                       NaN  \n",
       "WindowWidth                           80  \n",
       "WindowWidth_0                        NaN  \n",
       "WindowWidth_1                        NaN  \n",
       "fid                         ID_fe77ec61b  \n",
       "any                                    0  \n",
       "epidural                               0  \n",
       "intraparenchymal                       0  \n",
       "intraventricular                       0  \n",
       "subarachnoid                           0  \n",
       "subdural                               0  \n",
       "normz                            35.7176  \n",
       "xcm                              249.719  \n",
       "ycm                              249.632  \n",
       "theta                         -0.0280377  \n",
       "pct_tissue                      0.262268  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at ten rows (positions 90-99), transposed so each column prints as a row\n",
    "extended_df.iloc[90:100].T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "752802"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: the extended frame has exactly as many rows as the input frame\n",
    "n_rows = len(df)\n",
    "assert len(extended_df) == n_rows\n",
    "n_rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the extended training table (with the normalization columns) to disk\n",
    "train_norm_csv = f'data/{stage}_train_dicom_diags_norm.csv'\n",
    "extended_df.to_csv(train_norm_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test-set DICOM metadata table; note that this rebinds `df`\n",
    "test_csv = f'data/{stage}_test_dicom.csv'\n",
    "df = pd.read_csv(test_csv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the rows by their SeriesInstanceUID\n",
    "dfg = df.groupby(by='SeriesInstanceUID')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3518"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the group keys (the distinct SeriesInstanceUID values) and count them\n",
    "group_names = [*dfg.groups]\n",
    "len(group_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BitsAllocated</th>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BitsStored</th>\n",
       "      <td>12</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Columns</th>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HighBit</th>\n",
       "      <td>11</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>15</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_4</th>\n",
       "      <td>0.981627</td>\n",
       "      <td>0.987688</td>\n",
       "      <td>0.927184</td>\n",
       "      <td>0.986286</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_5</th>\n",
       "      <td>-0.190809</td>\n",
       "      <td>-0.156434</td>\n",
       "      <td>-0.374607</td>\n",
       "      <td>-0.165048</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_0</th>\n",
       "      <td>-112</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "      <td>-125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_1</th>\n",
       "      <td>-9.80803</td>\n",
       "      <td>-127.061</td>\n",
       "      <td>-119.998</td>\n",
       "      <td>-120.286</td>\n",
       "      <td>-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_2</th>\n",
       "      <td>267.219</td>\n",
       "      <td>197.807</td>\n",
       "      <td>78.6604</td>\n",
       "      <td>131.737</td>\n",
       "      <td>98.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Modality</th>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PatientID</th>\n",
       "      <td>ID_52c1ab5a</td>\n",
       "      <td>ID_39e44bce</td>\n",
       "      <td>ID_6f87831a</td>\n",
       "      <td>ID_7a7c9c9e</td>\n",
       "      <td>ID_ab5e477f</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PhotometricInterpretation</th>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelRepresentation</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelSpacing_0</th>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelSpacing_1</th>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RescaleIntercept</th>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RescaleSlope</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rows</th>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SOPInstanceUID</th>\n",
       "      <td>ID_000000e27</td>\n",
       "      <td>ID_000009146</td>\n",
       "      <td>ID_00007b8cb</td>\n",
       "      <td>ID_000134952</td>\n",
       "      <td>ID_000176f2a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SamplesPerPixel</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SeriesInstanceUID</th>\n",
       "      <td>ID_4d28912ba6</td>\n",
       "      <td>ID_acabdeee86</td>\n",
       "      <td>ID_d00cee7f0c</td>\n",
       "      <td>ID_a52a0112d5</td>\n",
       "      <td>ID_f552d3b922</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>StudyID</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>StudyInstanceUID</th>\n",
       "      <td>ID_1f6d1e8aeb</td>\n",
       "      <td>ID_4a8d7ec19f</td>\n",
       "      <td>ID_a6ca244172</td>\n",
       "      <td>ID_fa950a03af</td>\n",
       "      <td>ID_965d8b3d8e</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter</th>\n",
       "      <td>NaN</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>30</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter_0</th>\n",
       "      <td>40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter_1</th>\n",
       "      <td>40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth</th>\n",
       "      <td>NaN</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth_0</th>\n",
       "      <td>80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth_1</th>\n",
       "      <td>80</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       0              1              2  \\\n",
       "Unnamed: 0                             0              1              2   \n",
       "BitsAllocated                         16             16             16   \n",
       "BitsStored                            12             16             16   \n",
       "Columns                              512            512            512   \n",
       "HighBit                               11             15             15   \n",
       "ImageOrientationPatient_0              1              1              1   \n",
       "ImageOrientationPatient_1              0              0              0   \n",
       "ImageOrientationPatient_2              0              0              0   \n",
       "ImageOrientationPatient_3              0              0              0   \n",
       "ImageOrientationPatient_4       0.981627       0.987688       0.927184   \n",
       "ImageOrientationPatient_5      -0.190809      -0.156434      -0.374607   \n",
       "ImagePositionPatient_0              -112           -125           -125   \n",
       "ImagePositionPatient_1          -9.80803       -127.061       -119.998   \n",
       "ImagePositionPatient_2           267.219        197.807        78.6604   \n",
       "Modality                              CT             CT             CT   \n",
       "PatientID                    ID_52c1ab5a    ID_39e44bce    ID_6f87831a   \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
       "PixelRepresentation                    0              1              1   \n",
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
       "RescaleIntercept                   -1024          -1024          -1024   \n",
       "RescaleSlope                           1              1              1   \n",
       "Rows                                 512            512            512   \n",
       "SOPInstanceUID              ID_000000e27   ID_000009146   ID_00007b8cb   \n",
       "SamplesPerPixel                        1              1              1   \n",
       "SeriesInstanceUID          ID_4d28912ba6  ID_acabdeee86  ID_d00cee7f0c   \n",
       "StudyID                              NaN            NaN            NaN   \n",
       "StudyInstanceUID           ID_1f6d1e8aeb  ID_4a8d7ec19f  ID_a6ca244172   \n",
       "WindowCenter                         NaN             30             30   \n",
       "WindowCenter_0                        40            NaN            NaN   \n",
       "WindowCenter_1                        40            NaN            NaN   \n",
       "WindowWidth                          NaN             80             80   \n",
       "WindowWidth_0                         80            NaN            NaN   \n",
       "WindowWidth_1                         80            NaN            NaN   \n",
       "\n",
       "                                       3              4  \n",
       "Unnamed: 0                             3              4  \n",
       "BitsAllocated                         16             16  \n",
       "BitsStored                            16             12  \n",
       "Columns                              512            512  \n",
       "HighBit                               15             11  \n",
       "ImageOrientationPatient_0              1              1  \n",
       "ImageOrientationPatient_1              0              0  \n",
       "ImageOrientationPatient_2              0              0  \n",
       "ImageOrientationPatient_3              0              0  \n",
       "ImageOrientationPatient_4       0.986286              1  \n",
       "ImageOrientationPatient_5      -0.165048              0  \n",
       "ImagePositionPatient_0              -125           -125  \n",
       "ImagePositionPatient_1          -120.286            -16  \n",
       "ImagePositionPatient_2           131.737           98.2  \n",
       "Modality                              CT             CT  \n",
       "PatientID                    ID_7a7c9c9e    ID_ab5e477f  \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2  \n",
       "PixelRepresentation                    1              0  \n",
       "PixelSpacing_0                  0.488281       0.488281  \n",
       "PixelSpacing_1                  0.488281       0.488281  \n",
       "RescaleIntercept                   -1024          -1024  \n",
       "RescaleSlope                           1              1  \n",
       "Rows                                 512            512  \n",
       "SOPInstanceUID              ID_000134952   ID_000176f2a  \n",
       "SamplesPerPixel                        1              1  \n",
       "SeriesInstanceUID          ID_a52a0112d5  ID_f552d3b922  \n",
       "StudyID                              NaN            NaN  \n",
       "StudyInstanceUID           ID_fa950a03af  ID_965d8b3d8e  \n",
       "WindowCenter                          30            NaN  \n",
       "WindowCenter_0                       NaN             36  \n",
       "WindowCenter_1                       NaN             36  \n",
       "WindowWidth                           80            NaN  \n",
       "WindowWidth_0                        NaN             80  \n",
       "WindowWidth_1                        NaN             80  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First five rows, transposed so each column prints as a row\n",
    "df.iloc[:5].T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cce4c6aa74ec4e65adc8cafc672f8ba1",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=3518), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Apply norm_study to every group name in a pool of worker processes and\n",
    "# concatenate the per-group results; tqdm shows progress over the mapped results\n",
    "test_images_dir = f'data/unzip/{stage}_test_images'\n",
    "with ProcessPoolExecutor(max_workers=32) as pool:\n",
    "    per_group = pool.map(norm_study, repeat(test_images_dir), group_names)\n",
    "    extended_df = pd.concat(tqdm(per_group, total=len(group_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>13820</th>\n",
       "      <th>18901</th>\n",
       "      <th>20132</th>\n",
       "      <th>24764</th>\n",
       "      <th>36224</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <td>13820</td>\n",
       "      <td>18901</td>\n",
       "      <td>20132</td>\n",
       "      <td>24764</td>\n",
       "      <td>36224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BitsAllocated</th>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BitsStored</th>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Columns</th>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HighBit</th>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_1</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_3</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_4</th>\n",
       "      <td>0.939693</td>\n",
       "      <td>0.939693</td>\n",
       "      <td>0.939693</td>\n",
       "      <td>0.939693</td>\n",
       "      <td>0.939693</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImageOrientationPatient_5</th>\n",
       "      <td>-0.34202</td>\n",
       "      <td>-0.34202</td>\n",
       "      <td>-0.34202</td>\n",
       "      <td>-0.34202</td>\n",
       "      <td>-0.34202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_0</th>\n",
       "      <td>-167</td>\n",
       "      <td>-167</td>\n",
       "      <td>-167</td>\n",
       "      <td>-167</td>\n",
       "      <td>-167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_1</th>\n",
       "      <td>17.8026</td>\n",
       "      <td>17.8026</td>\n",
       "      <td>17.8026</td>\n",
       "      <td>17.8026</td>\n",
       "      <td>17.8026</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ImagePositionPatient_2</th>\n",
       "      <td>340.091</td>\n",
       "      <td>302.953</td>\n",
       "      <td>324.153</td>\n",
       "      <td>387.753</td>\n",
       "      <td>265.891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Modality</th>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "      <td>CT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PatientID</th>\n",
       "      <td>ID_f997418a</td>\n",
       "      <td>ID_f997418a</td>\n",
       "      <td>ID_f997418a</td>\n",
       "      <td>ID_f997418a</td>\n",
       "      <td>ID_f997418a</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PhotometricInterpretation</th>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "      <td>MONOCHROME2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelRepresentation</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelSpacing_0</th>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PixelSpacing_1</th>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "      <td>0.488281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RescaleIntercept</th>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "      <td>-1024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RescaleSlope</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rows</th>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "      <td>512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SOPInstanceUID</th>\n",
       "      <td>ID_1cf8d2973</td>\n",
       "      <td>ID_2775d5917</td>\n",
       "      <td>ID_29fb61fbb</td>\n",
       "      <td>ID_33ba827f2</td>\n",
       "      <td>ID_4c1ea3745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SamplesPerPixel</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SeriesInstanceUID</th>\n",
       "      <td>ID_0018be306d</td>\n",
       "      <td>ID_0018be306d</td>\n",
       "      <td>ID_0018be306d</td>\n",
       "      <td>ID_0018be306d</td>\n",
       "      <td>ID_0018be306d</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>StudyID</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>StudyInstanceUID</th>\n",
       "      <td>ID_16aac16e79</td>\n",
       "      <td>ID_16aac16e79</td>\n",
       "      <td>ID_16aac16e79</td>\n",
       "      <td>ID_16aac16e79</td>\n",
       "      <td>ID_16aac16e79</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter_0</th>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowCenter_1</th>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth_0</th>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WindowWidth_1</th>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "      <td>80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>normz</th>\n",
       "      <td>37.1382</td>\n",
       "      <td>0</td>\n",
       "      <td>21.2001</td>\n",
       "      <td>84.8</td>\n",
       "      <td>-37.0619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>xcm</th>\n",
       "      <td>255.127</td>\n",
       "      <td>255.127</td>\n",
       "      <td>255.127</td>\n",
       "      <td>255.127</td>\n",
       "      <td>255.127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ycm</th>\n",
       "      <td>241.378</td>\n",
       "      <td>241.378</td>\n",
       "      <td>241.378</td>\n",
       "      <td>241.378</td>\n",
       "      <td>241.378</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>theta</th>\n",
       "      <td>0.132157</td>\n",
       "      <td>0.132157</td>\n",
       "      <td>0.132157</td>\n",
       "      <td>0.132157</td>\n",
       "      <td>0.132157</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pct_tissue</th>\n",
       "      <td>0.191586</td>\n",
       "      <td>0.271954</td>\n",
       "      <td>0.242237</td>\n",
       "      <td>0.00454712</td>\n",
       "      <td>0.143639</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   13820          18901          20132  \\\n",
       "Unnamed: 0                         13820          18901          20132   \n",
       "BitsAllocated                         16             16             16   \n",
       "BitsStored                            12             12             12   \n",
       "Columns                              512            512            512   \n",
       "HighBit                               11             11             11   \n",
       "ImageOrientationPatient_0              1              1              1   \n",
       "ImageOrientationPatient_1              0              0              0   \n",
       "ImageOrientationPatient_2              0              0              0   \n",
       "ImageOrientationPatient_3              0              0              0   \n",
       "ImageOrientationPatient_4       0.939693       0.939693       0.939693   \n",
       "ImageOrientationPatient_5       -0.34202       -0.34202       -0.34202   \n",
       "ImagePositionPatient_0              -167           -167           -167   \n",
       "ImagePositionPatient_1           17.8026        17.8026        17.8026   \n",
       "ImagePositionPatient_2           340.091        302.953        324.153   \n",
       "Modality                              CT             CT             CT   \n",
       "PatientID                    ID_f997418a    ID_f997418a    ID_f997418a   \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
       "PixelRepresentation                    0              0              0   \n",
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
       "RescaleIntercept                   -1024          -1024          -1024   \n",
       "RescaleSlope                           1              1              1   \n",
       "Rows                                 512            512            512   \n",
       "SOPInstanceUID              ID_1cf8d2973   ID_2775d5917   ID_29fb61fbb   \n",
       "SamplesPerPixel                        1              1              1   \n",
       "SeriesInstanceUID          ID_0018be306d  ID_0018be306d  ID_0018be306d   \n",
       "StudyID                              NaN            NaN            NaN   \n",
       "StudyInstanceUID           ID_16aac16e79  ID_16aac16e79  ID_16aac16e79   \n",
       "WindowCenter                         NaN            NaN            NaN   \n",
       "WindowCenter_0                        40             40             40   \n",
       "WindowCenter_1                        40             40             40   \n",
       "WindowWidth                          NaN            NaN            NaN   \n",
       "WindowWidth_0                         80             80             80   \n",
       "WindowWidth_1                         80             80             80   \n",
       "normz                            37.1382              0        21.2001   \n",
       "xcm                              255.127        255.127        255.127   \n",
       "ycm                              241.378        241.378        241.378   \n",
       "theta                           0.132157       0.132157       0.132157   \n",
       "pct_tissue                      0.191586       0.271954       0.242237   \n",
       "\n",
       "                                   24764          36224  \n",
       "Unnamed: 0                         24764          36224  \n",
       "BitsAllocated                         16             16  \n",
       "BitsStored                            12             12  \n",
       "Columns                              512            512  \n",
       "HighBit                               11             11  \n",
       "ImageOrientationPatient_0              1              1  \n",
       "ImageOrientationPatient_1              0              0  \n",
       "ImageOrientationPatient_2              0              0  \n",
       "ImageOrientationPatient_3              0              0  \n",
       "ImageOrientationPatient_4       0.939693       0.939693  \n",
       "ImageOrientationPatient_5       -0.34202       -0.34202  \n",
       "ImagePositionPatient_0              -167           -167  \n",
       "ImagePositionPatient_1           17.8026        17.8026  \n",
       "ImagePositionPatient_2           387.753        265.891  \n",
       "Modality                              CT             CT  \n",
       "PatientID                    ID_f997418a    ID_f997418a  \n",
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2  \n",
       "PixelRepresentation                    0              0  \n",
       "PixelSpacing_0                  0.488281       0.488281  \n",
       "PixelSpacing_1                  0.488281       0.488281  \n",
       "RescaleIntercept                   -1024          -1024  \n",
       "RescaleSlope                           1              1  \n",
       "Rows                                 512            512  \n",
       "SOPInstanceUID              ID_33ba827f2   ID_4c1ea3745  \n",
       "SamplesPerPixel                        1              1  \n",
       "SeriesInstanceUID          ID_0018be306d  ID_0018be306d  \n",
       "StudyID                              NaN            NaN  \n",
       "StudyInstanceUID           ID_16aac16e79  ID_16aac16e79  \n",
       "WindowCenter                         NaN            NaN  \n",
       "WindowCenter_0                        40             40  \n",
       "WindowCenter_1                        40             40  \n",
       "WindowWidth                          NaN            NaN  \n",
       "WindowWidth_0                         80             80  \n",
       "WindowWidth_1                         80             80  \n",
       "normz                               84.8       -37.0619  \n",
       "xcm                              255.127        255.127  \n",
       "ycm                              241.378        241.378  \n",
       "theta                           0.132157       0.132157  \n",
       "pct_tissue                    0.00454712       0.143639  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Transposed preview: one column per sample row, one line per field.\n",
    "extended_df.head(5).transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "121232"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normalization only adds columns, so the row count must be unchanged.\n",
    "assert len(df) == len(extended_df), (\n",
    "    f'row count changed: {len(df)} -> {len(extended_df)}'\n",
    ")\n",
    "len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write extended_df (with the normalization columns) to the stage-specific test CSV.\n",
    "extended_df.to_csv(path_or_buf=f'data/{stage}_test_dicom_norm.csv')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}