2077 lines (2076 with data), 77.2 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Brain translational and rotational normalization\n",
"* Zero z at the slice with max brain-tissue area (per series)\n",
"* Use analytic minimization of the moment of inertia to find the orientation angle\n",
"\n",
"Generates:\n",
"* `train_dicom_diags_norm.csv` and `test_dicom_norm.csv` with 5 new columns added: \n",
"  * `normz`: z position relative to the max-area slice of the same series\n",
"  * `xcm`, `ycm`: center of mass of the brain tissue in the max-area slice, in pixel coordinates (same value for every slice of a series)\n",
"  * `theta` (rads): the CW angle the slice must be rotated to straighten it (same value for every slice of a series)\n",
"  * `pct_tissue`: fraction (between 0 and 1) of the pixels of each slice that fall in the brain-tissue intensity range"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Dataset stage prefix; it is interpolated into the data paths used below\n",
"# (e.g. data/{stage}_train_dicom_diags.csv and data/unzip/{stage}_train_images).\n",
"stage = \"stage_2\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pydicom\n",
"import math\n",
"from pathlib import Path\n",
"from fastai.vision import *\n",
"from matplotlib import pyplot as plt\n",
"from scipy import ndimage, misc\n",
"from itertools import repeat\n",
"import pandas as pd\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"from tqdm import tqdm_notebook as tqdm\n",
"tqdm().pandas()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def dcm_to_np(dcm):\n",
"    ''' Dicom to numpy array, with the header's linear rescale applied\n",
"\n",
"    Computes ``pixel_array * RescaleSlope + RescaleIntercept``.\n",
"\n",
"    :param dcm: dicom object exposing `pixel_array`, `RescaleSlope` and `RescaleIntercept`\n",
"    :return: float64 numpy array with the same shape as `dcm.pixel_array`\n",
"    '''\n",
"    rescale_slope, rescale_intercept = float(dcm.RescaleSlope), float(dcm.RescaleIntercept)\n",
"    # np.float was just an alias of the builtin float (i.e. float64) and was removed\n",
"    # in NumPy 1.24, so name the dtype explicitly.\n",
"    t = dcm.pixel_array.astype(np.float64)\n",
"    t = t * rescale_slope + rescale_intercept # rescale\n",
"    return t"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def get_fid_with_max_area(dir, dfs):\n",
"    ''' Given a single study return the fid (fileid) with biggest brain area\n",
"\n",
"    A pixel counts as brain matter when its rescaled value is strictly between 0 and 100.\n",
"\n",
"    :param dir: dataset directory holding the `<SOPInstanceUID>.dcm` files\n",
"    :param dfs: pandas dataframe with 1 study (needs a `SOPInstanceUID` column)\n",
"    :return: (fid_max_area, pcts_tissue); pcts_tissue has one entry per row of `dfs`, in row\n",
"             order, with the fraction of brain-matter pixels (NaN for slices that failed to load)\n",
"    :raises ValueError: if no readable slice of the study contains any brain-matter pixel\n",
"    '''\n",
"    pcts_tissue = []\n",
"    fid_max_area = None\n",
"    max_area = 0\n",
"    for fid in dfs['SOPInstanceUID']:  # keep fid in a variable: don't index inside the f-string\n",
"        try:\n",
"            dcm = pydicom.dcmread(f'{dir}/{fid}.dcm')\n",
"            t = dcm_to_np(dcm)\n",
"        except Exception as e:\n",
"            print(e,fid)\n",
"            # Still emit one value for this row so the list stays aligned with dfs,\n",
"            # and never fall through to the pixels of the previously read slice.\n",
"            pcts_tissue.append(float('nan'))\n",
"            continue\n",
"        area = int(np.count_nonzero(np.logical_and(0 < t, t < 100))) # select only brain matter\n",
"        if area > max_area:\n",
"            max_area = area\n",
"            fid_max_area = fid\n",
"        pcts_tissue.append(area / t.size)\n",
"    if fid_max_area is None:\n",
"        raise ValueError(f'no slice with brain tissue pixels found in {dir} for this study')\n",
"    return fid_max_area, pcts_tissue"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def get_axis(brain):\n",
"    '''\n",
"    Get orientation axis.\n",
"\n",
"    Tissue pixels are those with 0 < value < 100. The axis is the line y = m*x + n through\n",
"    their center of mass, where m is the minus-sign root of a*m^2 + b*m + c = 0 with\n",
"    a = sum(x*y), b = sum(x^2 - y^2), c = -a (x, y measured from the center of mass).\n",
"\n",
"    :param brain: 2-D numpy array (brain); x is the column index and y the row index\n",
"    :return: center of masses (xcm, ycm), coefficients (m, n) of the axis equation y=mx+n and angle (th)\n",
"             the brain is rotated counter-clockwise (rotate clockwise to normalize), th = atan(m).\n",
"             All five values are NaN when there is no tissue pixel; m = 0 and th = 0 when the\n",
"             tissue has no preferred direction (a = b = 0).\n",
"    '''\n",
"    pts_org = np.argwhere(np.logical_and(0 < brain, brain < 100))\n",
"    if len(pts_org) == 0:\n",
"        # No tissue: return the all-NaN result explicitly instead of via 0/0 warnings\n",
"        nan = float('nan')\n",
"        return nan, nan, nan, nan, nan\n",
"\n",
"    # Center of masses\n",
"    xcm = pts_org[:,1].mean()\n",
"    ycm = pts_org[:,0].mean()\n",
"\n",
"    # shift brain to center\n",
"    xs = pts_org[:,1] - xcm\n",
"    ys = pts_org[:,0] - ycm\n",
"\n",
"    # coefficients of quad eq am^2+bm+c=0 (c = -a)\n",
"    a = (xs * ys).sum()\n",
"    b = (xs ** 2 - ys ** 2).sum()\n",
"\n",
"    # sqrt(b^2 - 4ac) == sqrt(b^2 + 4a^2) because c = -a\n",
"    disc = np.hypot(b, 2 * a)\n",
"\n",
"    # Minus-sign root m2 = (-b - disc) / (2a); the other root is -1/m2 (perpendicular axis).\n",
"    # NOTE(review): the original comment labelled this root 'min I' - confirm against the\n",
"    # intended definition of the moment of inertia.\n",
"    if disc == 0:\n",
"        # a == b == 0: no preferred direction, so report no tilt instead of 0/0 = NaN\n",
"        m2 = 0.0\n",
"    elif b <= 0:\n",
"        # Algebraically identical form without the (-b - disc) cancellation; it stays\n",
"        # finite for a == 0 (axis-aligned tissue), where the textbook form is 0/0.\n",
"        m2 = -2 * a / (disc - b)\n",
"    elif a == 0:\n",
"        # Limit of the textbook form: a vertical axis\n",
"        m2 = -math.inf\n",
"    else:\n",
"        m2 = (-b - disc) / (2 * a)\n",
"\n",
"    # y-ycm = m*x - m*xcm -> y = m*x - m*xcm + ycm\n",
"    n2 = ycm - m2 * xcm\n",
"\n",
"    th = math.atan(m2)\n",
"\n",
"    return xcm, ycm, m2, n2, th"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def norm_study(dir, group_name):\n",
"    ''' Normalize one study, looked up by key in the module-level groupby `dfg`\n",
"\n",
"    :param dir: dataset directory\n",
"    :param group_name: key of the study to normalize in `dfg`\n",
"    :return: copy of the study's rows with `normz`, `xcm`, `ycm`, `theta` and `pct_tissue` added;\n",
"             `xcm`, `ycm` and `theta` come from the max-area slice and are constant over the study\n",
"    '''\n",
"    g = dfg.get_group(group_name)  # read-only access, no `global` statement needed\n",
"    fid_max_area, pcts_tissue = get_fid_with_max_area(dir, g)\n",
"    f = f'{dir}/{fid_max_area}.dcm'\n",
"    try:\n",
"        dcm = pydicom.dcmread(str(f))\n",
"    except Exception as e:\n",
"        # Log which file failed, then propagate: nothing below can run without the slice\n",
"        print(e,f)\n",
"        raise\n",
"    brain = dcm_to_np(dcm)\n",
"    xcm, ycm, _, _, th = get_axis(brain)  # slope/intercept of the axis are not stored\n",
"    eg = g.copy()\n",
"    # z of the max-area slice becomes the origin of the normalized z axis\n",
"    z0 = g[g['SOPInstanceUID'] == fid_max_area].iloc[0]['ImagePositionPatient_2']\n",
"    eg['normz'] = eg['ImagePositionPatient_2'] - z0\n",
"    eg['xcm'] = xcm\n",
"    eg['ycm'] = ycm\n",
"    eg['theta'] = th\n",
"    eg['pct_tissue'] = pcts_tissue  # one value per row, in row order\n",
"    return eg"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Train dataset"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# DICOM metadata of the training slices (the preview further down also shows diagnosis columns)\n",
"df = pd.read_csv(f'data/{stage}_train_dicom_diags.csv')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# One group per series; norm_study() looks studies up in this module-level `dfg`\n",
"dfg = df.groupby('SeriesInstanceUID')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Group keys, one per series; each becomes one norm_study() task\n",
"group_names = list(dfg.groups.keys())\n",
"#group_names = group_names[:64] # test with small subset"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4bf5a2291e32453ca38813214dbe1a11",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=21744), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Normalize every series in a process pool: each norm_study call returns one DataFrame,\n",
"# tqdm wraps the lazy map iterator for the progress bar, and pd.concat joins the results.\n",
"# NOTE(review): norm_study reads the global `dfg` inside the workers - presumably inherited\n",
"# through fork; confirm on platforms that spawn worker processes.\n",
"with ProcessPoolExecutor(max_workers=32) as e:\n",
"    extended_df = pd.concat(\n",
"        tqdm(e.map(norm_study, repeat(f'data/unzip/{stage}_train_images'), group_names), total=len(group_names)))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>556142</th>\n",
" <th>561465</th>\n",
" <th>580918</th>\n",
" <th>584578</th>\n",
" <th>617029</th>\n",
" <th>617400</th>\n",
" <th>645424</th>\n",
" <th>661661</th>\n",
" <th>665938</th>\n",
" <th>748282</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Unnamed: 0</th>\n",
" <td>556142</td>\n",
" <td>561465</td>\n",
" <td>580918</td>\n",
" <td>584578</td>\n",
" <td>617029</td>\n",
" <td>617400</td>\n",
" <td>645424</td>\n",
" <td>661661</td>\n",
" <td>665938</td>\n",
" <td>748282</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BitsAllocated</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BitsStored</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Columns</th>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HighBit</th>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_4</th>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" <td>0.979925</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_5</th>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" <td>-0.199368</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_0</th>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_1</th>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" <td>-122.491</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_2</th>\n",
" <td>166.528</td>\n",
" <td>156.323</td>\n",
" <td>176.733</td>\n",
" <td>49.171</td>\n",
" <td>89.991</td>\n",
" <td>186.938</td>\n",
" <td>125.708</td>\n",
" <td>146.118</td>\n",
" <td>74.6834</td>\n",
" <td>141.016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Modality</th>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PatientID</th>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" <td>ID_160aea75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PhotometricInterpretation</th>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelRepresentation</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelSpacing_0</th>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelSpacing_1</th>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RescaleIntercept</th>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RescaleSlope</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rows</th>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SOPInstanceUID</th>\n",
" <td>ID_bcdecac1b</td>\n",
" <td>ID_beb4aacbe</td>\n",
" <td>ID_c554939b0</td>\n",
" <td>ID_c6909d6fa</td>\n",
" <td>ID_d1ad467b0</td>\n",
" <td>ID_d1d25ea8c</td>\n",
" <td>ID_db59bb95e</td>\n",
" <td>ID_e0ed751c2</td>\n",
" <td>ID_e262d76e2</td>\n",
" <td>ID_fe77ec61b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SamplesPerPixel</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SeriesInstanceUID</th>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" <td>ID_000a935543</td>\n",
" </tr>\n",
" <tr>\n",
" <th>StudyID</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>StudyInstanceUID</th>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" <td>ID_41d976e5d3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter</th>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter_0</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter_1</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth</th>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth_0</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth_1</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fid</th>\n",
" <td>ID_bcdecac1b</td>\n",
" <td>ID_beb4aacbe</td>\n",
" <td>ID_c554939b0</td>\n",
" <td>ID_c6909d6fa</td>\n",
" <td>ID_d1ad467b0</td>\n",
" <td>ID_d1d25ea8c</td>\n",
" <td>ID_db59bb95e</td>\n",
" <td>ID_e0ed751c2</td>\n",
" <td>ID_e262d76e2</td>\n",
" <td>ID_fe77ec61b</td>\n",
" </tr>\n",
" <tr>\n",
" <th>any</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>epidural</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intraparenchymal</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>intraventricular</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>subarachnoid</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>subdural</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>normz</th>\n",
" <td>61.23</td>\n",
" <td>51.0251</td>\n",
" <td>71.4351</td>\n",
" <td>-56.1273</td>\n",
" <td>-15.3073</td>\n",
" <td>81.64</td>\n",
" <td>20.41</td>\n",
" <td>40.82</td>\n",
" <td>-30.6149</td>\n",
" <td>35.7176</td>\n",
" </tr>\n",
" <tr>\n",
" <th>xcm</th>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" <td>249.719</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ycm</th>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" <td>249.632</td>\n",
" </tr>\n",
" <tr>\n",
" <th>theta</th>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" <td>-0.0280377</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pct_tissue</th>\n",
" <td>0.140537</td>\n",
" <td>0.194378</td>\n",
" <td>0.0751762</td>\n",
" <td>0.170967</td>\n",
" <td>0.320599</td>\n",
" <td>0.0209999</td>\n",
" <td>0.305298</td>\n",
" <td>0.242409</td>\n",
" <td>0.247257</td>\n",
" <td>0.262268</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 556142 561465 580918 \\\n",
"Unnamed: 0 556142 561465 580918 \n",
"BitsAllocated 16 16 16 \n",
"BitsStored 16 16 16 \n",
"Columns 512 512 512 \n",
"HighBit 15 15 15 \n",
"ImageOrientationPatient_0 1 1 1 \n",
"ImageOrientationPatient_1 0 0 0 \n",
"ImageOrientationPatient_2 0 0 0 \n",
"ImageOrientationPatient_3 0 0 0 \n",
"ImageOrientationPatient_4 0.979925 0.979925 0.979925 \n",
"ImageOrientationPatient_5 -0.199368 -0.199368 -0.199368 \n",
"ImagePositionPatient_0 -125 -125 -125 \n",
"ImagePositionPatient_1 -122.491 -122.491 -122.491 \n",
"ImagePositionPatient_2 166.528 156.323 176.733 \n",
"Modality CT CT CT \n",
"PatientID ID_160aea75 ID_160aea75 ID_160aea75 \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 1 1 1 \n",
"PixelSpacing_0 0.488281 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 -1024 \n",
"RescaleSlope 1 1 1 \n",
"Rows 512 512 512 \n",
"SOPInstanceUID ID_bcdecac1b ID_beb4aacbe ID_c554939b0 \n",
"SamplesPerPixel 1 1 1 \n",
"SeriesInstanceUID ID_000a935543 ID_000a935543 ID_000a935543 \n",
"StudyID NaN NaN NaN \n",
"StudyInstanceUID ID_41d976e5d3 ID_41d976e5d3 ID_41d976e5d3 \n",
"WindowCenter 30 30 30 \n",
"WindowCenter_0 NaN NaN NaN \n",
"WindowCenter_1 NaN NaN NaN \n",
"WindowWidth 80 80 80 \n",
"WindowWidth_0 NaN NaN NaN \n",
"WindowWidth_1 NaN NaN NaN \n",
"fid ID_bcdecac1b ID_beb4aacbe ID_c554939b0 \n",
"any 0 0 0 \n",
"epidural 0 0 0 \n",
"intraparenchymal 0 0 0 \n",
"intraventricular 0 0 0 \n",
"subarachnoid 0 0 0 \n",
"subdural 0 0 0 \n",
"normz 61.23 51.0251 71.4351 \n",
"xcm 249.719 249.719 249.719 \n",
"ycm 249.632 249.632 249.632 \n",
"theta -0.0280377 -0.0280377 -0.0280377 \n",
"pct_tissue 0.140537 0.194378 0.0751762 \n",
"\n",
" 584578 617029 617400 \\\n",
"Unnamed: 0 584578 617029 617400 \n",
"BitsAllocated 16 16 16 \n",
"BitsStored 16 16 16 \n",
"Columns 512 512 512 \n",
"HighBit 15 15 15 \n",
"ImageOrientationPatient_0 1 1 1 \n",
"ImageOrientationPatient_1 0 0 0 \n",
"ImageOrientationPatient_2 0 0 0 \n",
"ImageOrientationPatient_3 0 0 0 \n",
"ImageOrientationPatient_4 0.979925 0.979925 0.979925 \n",
"ImageOrientationPatient_5 -0.199368 -0.199368 -0.199368 \n",
"ImagePositionPatient_0 -125 -125 -125 \n",
"ImagePositionPatient_1 -122.491 -122.491 -122.491 \n",
"ImagePositionPatient_2 49.171 89.991 186.938 \n",
"Modality CT CT CT \n",
"PatientID ID_160aea75 ID_160aea75 ID_160aea75 \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 1 1 1 \n",
"PixelSpacing_0 0.488281 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 -1024 \n",
"RescaleSlope 1 1 1 \n",
"Rows 512 512 512 \n",
"SOPInstanceUID ID_c6909d6fa ID_d1ad467b0 ID_d1d25ea8c \n",
"SamplesPerPixel 1 1 1 \n",
"SeriesInstanceUID ID_000a935543 ID_000a935543 ID_000a935543 \n",
"StudyID NaN NaN NaN \n",
"StudyInstanceUID ID_41d976e5d3 ID_41d976e5d3 ID_41d976e5d3 \n",
"WindowCenter 30 30 30 \n",
"WindowCenter_0 NaN NaN NaN \n",
"WindowCenter_1 NaN NaN NaN \n",
"WindowWidth 80 80 80 \n",
"WindowWidth_0 NaN NaN NaN \n",
"WindowWidth_1 NaN NaN NaN \n",
"fid ID_c6909d6fa ID_d1ad467b0 ID_d1d25ea8c \n",
"any 0 0 0 \n",
"epidural 0 0 0 \n",
"intraparenchymal 0 0 0 \n",
"intraventricular 0 0 0 \n",
"subarachnoid 0 0 0 \n",
"subdural 0 0 0 \n",
"normz -56.1273 -15.3073 81.64 \n",
"xcm 249.719 249.719 249.719 \n",
"ycm 249.632 249.632 249.632 \n",
"theta -0.0280377 -0.0280377 -0.0280377 \n",
"pct_tissue 0.170967 0.320599 0.0209999 \n",
"\n",
" 645424 661661 665938 \\\n",
"Unnamed: 0 645424 661661 665938 \n",
"BitsAllocated 16 16 16 \n",
"BitsStored 16 16 16 \n",
"Columns 512 512 512 \n",
"HighBit 15 15 15 \n",
"ImageOrientationPatient_0 1 1 1 \n",
"ImageOrientationPatient_1 0 0 0 \n",
"ImageOrientationPatient_2 0 0 0 \n",
"ImageOrientationPatient_3 0 0 0 \n",
"ImageOrientationPatient_4 0.979925 0.979925 0.979925 \n",
"ImageOrientationPatient_5 -0.199368 -0.199368 -0.199368 \n",
"ImagePositionPatient_0 -125 -125 -125 \n",
"ImagePositionPatient_1 -122.491 -122.491 -122.491 \n",
"ImagePositionPatient_2 125.708 146.118 74.6834 \n",
"Modality CT CT CT \n",
"PatientID ID_160aea75 ID_160aea75 ID_160aea75 \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 1 1 1 \n",
"PixelSpacing_0 0.488281 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 -1024 \n",
"RescaleSlope 1 1 1 \n",
"Rows 512 512 512 \n",
"SOPInstanceUID ID_db59bb95e ID_e0ed751c2 ID_e262d76e2 \n",
"SamplesPerPixel 1 1 1 \n",
"SeriesInstanceUID ID_000a935543 ID_000a935543 ID_000a935543 \n",
"StudyID NaN NaN NaN \n",
"StudyInstanceUID ID_41d976e5d3 ID_41d976e5d3 ID_41d976e5d3 \n",
"WindowCenter 30 30 30 \n",
"WindowCenter_0 NaN NaN NaN \n",
"WindowCenter_1 NaN NaN NaN \n",
"WindowWidth 80 80 80 \n",
"WindowWidth_0 NaN NaN NaN \n",
"WindowWidth_1 NaN NaN NaN \n",
"fid ID_db59bb95e ID_e0ed751c2 ID_e262d76e2 \n",
"any 0 0 0 \n",
"epidural 0 0 0 \n",
"intraparenchymal 0 0 0 \n",
"intraventricular 0 0 0 \n",
"subarachnoid 0 0 0 \n",
"subdural 0 0 0 \n",
"normz 20.41 40.82 -30.6149 \n",
"xcm 249.719 249.719 249.719 \n",
"ycm 249.632 249.632 249.632 \n",
"theta -0.0280377 -0.0280377 -0.0280377 \n",
"pct_tissue 0.305298 0.242409 0.247257 \n",
"\n",
" 748282 \n",
"Unnamed: 0 748282 \n",
"BitsAllocated 16 \n",
"BitsStored 16 \n",
"Columns 512 \n",
"HighBit 15 \n",
"ImageOrientationPatient_0 1 \n",
"ImageOrientationPatient_1 0 \n",
"ImageOrientationPatient_2 0 \n",
"ImageOrientationPatient_3 0 \n",
"ImageOrientationPatient_4 0.979925 \n",
"ImageOrientationPatient_5 -0.199368 \n",
"ImagePositionPatient_0 -125 \n",
"ImagePositionPatient_1 -122.491 \n",
"ImagePositionPatient_2 141.016 \n",
"Modality CT \n",
"PatientID ID_160aea75 \n",
"PhotometricInterpretation MONOCHROME2 \n",
"PixelRepresentation 1 \n",
"PixelSpacing_0 0.488281 \n",
"PixelSpacing_1 0.488281 \n",
"RescaleIntercept -1024 \n",
"RescaleSlope 1 \n",
"Rows 512 \n",
"SOPInstanceUID ID_fe77ec61b \n",
"SamplesPerPixel 1 \n",
"SeriesInstanceUID ID_000a935543 \n",
"StudyID NaN \n",
"StudyInstanceUID ID_41d976e5d3 \n",
"WindowCenter 30 \n",
"WindowCenter_0 NaN \n",
"WindowCenter_1 NaN \n",
"WindowWidth 80 \n",
"WindowWidth_0 NaN \n",
"WindowWidth_1 NaN \n",
"fid ID_fe77ec61b \n",
"any 0 \n",
"epidural 0 \n",
"intraparenchymal 0 \n",
"intraventricular 0 \n",
"subarachnoid 0 \n",
"subdural 0 \n",
"normz 35.7176 \n",
"xcm 249.719 \n",
"ycm 249.632 \n",
"theta -0.0280377 \n",
"pct_tissue 0.262268 "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extended_df[90:100].T"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"752802"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: the normalized frame must have exactly as many rows as the input\n",
"assert len(df) == len(extended_df)\n",
"len(df)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Persist the training metadata with the 5 normalization columns added.\n",
"# NOTE(review): to_csv also writes the index by default, and the frame already carries an\n",
"# `Unnamed: 0` column (see the preview above) - confirm downstream readers expect both.\n",
"extended_df.to_csv(f'data/{stage}_train_dicom_diags_norm.csv')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Test set: `df` is rebound here, so the training frame is no longer reachable under this name\n",
"df = pd.read_csv(f'data/{stage}_test_dicom.csv')"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Rebind the module-level `dfg` that norm_study() reads, now grouping the test slices by series\n",
"dfg = df.groupby('SeriesInstanceUID')"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3518"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group keys of the test set (one per series); the cell displays how many there are\n",
"group_names = list(dfg.groups.keys())\n",
"len(group_names)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Unnamed: 0</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BitsAllocated</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BitsStored</th>\n",
" <td>12</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Columns</th>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HighBit</th>\n",
" <td>11</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>15</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_4</th>\n",
" <td>0.981627</td>\n",
" <td>0.987688</td>\n",
" <td>0.927184</td>\n",
" <td>0.986286</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_5</th>\n",
" <td>-0.190809</td>\n",
" <td>-0.156434</td>\n",
" <td>-0.374607</td>\n",
" <td>-0.165048</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_0</th>\n",
" <td>-112</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" <td>-125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_1</th>\n",
" <td>-9.80803</td>\n",
" <td>-127.061</td>\n",
" <td>-119.998</td>\n",
" <td>-120.286</td>\n",
" <td>-16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_2</th>\n",
" <td>267.219</td>\n",
" <td>197.807</td>\n",
" <td>78.6604</td>\n",
" <td>131.737</td>\n",
" <td>98.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Modality</th>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PatientID</th>\n",
" <td>ID_52c1ab5a</td>\n",
" <td>ID_39e44bce</td>\n",
" <td>ID_6f87831a</td>\n",
" <td>ID_7a7c9c9e</td>\n",
" <td>ID_ab5e477f</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PhotometricInterpretation</th>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelRepresentation</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelSpacing_0</th>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelSpacing_1</th>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RescaleIntercept</th>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RescaleSlope</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rows</th>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SOPInstanceUID</th>\n",
" <td>ID_000000e27</td>\n",
" <td>ID_000009146</td>\n",
" <td>ID_00007b8cb</td>\n",
" <td>ID_000134952</td>\n",
" <td>ID_000176f2a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SamplesPerPixel</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SeriesInstanceUID</th>\n",
" <td>ID_4d28912ba6</td>\n",
" <td>ID_acabdeee86</td>\n",
" <td>ID_d00cee7f0c</td>\n",
" <td>ID_a52a0112d5</td>\n",
" <td>ID_f552d3b922</td>\n",
" </tr>\n",
" <tr>\n",
" <th>StudyID</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>StudyInstanceUID</th>\n",
" <td>ID_1f6d1e8aeb</td>\n",
" <td>ID_4a8d7ec19f</td>\n",
" <td>ID_a6ca244172</td>\n",
" <td>ID_fa950a03af</td>\n",
" <td>ID_965d8b3d8e</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter</th>\n",
" <td>NaN</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>30</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter_0</th>\n",
" <td>40</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter_1</th>\n",
" <td>40</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth</th>\n",
" <td>NaN</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth_0</th>\n",
" <td>80</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth_1</th>\n",
" <td>80</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>80</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 \\\n",
"Unnamed: 0 0 1 2 \n",
"BitsAllocated 16 16 16 \n",
"BitsStored 12 16 16 \n",
"Columns 512 512 512 \n",
"HighBit 11 15 15 \n",
"ImageOrientationPatient_0 1 1 1 \n",
"ImageOrientationPatient_1 0 0 0 \n",
"ImageOrientationPatient_2 0 0 0 \n",
"ImageOrientationPatient_3 0 0 0 \n",
"ImageOrientationPatient_4 0.981627 0.987688 0.927184 \n",
"ImageOrientationPatient_5 -0.190809 -0.156434 -0.374607 \n",
"ImagePositionPatient_0 -112 -125 -125 \n",
"ImagePositionPatient_1 -9.80803 -127.061 -119.998 \n",
"ImagePositionPatient_2 267.219 197.807 78.6604 \n",
"Modality CT CT CT \n",
"PatientID ID_52c1ab5a ID_39e44bce ID_6f87831a \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 0 1 1 \n",
"PixelSpacing_0 0.488281 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 -1024 \n",
"RescaleSlope 1 1 1 \n",
"Rows 512 512 512 \n",
"SOPInstanceUID ID_000000e27 ID_000009146 ID_00007b8cb \n",
"SamplesPerPixel 1 1 1 \n",
"SeriesInstanceUID ID_4d28912ba6 ID_acabdeee86 ID_d00cee7f0c \n",
"StudyID NaN NaN NaN \n",
"StudyInstanceUID ID_1f6d1e8aeb ID_4a8d7ec19f ID_a6ca244172 \n",
"WindowCenter NaN 30 30 \n",
"WindowCenter_0 40 NaN NaN \n",
"WindowCenter_1 40 NaN NaN \n",
"WindowWidth NaN 80 80 \n",
"WindowWidth_0 80 NaN NaN \n",
"WindowWidth_1 80 NaN NaN \n",
"\n",
" 3 4 \n",
"Unnamed: 0 3 4 \n",
"BitsAllocated 16 16 \n",
"BitsStored 16 12 \n",
"Columns 512 512 \n",
"HighBit 15 11 \n",
"ImageOrientationPatient_0 1 1 \n",
"ImageOrientationPatient_1 0 0 \n",
"ImageOrientationPatient_2 0 0 \n",
"ImageOrientationPatient_3 0 0 \n",
"ImageOrientationPatient_4 0.986286 1 \n",
"ImageOrientationPatient_5 -0.165048 0 \n",
"ImagePositionPatient_0 -125 -125 \n",
"ImagePositionPatient_1 -120.286 -16 \n",
"ImagePositionPatient_2 131.737 98.2 \n",
"Modality CT CT \n",
"PatientID ID_7a7c9c9e ID_ab5e477f \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 1 0 \n",
"PixelSpacing_0 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 \n",
"RescaleSlope 1 1 \n",
"Rows 512 512 \n",
"SOPInstanceUID ID_000134952 ID_000176f2a \n",
"SamplesPerPixel 1 1 \n",
"SeriesInstanceUID ID_a52a0112d5 ID_f552d3b922 \n",
"StudyID NaN NaN \n",
"StudyInstanceUID ID_fa950a03af ID_965d8b3d8e \n",
"WindowCenter 30 NaN \n",
"WindowCenter_0 NaN 36 \n",
"WindowCenter_1 NaN 36 \n",
"WindowWidth 80 NaN \n",
"WindowWidth_0 NaN 80 \n",
"WindowWidth_1 NaN 80 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows, transposed so every column of df is listed as a row.\n",
"df.head().transpose()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cce4c6aa74ec4e65adc8cafc672f8ba1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(IntProgress(value=0, max=3518), HTML(value='')))"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Run norm_study once per entry of group_names in a pool of worker processes,\n",
"# then stack the per-group results into a single dataframe.\n",
"test_images_dir = f'data/unzip/{stage}_test_images'\n",
"with ProcessPoolExecutor(max_workers=32) as executor:\n",
"    # executor.map yields results lazily and has no len(), hence the explicit total= below\n",
"    per_study_frames = executor.map(norm_study, repeat(test_images_dir), group_names)\n",
"    extended_df = pd.concat(tqdm(per_study_frames, total=len(group_names)))"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>13820</th>\n",
" <th>18901</th>\n",
" <th>20132</th>\n",
" <th>24764</th>\n",
" <th>36224</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Unnamed: 0</th>\n",
" <td>13820</td>\n",
" <td>18901</td>\n",
" <td>20132</td>\n",
" <td>24764</td>\n",
" <td>36224</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BitsAllocated</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>BitsStored</th>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Columns</th>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>HighBit</th>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" <td>11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_4</th>\n",
" <td>0.939693</td>\n",
" <td>0.939693</td>\n",
" <td>0.939693</td>\n",
" <td>0.939693</td>\n",
" <td>0.939693</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImageOrientationPatient_5</th>\n",
" <td>-0.34202</td>\n",
" <td>-0.34202</td>\n",
" <td>-0.34202</td>\n",
" <td>-0.34202</td>\n",
" <td>-0.34202</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_0</th>\n",
" <td>-167</td>\n",
" <td>-167</td>\n",
" <td>-167</td>\n",
" <td>-167</td>\n",
" <td>-167</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_1</th>\n",
" <td>17.8026</td>\n",
" <td>17.8026</td>\n",
" <td>17.8026</td>\n",
" <td>17.8026</td>\n",
" <td>17.8026</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ImagePositionPatient_2</th>\n",
" <td>340.091</td>\n",
" <td>302.953</td>\n",
" <td>324.153</td>\n",
" <td>387.753</td>\n",
" <td>265.891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Modality</th>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" <td>CT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PatientID</th>\n",
" <td>ID_f997418a</td>\n",
" <td>ID_f997418a</td>\n",
" <td>ID_f997418a</td>\n",
" <td>ID_f997418a</td>\n",
" <td>ID_f997418a</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PhotometricInterpretation</th>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" <td>MONOCHROME2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelRepresentation</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelSpacing_0</th>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PixelSpacing_1</th>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" <td>0.488281</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RescaleIntercept</th>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" <td>-1024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>RescaleSlope</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Rows</th>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" <td>512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SOPInstanceUID</th>\n",
" <td>ID_1cf8d2973</td>\n",
" <td>ID_2775d5917</td>\n",
" <td>ID_29fb61fbb</td>\n",
" <td>ID_33ba827f2</td>\n",
" <td>ID_4c1ea3745</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SamplesPerPixel</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SeriesInstanceUID</th>\n",
" <td>ID_0018be306d</td>\n",
" <td>ID_0018be306d</td>\n",
" <td>ID_0018be306d</td>\n",
" <td>ID_0018be306d</td>\n",
" <td>ID_0018be306d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>StudyID</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>StudyInstanceUID</th>\n",
" <td>ID_16aac16e79</td>\n",
" <td>ID_16aac16e79</td>\n",
" <td>ID_16aac16e79</td>\n",
" <td>ID_16aac16e79</td>\n",
" <td>ID_16aac16e79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter_0</th>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowCenter_1</th>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" <td>40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth_0</th>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WindowWidth_1</th>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>normz</th>\n",
" <td>37.1382</td>\n",
" <td>0</td>\n",
" <td>21.2001</td>\n",
" <td>84.8</td>\n",
" <td>-37.0619</td>\n",
" </tr>\n",
" <tr>\n",
" <th>xcm</th>\n",
" <td>255.127</td>\n",
" <td>255.127</td>\n",
" <td>255.127</td>\n",
" <td>255.127</td>\n",
" <td>255.127</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ycm</th>\n",
" <td>241.378</td>\n",
" <td>241.378</td>\n",
" <td>241.378</td>\n",
" <td>241.378</td>\n",
" <td>241.378</td>\n",
" </tr>\n",
" <tr>\n",
" <th>theta</th>\n",
" <td>0.132157</td>\n",
" <td>0.132157</td>\n",
" <td>0.132157</td>\n",
" <td>0.132157</td>\n",
" <td>0.132157</td>\n",
" </tr>\n",
" <tr>\n",
" <th>pct_tissue</th>\n",
" <td>0.191586</td>\n",
" <td>0.271954</td>\n",
" <td>0.242237</td>\n",
" <td>0.00454712</td>\n",
" <td>0.143639</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 13820 18901 20132 \\\n",
"Unnamed: 0 13820 18901 20132 \n",
"BitsAllocated 16 16 16 \n",
"BitsStored 12 12 12 \n",
"Columns 512 512 512 \n",
"HighBit 11 11 11 \n",
"ImageOrientationPatient_0 1 1 1 \n",
"ImageOrientationPatient_1 0 0 0 \n",
"ImageOrientationPatient_2 0 0 0 \n",
"ImageOrientationPatient_3 0 0 0 \n",
"ImageOrientationPatient_4 0.939693 0.939693 0.939693 \n",
"ImageOrientationPatient_5 -0.34202 -0.34202 -0.34202 \n",
"ImagePositionPatient_0 -167 -167 -167 \n",
"ImagePositionPatient_1 17.8026 17.8026 17.8026 \n",
"ImagePositionPatient_2 340.091 302.953 324.153 \n",
"Modality CT CT CT \n",
"PatientID ID_f997418a ID_f997418a ID_f997418a \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 0 0 0 \n",
"PixelSpacing_0 0.488281 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 -1024 \n",
"RescaleSlope 1 1 1 \n",
"Rows 512 512 512 \n",
"SOPInstanceUID ID_1cf8d2973 ID_2775d5917 ID_29fb61fbb \n",
"SamplesPerPixel 1 1 1 \n",
"SeriesInstanceUID ID_0018be306d ID_0018be306d ID_0018be306d \n",
"StudyID NaN NaN NaN \n",
"StudyInstanceUID ID_16aac16e79 ID_16aac16e79 ID_16aac16e79 \n",
"WindowCenter NaN NaN NaN \n",
"WindowCenter_0 40 40 40 \n",
"WindowCenter_1 40 40 40 \n",
"WindowWidth NaN NaN NaN \n",
"WindowWidth_0 80 80 80 \n",
"WindowWidth_1 80 80 80 \n",
"normz 37.1382 0 21.2001 \n",
"xcm 255.127 255.127 255.127 \n",
"ycm 241.378 241.378 241.378 \n",
"theta 0.132157 0.132157 0.132157 \n",
"pct_tissue 0.191586 0.271954 0.242237 \n",
"\n",
" 24764 36224 \n",
"Unnamed: 0 24764 36224 \n",
"BitsAllocated 16 16 \n",
"BitsStored 12 12 \n",
"Columns 512 512 \n",
"HighBit 11 11 \n",
"ImageOrientationPatient_0 1 1 \n",
"ImageOrientationPatient_1 0 0 \n",
"ImageOrientationPatient_2 0 0 \n",
"ImageOrientationPatient_3 0 0 \n",
"ImageOrientationPatient_4 0.939693 0.939693 \n",
"ImageOrientationPatient_5 -0.34202 -0.34202 \n",
"ImagePositionPatient_0 -167 -167 \n",
"ImagePositionPatient_1 17.8026 17.8026 \n",
"ImagePositionPatient_2 387.753 265.891 \n",
"Modality CT CT \n",
"PatientID ID_f997418a ID_f997418a \n",
"PhotometricInterpretation MONOCHROME2 MONOCHROME2 \n",
"PixelRepresentation 0 0 \n",
"PixelSpacing_0 0.488281 0.488281 \n",
"PixelSpacing_1 0.488281 0.488281 \n",
"RescaleIntercept -1024 -1024 \n",
"RescaleSlope 1 1 \n",
"Rows 512 512 \n",
"SOPInstanceUID ID_33ba827f2 ID_4c1ea3745 \n",
"SamplesPerPixel 1 1 \n",
"SeriesInstanceUID ID_0018be306d ID_0018be306d \n",
"StudyID NaN NaN \n",
"StudyInstanceUID ID_16aac16e79 ID_16aac16e79 \n",
"WindowCenter NaN NaN \n",
"WindowCenter_0 40 40 \n",
"WindowCenter_1 40 40 \n",
"WindowWidth NaN NaN \n",
"WindowWidth_0 80 80 \n",
"WindowWidth_1 80 80 \n",
"normz 84.8 -37.0619 \n",
"xcm 255.127 255.127 \n",
"ycm 241.378 241.378 \n",
"theta 0.132157 0.132157 \n",
"pct_tissue 0.00454712 0.143639 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the normalized frame, transposed so every column is listed as a row.\n",
"extended_df.head().transpose()"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"121232"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: the normalized frame must have exactly as many rows as the input frame.\n",
"n_rows = len(df)\n",
"assert n_rows == len(extended_df)\n",
"n_rows"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Persist the normalized test metadata. to_csv is called without index=False,\n",
"# so the dataframe index is written as the first (unnamed) CSV column.\n",
"test_norm_csv = f'data/{stage}_test_dicom_norm.csv'\n",
"extended_df.to_csv(test_norm_csv)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}