Switch to unified view

a b/1-preprocess-brain_norm.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "metadata": {},
6
   "source": [
7
    "# Brain translational and rotational normalization\n",
8
    "* Zero z at the slice with max area\n",
9
    "* Use analytic minimization of moment of inertia to find orientation angle\n",
10
    "\n",
11
    "Generates:\n",
12
    "* `train_dicom_diags_norm.csv` and `test_dicom_norm.csv` with 5 new columns added: \n",
13
    "  * `normz` normalized z\n",
14
    "  * `xcm`, `ycm` center of mass\n",
15
    "  * `theta` (rads) the CW angle the slice must be rotated to straighten it\n",
16
    "  * `pct_tissue` percentage of brain tissue pixels in the radiography"
17
   ]
18
  },
19
  {
20
   "cell_type": "code",
21
   "execution_count": 1,
22
   "metadata": {},
23
   "outputs": [],
24
   "source": [
25
    "stage = \"stage_2\""
26
   ]
27
  },
28
  {
29
   "cell_type": "code",
30
   "execution_count": 2,
31
   "metadata": {},
32
   "outputs": [],
33
   "source": [
34
    "import pydicom\n",
35
    "import math\n",
36
    "from pathlib import Path\n",
37
    "from fastai.vision import *\n",
38
    "from matplotlib import pyplot as plt\n",
39
    "from scipy import ndimage, misc\n",
40
    "from itertools import repeat\n",
41
    "import pandas as pd\n",
42
    "%matplotlib inline"
43
   ]
44
  },
45
  {
46
   "cell_type": "code",
47
   "execution_count": 3,
48
   "metadata": {},
49
   "outputs": [],
50
   "source": [
51
    "%%capture\n",
52
    "from tqdm import tqdm_notebook as tqdm\n",
53
    "tqdm().pandas()"
54
   ]
55
  },
56
  {
57
   "cell_type": "code",
58
   "execution_count": 4,
59
   "metadata": {},
60
   "outputs": [],
61
   "source": [
62
    "def dcm_to_np(dcm):\n",
63
    "    ''' Dicom to numpy array\n",
64
    "    :param dcm: dicom object\n",
65
    "    '''\n",
66
    "    rescale_slope, rescale_intercept = float(dcm.RescaleSlope), float(dcm.RescaleIntercept)\n",
67
    "    t = dcm.pixel_array.astype(np.float)\n",
68
    "    t = t * rescale_slope + rescale_intercept # rescale\n",
69
    "    return t"
70
   ]
71
  },
72
  {
73
   "cell_type": "code",
74
   "execution_count": 5,
75
   "metadata": {},
76
   "outputs": [],
77
   "source": [
78
    "def get_fid_with_max_area(dir, dfs):\n",
79
    "    ''' Given a single study return the fid (fileid) with biggest brain area\n",
80
    "    :param dir: datset directory\n",
81
    "    :param dfs: pandas dataframe with 1 study\n",
82
    "    '''\n",
83
    "    brain_area_vs_z = []\n",
84
    "    pcts_tissue = []\n",
85
    "    max_area = 0\n",
86
    "    for index, row in dfs.iterrows():\n",
87
    "        fid = row['SOPInstanceUID'] # don't do this inside f'' array or jupyter bugs out\n",
88
    "        try:\n",
89
    "            dcm = pydicom.dcmread(f\"{dir}/{fid}.dcm\")\n",
90
    "            t = dcm_to_np(dcm)\n",
91
    "        except Exception as e:\n",
92
    "            print(e,fid)\n",
93
    "        pts = np.argwhere(np.logical_and(0 < t, t < 100)) # select only brain matter\n",
94
    "        if len(pts) > max_area:\n",
95
    "            max_area = len(pts)\n",
96
    "            fid_max_area = row['SOPInstanceUID']\n",
97
    "        brain_area_vs_z.append([float(row['ImagePositionPatient_2']), len(pts)])\n",
98
    "        pcts_tissue.append(len(pts) / t.size)\n",
99
    "    return fid_max_area, pcts_tissue"
100
   ]
101
  },
102
  {
103
   "cell_type": "code",
104
   "execution_count": 6,
105
   "metadata": {},
106
   "outputs": [],
107
   "source": [
108
    "def get_axis(brain):\n",
109
    "    ''' \n",
110
    "    Get orientation axis.\n",
111
    "    :param brain: numpy array (brain)\n",
112
    "    :return: center of masses (xcm, ycm), coefficients (m,  n) of the axis equation y=mx+n and angle (th)\n",
113
    "        the brain is rotated counter-clockwise (rotate clockwise to normalize)\n",
114
    "    '''\n",
115
    "    pts_org = np.argwhere(np.logical_and(0 < brain, brain < 100))\n",
116
    "\n",
117
    "    # Center of masses\n",
118
    "    xcm = pts_org[:,1].mean()\n",
119
    "    ycm = pts_org[:,0].mean()\n",
120
    "\n",
121
    "    # shift brain to center\n",
122
    "    xs = pts_org[:,1] - xcm\n",
123
    "    ys = pts_org[:,0] - ycm\n",
124
    "    \n",
125
    "    # coefficients of quad eq am^2+bm+c=0\n",
126
    "    a = (xs * ys).sum()\n",
127
    "    b = (xs ** 2 - ys ** 2).sum()\n",
128
    "    c = -a\n",
129
    "\n",
130
    "    # solve for m1 (max I), m2 (min I)\n",
131
    "    m1 = (-b + (b**2-4*a*c) ** (.5)) / (2 * a)\n",
132
    "    m2 = (-b - (b**2-4*a*c) ** (.5)) / (2 * a)\n",
133
    "    \n",
134
    "    # y-ycm = m*x - m*xcm -> y = m*x - m*xcm + ycm\n",
135
    "    n1 = ycm - m1 * xcm\n",
136
    "    n2 = ycm - m2 * xcm\n",
137
    "    \n",
138
    "    th = math.atan(m2)\n",
139
    "    \n",
140
    "    return xcm, ycm, m2, n2, th"
141
   ]
142
  },
143
  {
144
   "cell_type": "code",
145
   "execution_count": 7,
146
   "metadata": {},
147
   "outputs": [],
148
   "source": [
149
    "def norm_study(dir, group_name):\n",
150
    "    ''' Normalize study given by pandas DataFrame g\n",
151
    "    :param dir: dataset directory\n",
152
    "    :param g: single study to normalize\n",
153
    "    '''\n",
154
    "    global dfg\n",
155
    "    g = dfg.get_group(group_name)\n",
156
    "    fid_max_area, pcts_tissue = get_fid_with_max_area(dir, g)\n",
157
    "    f = f'{dir}/{fid_max_area}.dcm'\n",
158
    "    try:\n",
159
    "        dcm = pydicom.dcmread(str(f))\n",
160
    "    except Exceptione as e:\n",
161
    "        print(e,f)\n",
162
    "    brain = dcm_to_np(dcm)\n",
163
    "    xcm, ycm, m, n, th = get_axis(brain)\n",
164
    "    eg = g.copy()\n",
165
    "    z0 = g[g['SOPInstanceUID'] == fid_max_area].iloc[0]['ImagePositionPatient_2']\n",
166
    "    eg['normz'] = eg['ImagePositionPatient_2'] - z0\n",
167
    "    eg['xcm'] = xcm\n",
168
    "    eg['ycm'] = ycm\n",
169
    "    eg['theta'] = th\n",
170
    "    eg['pct_tissue'] = pcts_tissue\n",
171
    "    return eg"
172
   ]
173
  },
174
  {
175
   "cell_type": "markdown",
176
   "metadata": {},
177
   "source": [
178
    "# Train dataset"
179
   ]
180
  },
181
  {
182
   "cell_type": "code",
183
   "execution_count": 8,
184
   "metadata": {},
185
   "outputs": [],
186
   "source": [
187
    "df = pd.read_csv(f'data/{stage}_train_dicom_diags.csv')"
188
   ]
189
  },
190
  {
191
   "cell_type": "code",
192
   "execution_count": 9,
193
   "metadata": {},
194
   "outputs": [],
195
   "source": [
196
    "dfg = df.groupby('SeriesInstanceUID')"
197
   ]
198
  },
199
  {
200
   "cell_type": "code",
201
   "execution_count": 10,
202
   "metadata": {},
203
   "outputs": [],
204
   "source": [
205
    "group_names = list(dfg.groups.keys())\n",
206
    "#group_names = group_names[:64] # test with small subset"
207
   ]
208
  },
209
  {
210
   "cell_type": "code",
211
   "execution_count": 11,
212
   "metadata": {},
213
   "outputs": [
214
    {
215
     "data": {
216
      "application/vnd.jupyter.widget-view+json": {
217
       "model_id": "4bf5a2291e32453ca38813214dbe1a11",
218
       "version_major": 2,
219
       "version_minor": 0
220
      },
221
      "text/plain": [
222
       "HBox(children=(IntProgress(value=0, max=21744), HTML(value='')))"
223
      ]
224
     },
225
     "metadata": {},
226
     "output_type": "display_data"
227
    },
228
    {
229
     "name": "stdout",
230
     "output_type": "stream",
231
     "text": [
232
      "\n"
233
     ]
234
    }
235
   ],
236
   "source": [
237
    "with ProcessPoolExecutor(max_workers=32) as e:\n",
238
    "     extended_df = pd.concat(\n",
239
    "         tqdm(e.map(norm_study, repeat(f'data/unzip/{stage}_train_images'), group_names), total=len(group_names)))"
240
   ]
241
  },
242
  {
243
   "cell_type": "code",
244
   "execution_count": 12,
245
   "metadata": {},
246
   "outputs": [
247
    {
248
     "data": {
249
      "text/html": [
250
       "<div>\n",
251
       "<style scoped>\n",
252
       "    .dataframe tbody tr th:only-of-type {\n",
253
       "        vertical-align: middle;\n",
254
       "    }\n",
255
       "\n",
256
       "    .dataframe tbody tr th {\n",
257
       "        vertical-align: top;\n",
258
       "    }\n",
259
       "\n",
260
       "    .dataframe thead th {\n",
261
       "        text-align: right;\n",
262
       "    }\n",
263
       "</style>\n",
264
       "<table border=\"1\" class=\"dataframe\">\n",
265
       "  <thead>\n",
266
       "    <tr style=\"text-align: right;\">\n",
267
       "      <th></th>\n",
268
       "      <th>556142</th>\n",
269
       "      <th>561465</th>\n",
270
       "      <th>580918</th>\n",
271
       "      <th>584578</th>\n",
272
       "      <th>617029</th>\n",
273
       "      <th>617400</th>\n",
274
       "      <th>645424</th>\n",
275
       "      <th>661661</th>\n",
276
       "      <th>665938</th>\n",
277
       "      <th>748282</th>\n",
278
       "    </tr>\n",
279
       "  </thead>\n",
280
       "  <tbody>\n",
281
       "    <tr>\n",
282
       "      <th>Unnamed: 0</th>\n",
283
       "      <td>556142</td>\n",
284
       "      <td>561465</td>\n",
285
       "      <td>580918</td>\n",
286
       "      <td>584578</td>\n",
287
       "      <td>617029</td>\n",
288
       "      <td>617400</td>\n",
289
       "      <td>645424</td>\n",
290
       "      <td>661661</td>\n",
291
       "      <td>665938</td>\n",
292
       "      <td>748282</td>\n",
293
       "    </tr>\n",
294
       "    <tr>\n",
295
       "      <th>BitsAllocated</th>\n",
296
       "      <td>16</td>\n",
297
       "      <td>16</td>\n",
298
       "      <td>16</td>\n",
299
       "      <td>16</td>\n",
300
       "      <td>16</td>\n",
301
       "      <td>16</td>\n",
302
       "      <td>16</td>\n",
303
       "      <td>16</td>\n",
304
       "      <td>16</td>\n",
305
       "      <td>16</td>\n",
306
       "    </tr>\n",
307
       "    <tr>\n",
308
       "      <th>BitsStored</th>\n",
309
       "      <td>16</td>\n",
310
       "      <td>16</td>\n",
311
       "      <td>16</td>\n",
312
       "      <td>16</td>\n",
313
       "      <td>16</td>\n",
314
       "      <td>16</td>\n",
315
       "      <td>16</td>\n",
316
       "      <td>16</td>\n",
317
       "      <td>16</td>\n",
318
       "      <td>16</td>\n",
319
       "    </tr>\n",
320
       "    <tr>\n",
321
       "      <th>Columns</th>\n",
322
       "      <td>512</td>\n",
323
       "      <td>512</td>\n",
324
       "      <td>512</td>\n",
325
       "      <td>512</td>\n",
326
       "      <td>512</td>\n",
327
       "      <td>512</td>\n",
328
       "      <td>512</td>\n",
329
       "      <td>512</td>\n",
330
       "      <td>512</td>\n",
331
       "      <td>512</td>\n",
332
       "    </tr>\n",
333
       "    <tr>\n",
334
       "      <th>HighBit</th>\n",
335
       "      <td>15</td>\n",
336
       "      <td>15</td>\n",
337
       "      <td>15</td>\n",
338
       "      <td>15</td>\n",
339
       "      <td>15</td>\n",
340
       "      <td>15</td>\n",
341
       "      <td>15</td>\n",
342
       "      <td>15</td>\n",
343
       "      <td>15</td>\n",
344
       "      <td>15</td>\n",
345
       "    </tr>\n",
346
       "    <tr>\n",
347
       "      <th>ImageOrientationPatient_0</th>\n",
348
       "      <td>1</td>\n",
349
       "      <td>1</td>\n",
350
       "      <td>1</td>\n",
351
       "      <td>1</td>\n",
352
       "      <td>1</td>\n",
353
       "      <td>1</td>\n",
354
       "      <td>1</td>\n",
355
       "      <td>1</td>\n",
356
       "      <td>1</td>\n",
357
       "      <td>1</td>\n",
358
       "    </tr>\n",
359
       "    <tr>\n",
360
       "      <th>ImageOrientationPatient_1</th>\n",
361
       "      <td>0</td>\n",
362
       "      <td>0</td>\n",
363
       "      <td>0</td>\n",
364
       "      <td>0</td>\n",
365
       "      <td>0</td>\n",
366
       "      <td>0</td>\n",
367
       "      <td>0</td>\n",
368
       "      <td>0</td>\n",
369
       "      <td>0</td>\n",
370
       "      <td>0</td>\n",
371
       "    </tr>\n",
372
       "    <tr>\n",
373
       "      <th>ImageOrientationPatient_2</th>\n",
374
       "      <td>0</td>\n",
375
       "      <td>0</td>\n",
376
       "      <td>0</td>\n",
377
       "      <td>0</td>\n",
378
       "      <td>0</td>\n",
379
       "      <td>0</td>\n",
380
       "      <td>0</td>\n",
381
       "      <td>0</td>\n",
382
       "      <td>0</td>\n",
383
       "      <td>0</td>\n",
384
       "    </tr>\n",
385
       "    <tr>\n",
386
       "      <th>ImageOrientationPatient_3</th>\n",
387
       "      <td>0</td>\n",
388
       "      <td>0</td>\n",
389
       "      <td>0</td>\n",
390
       "      <td>0</td>\n",
391
       "      <td>0</td>\n",
392
       "      <td>0</td>\n",
393
       "      <td>0</td>\n",
394
       "      <td>0</td>\n",
395
       "      <td>0</td>\n",
396
       "      <td>0</td>\n",
397
       "    </tr>\n",
398
       "    <tr>\n",
399
       "      <th>ImageOrientationPatient_4</th>\n",
400
       "      <td>0.979925</td>\n",
401
       "      <td>0.979925</td>\n",
402
       "      <td>0.979925</td>\n",
403
       "      <td>0.979925</td>\n",
404
       "      <td>0.979925</td>\n",
405
       "      <td>0.979925</td>\n",
406
       "      <td>0.979925</td>\n",
407
       "      <td>0.979925</td>\n",
408
       "      <td>0.979925</td>\n",
409
       "      <td>0.979925</td>\n",
410
       "    </tr>\n",
411
       "    <tr>\n",
412
       "      <th>ImageOrientationPatient_5</th>\n",
413
       "      <td>-0.199368</td>\n",
414
       "      <td>-0.199368</td>\n",
415
       "      <td>-0.199368</td>\n",
416
       "      <td>-0.199368</td>\n",
417
       "      <td>-0.199368</td>\n",
418
       "      <td>-0.199368</td>\n",
419
       "      <td>-0.199368</td>\n",
420
       "      <td>-0.199368</td>\n",
421
       "      <td>-0.199368</td>\n",
422
       "      <td>-0.199368</td>\n",
423
       "    </tr>\n",
424
       "    <tr>\n",
425
       "      <th>ImagePositionPatient_0</th>\n",
426
       "      <td>-125</td>\n",
427
       "      <td>-125</td>\n",
428
       "      <td>-125</td>\n",
429
       "      <td>-125</td>\n",
430
       "      <td>-125</td>\n",
431
       "      <td>-125</td>\n",
432
       "      <td>-125</td>\n",
433
       "      <td>-125</td>\n",
434
       "      <td>-125</td>\n",
435
       "      <td>-125</td>\n",
436
       "    </tr>\n",
437
       "    <tr>\n",
438
       "      <th>ImagePositionPatient_1</th>\n",
439
       "      <td>-122.491</td>\n",
440
       "      <td>-122.491</td>\n",
441
       "      <td>-122.491</td>\n",
442
       "      <td>-122.491</td>\n",
443
       "      <td>-122.491</td>\n",
444
       "      <td>-122.491</td>\n",
445
       "      <td>-122.491</td>\n",
446
       "      <td>-122.491</td>\n",
447
       "      <td>-122.491</td>\n",
448
       "      <td>-122.491</td>\n",
449
       "    </tr>\n",
450
       "    <tr>\n",
451
       "      <th>ImagePositionPatient_2</th>\n",
452
       "      <td>166.528</td>\n",
453
       "      <td>156.323</td>\n",
454
       "      <td>176.733</td>\n",
455
       "      <td>49.171</td>\n",
456
       "      <td>89.991</td>\n",
457
       "      <td>186.938</td>\n",
458
       "      <td>125.708</td>\n",
459
       "      <td>146.118</td>\n",
460
       "      <td>74.6834</td>\n",
461
       "      <td>141.016</td>\n",
462
       "    </tr>\n",
463
       "    <tr>\n",
464
       "      <th>Modality</th>\n",
465
       "      <td>CT</td>\n",
466
       "      <td>CT</td>\n",
467
       "      <td>CT</td>\n",
468
       "      <td>CT</td>\n",
469
       "      <td>CT</td>\n",
470
       "      <td>CT</td>\n",
471
       "      <td>CT</td>\n",
472
       "      <td>CT</td>\n",
473
       "      <td>CT</td>\n",
474
       "      <td>CT</td>\n",
475
       "    </tr>\n",
476
       "    <tr>\n",
477
       "      <th>PatientID</th>\n",
478
       "      <td>ID_160aea75</td>\n",
479
       "      <td>ID_160aea75</td>\n",
480
       "      <td>ID_160aea75</td>\n",
481
       "      <td>ID_160aea75</td>\n",
482
       "      <td>ID_160aea75</td>\n",
483
       "      <td>ID_160aea75</td>\n",
484
       "      <td>ID_160aea75</td>\n",
485
       "      <td>ID_160aea75</td>\n",
486
       "      <td>ID_160aea75</td>\n",
487
       "      <td>ID_160aea75</td>\n",
488
       "    </tr>\n",
489
       "    <tr>\n",
490
       "      <th>PhotometricInterpretation</th>\n",
491
       "      <td>MONOCHROME2</td>\n",
492
       "      <td>MONOCHROME2</td>\n",
493
       "      <td>MONOCHROME2</td>\n",
494
       "      <td>MONOCHROME2</td>\n",
495
       "      <td>MONOCHROME2</td>\n",
496
       "      <td>MONOCHROME2</td>\n",
497
       "      <td>MONOCHROME2</td>\n",
498
       "      <td>MONOCHROME2</td>\n",
499
       "      <td>MONOCHROME2</td>\n",
500
       "      <td>MONOCHROME2</td>\n",
501
       "    </tr>\n",
502
       "    <tr>\n",
503
       "      <th>PixelRepresentation</th>\n",
504
       "      <td>1</td>\n",
505
       "      <td>1</td>\n",
506
       "      <td>1</td>\n",
507
       "      <td>1</td>\n",
508
       "      <td>1</td>\n",
509
       "      <td>1</td>\n",
510
       "      <td>1</td>\n",
511
       "      <td>1</td>\n",
512
       "      <td>1</td>\n",
513
       "      <td>1</td>\n",
514
       "    </tr>\n",
515
       "    <tr>\n",
516
       "      <th>PixelSpacing_0</th>\n",
517
       "      <td>0.488281</td>\n",
518
       "      <td>0.488281</td>\n",
519
       "      <td>0.488281</td>\n",
520
       "      <td>0.488281</td>\n",
521
       "      <td>0.488281</td>\n",
522
       "      <td>0.488281</td>\n",
523
       "      <td>0.488281</td>\n",
524
       "      <td>0.488281</td>\n",
525
       "      <td>0.488281</td>\n",
526
       "      <td>0.488281</td>\n",
527
       "    </tr>\n",
528
       "    <tr>\n",
529
       "      <th>PixelSpacing_1</th>\n",
530
       "      <td>0.488281</td>\n",
531
       "      <td>0.488281</td>\n",
532
       "      <td>0.488281</td>\n",
533
       "      <td>0.488281</td>\n",
534
       "      <td>0.488281</td>\n",
535
       "      <td>0.488281</td>\n",
536
       "      <td>0.488281</td>\n",
537
       "      <td>0.488281</td>\n",
538
       "      <td>0.488281</td>\n",
539
       "      <td>0.488281</td>\n",
540
       "    </tr>\n",
541
       "    <tr>\n",
542
       "      <th>RescaleIntercept</th>\n",
543
       "      <td>-1024</td>\n",
544
       "      <td>-1024</td>\n",
545
       "      <td>-1024</td>\n",
546
       "      <td>-1024</td>\n",
547
       "      <td>-1024</td>\n",
548
       "      <td>-1024</td>\n",
549
       "      <td>-1024</td>\n",
550
       "      <td>-1024</td>\n",
551
       "      <td>-1024</td>\n",
552
       "      <td>-1024</td>\n",
553
       "    </tr>\n",
554
       "    <tr>\n",
555
       "      <th>RescaleSlope</th>\n",
556
       "      <td>1</td>\n",
557
       "      <td>1</td>\n",
558
       "      <td>1</td>\n",
559
       "      <td>1</td>\n",
560
       "      <td>1</td>\n",
561
       "      <td>1</td>\n",
562
       "      <td>1</td>\n",
563
       "      <td>1</td>\n",
564
       "      <td>1</td>\n",
565
       "      <td>1</td>\n",
566
       "    </tr>\n",
567
       "    <tr>\n",
568
       "      <th>Rows</th>\n",
569
       "      <td>512</td>\n",
570
       "      <td>512</td>\n",
571
       "      <td>512</td>\n",
572
       "      <td>512</td>\n",
573
       "      <td>512</td>\n",
574
       "      <td>512</td>\n",
575
       "      <td>512</td>\n",
576
       "      <td>512</td>\n",
577
       "      <td>512</td>\n",
578
       "      <td>512</td>\n",
579
       "    </tr>\n",
580
       "    <tr>\n",
581
       "      <th>SOPInstanceUID</th>\n",
582
       "      <td>ID_bcdecac1b</td>\n",
583
       "      <td>ID_beb4aacbe</td>\n",
584
       "      <td>ID_c554939b0</td>\n",
585
       "      <td>ID_c6909d6fa</td>\n",
586
       "      <td>ID_d1ad467b0</td>\n",
587
       "      <td>ID_d1d25ea8c</td>\n",
588
       "      <td>ID_db59bb95e</td>\n",
589
       "      <td>ID_e0ed751c2</td>\n",
590
       "      <td>ID_e262d76e2</td>\n",
591
       "      <td>ID_fe77ec61b</td>\n",
592
       "    </tr>\n",
593
       "    <tr>\n",
594
       "      <th>SamplesPerPixel</th>\n",
595
       "      <td>1</td>\n",
596
       "      <td>1</td>\n",
597
       "      <td>1</td>\n",
598
       "      <td>1</td>\n",
599
       "      <td>1</td>\n",
600
       "      <td>1</td>\n",
601
       "      <td>1</td>\n",
602
       "      <td>1</td>\n",
603
       "      <td>1</td>\n",
604
       "      <td>1</td>\n",
605
       "    </tr>\n",
606
       "    <tr>\n",
607
       "      <th>SeriesInstanceUID</th>\n",
608
       "      <td>ID_000a935543</td>\n",
609
       "      <td>ID_000a935543</td>\n",
610
       "      <td>ID_000a935543</td>\n",
611
       "      <td>ID_000a935543</td>\n",
612
       "      <td>ID_000a935543</td>\n",
613
       "      <td>ID_000a935543</td>\n",
614
       "      <td>ID_000a935543</td>\n",
615
       "      <td>ID_000a935543</td>\n",
616
       "      <td>ID_000a935543</td>\n",
617
       "      <td>ID_000a935543</td>\n",
618
       "    </tr>\n",
619
       "    <tr>\n",
620
       "      <th>StudyID</th>\n",
621
       "      <td>NaN</td>\n",
622
       "      <td>NaN</td>\n",
623
       "      <td>NaN</td>\n",
624
       "      <td>NaN</td>\n",
625
       "      <td>NaN</td>\n",
626
       "      <td>NaN</td>\n",
627
       "      <td>NaN</td>\n",
628
       "      <td>NaN</td>\n",
629
       "      <td>NaN</td>\n",
630
       "      <td>NaN</td>\n",
631
       "    </tr>\n",
632
       "    <tr>\n",
633
       "      <th>StudyInstanceUID</th>\n",
634
       "      <td>ID_41d976e5d3</td>\n",
635
       "      <td>ID_41d976e5d3</td>\n",
636
       "      <td>ID_41d976e5d3</td>\n",
637
       "      <td>ID_41d976e5d3</td>\n",
638
       "      <td>ID_41d976e5d3</td>\n",
639
       "      <td>ID_41d976e5d3</td>\n",
640
       "      <td>ID_41d976e5d3</td>\n",
641
       "      <td>ID_41d976e5d3</td>\n",
642
       "      <td>ID_41d976e5d3</td>\n",
643
       "      <td>ID_41d976e5d3</td>\n",
644
       "    </tr>\n",
645
       "    <tr>\n",
646
       "      <th>WindowCenter</th>\n",
647
       "      <td>30</td>\n",
648
       "      <td>30</td>\n",
649
       "      <td>30</td>\n",
650
       "      <td>30</td>\n",
651
       "      <td>30</td>\n",
652
       "      <td>30</td>\n",
653
       "      <td>30</td>\n",
654
       "      <td>30</td>\n",
655
       "      <td>30</td>\n",
656
       "      <td>30</td>\n",
657
       "    </tr>\n",
658
       "    <tr>\n",
659
       "      <th>WindowCenter_0</th>\n",
660
       "      <td>NaN</td>\n",
661
       "      <td>NaN</td>\n",
662
       "      <td>NaN</td>\n",
663
       "      <td>NaN</td>\n",
664
       "      <td>NaN</td>\n",
665
       "      <td>NaN</td>\n",
666
       "      <td>NaN</td>\n",
667
       "      <td>NaN</td>\n",
668
       "      <td>NaN</td>\n",
669
       "      <td>NaN</td>\n",
670
       "    </tr>\n",
671
       "    <tr>\n",
672
       "      <th>WindowCenter_1</th>\n",
673
       "      <td>NaN</td>\n",
674
       "      <td>NaN</td>\n",
675
       "      <td>NaN</td>\n",
676
       "      <td>NaN</td>\n",
677
       "      <td>NaN</td>\n",
678
       "      <td>NaN</td>\n",
679
       "      <td>NaN</td>\n",
680
       "      <td>NaN</td>\n",
681
       "      <td>NaN</td>\n",
682
       "      <td>NaN</td>\n",
683
       "    </tr>\n",
684
       "    <tr>\n",
685
       "      <th>WindowWidth</th>\n",
686
       "      <td>80</td>\n",
687
       "      <td>80</td>\n",
688
       "      <td>80</td>\n",
689
       "      <td>80</td>\n",
690
       "      <td>80</td>\n",
691
       "      <td>80</td>\n",
692
       "      <td>80</td>\n",
693
       "      <td>80</td>\n",
694
       "      <td>80</td>\n",
695
       "      <td>80</td>\n",
696
       "    </tr>\n",
697
       "    <tr>\n",
698
       "      <th>WindowWidth_0</th>\n",
699
       "      <td>NaN</td>\n",
700
       "      <td>NaN</td>\n",
701
       "      <td>NaN</td>\n",
702
       "      <td>NaN</td>\n",
703
       "      <td>NaN</td>\n",
704
       "      <td>NaN</td>\n",
705
       "      <td>NaN</td>\n",
706
       "      <td>NaN</td>\n",
707
       "      <td>NaN</td>\n",
708
       "      <td>NaN</td>\n",
709
       "    </tr>\n",
710
       "    <tr>\n",
711
       "      <th>WindowWidth_1</th>\n",
712
       "      <td>NaN</td>\n",
713
       "      <td>NaN</td>\n",
714
       "      <td>NaN</td>\n",
715
       "      <td>NaN</td>\n",
716
       "      <td>NaN</td>\n",
717
       "      <td>NaN</td>\n",
718
       "      <td>NaN</td>\n",
719
       "      <td>NaN</td>\n",
720
       "      <td>NaN</td>\n",
721
       "      <td>NaN</td>\n",
722
       "    </tr>\n",
723
       "    <tr>\n",
724
       "      <th>fid</th>\n",
725
       "      <td>ID_bcdecac1b</td>\n",
726
       "      <td>ID_beb4aacbe</td>\n",
727
       "      <td>ID_c554939b0</td>\n",
728
       "      <td>ID_c6909d6fa</td>\n",
729
       "      <td>ID_d1ad467b0</td>\n",
730
       "      <td>ID_d1d25ea8c</td>\n",
731
       "      <td>ID_db59bb95e</td>\n",
732
       "      <td>ID_e0ed751c2</td>\n",
733
       "      <td>ID_e262d76e2</td>\n",
734
       "      <td>ID_fe77ec61b</td>\n",
735
       "    </tr>\n",
736
       "    <tr>\n",
737
       "      <th>any</th>\n",
738
       "      <td>0</td>\n",
739
       "      <td>0</td>\n",
740
       "      <td>0</td>\n",
741
       "      <td>0</td>\n",
742
       "      <td>0</td>\n",
743
       "      <td>0</td>\n",
744
       "      <td>0</td>\n",
745
       "      <td>0</td>\n",
746
       "      <td>0</td>\n",
747
       "      <td>0</td>\n",
748
       "    </tr>\n",
749
       "    <tr>\n",
750
       "      <th>epidural</th>\n",
751
       "      <td>0</td>\n",
752
       "      <td>0</td>\n",
753
       "      <td>0</td>\n",
754
       "      <td>0</td>\n",
755
       "      <td>0</td>\n",
756
       "      <td>0</td>\n",
757
       "      <td>0</td>\n",
758
       "      <td>0</td>\n",
759
       "      <td>0</td>\n",
760
       "      <td>0</td>\n",
761
       "    </tr>\n",
762
       "    <tr>\n",
763
       "      <th>intraparenchymal</th>\n",
764
       "      <td>0</td>\n",
765
       "      <td>0</td>\n",
766
       "      <td>0</td>\n",
767
       "      <td>0</td>\n",
768
       "      <td>0</td>\n",
769
       "      <td>0</td>\n",
770
       "      <td>0</td>\n",
771
       "      <td>0</td>\n",
772
       "      <td>0</td>\n",
773
       "      <td>0</td>\n",
774
       "    </tr>\n",
775
       "    <tr>\n",
776
       "      <th>intraventricular</th>\n",
777
       "      <td>0</td>\n",
778
       "      <td>0</td>\n",
779
       "      <td>0</td>\n",
780
       "      <td>0</td>\n",
781
       "      <td>0</td>\n",
782
       "      <td>0</td>\n",
783
       "      <td>0</td>\n",
784
       "      <td>0</td>\n",
785
       "      <td>0</td>\n",
786
       "      <td>0</td>\n",
787
       "    </tr>\n",
788
       "    <tr>\n",
789
       "      <th>subarachnoid</th>\n",
790
       "      <td>0</td>\n",
791
       "      <td>0</td>\n",
792
       "      <td>0</td>\n",
793
       "      <td>0</td>\n",
794
       "      <td>0</td>\n",
795
       "      <td>0</td>\n",
796
       "      <td>0</td>\n",
797
       "      <td>0</td>\n",
798
       "      <td>0</td>\n",
799
       "      <td>0</td>\n",
800
       "    </tr>\n",
801
       "    <tr>\n",
802
       "      <th>subdural</th>\n",
803
       "      <td>0</td>\n",
804
       "      <td>0</td>\n",
805
       "      <td>0</td>\n",
806
       "      <td>0</td>\n",
807
       "      <td>0</td>\n",
808
       "      <td>0</td>\n",
809
       "      <td>0</td>\n",
810
       "      <td>0</td>\n",
811
       "      <td>0</td>\n",
812
       "      <td>0</td>\n",
813
       "    </tr>\n",
814
       "    <tr>\n",
815
       "      <th>normz</th>\n",
816
       "      <td>61.23</td>\n",
817
       "      <td>51.0251</td>\n",
818
       "      <td>71.4351</td>\n",
819
       "      <td>-56.1273</td>\n",
820
       "      <td>-15.3073</td>\n",
821
       "      <td>81.64</td>\n",
822
       "      <td>20.41</td>\n",
823
       "      <td>40.82</td>\n",
824
       "      <td>-30.6149</td>\n",
825
       "      <td>35.7176</td>\n",
826
       "    </tr>\n",
827
       "    <tr>\n",
828
       "      <th>xcm</th>\n",
829
       "      <td>249.719</td>\n",
830
       "      <td>249.719</td>\n",
831
       "      <td>249.719</td>\n",
832
       "      <td>249.719</td>\n",
833
       "      <td>249.719</td>\n",
834
       "      <td>249.719</td>\n",
835
       "      <td>249.719</td>\n",
836
       "      <td>249.719</td>\n",
837
       "      <td>249.719</td>\n",
838
       "      <td>249.719</td>\n",
839
       "    </tr>\n",
840
       "    <tr>\n",
841
       "      <th>ycm</th>\n",
842
       "      <td>249.632</td>\n",
843
       "      <td>249.632</td>\n",
844
       "      <td>249.632</td>\n",
845
       "      <td>249.632</td>\n",
846
       "      <td>249.632</td>\n",
847
       "      <td>249.632</td>\n",
848
       "      <td>249.632</td>\n",
849
       "      <td>249.632</td>\n",
850
       "      <td>249.632</td>\n",
851
       "      <td>249.632</td>\n",
852
       "    </tr>\n",
853
       "    <tr>\n",
854
       "      <th>theta</th>\n",
855
       "      <td>-0.0280377</td>\n",
856
       "      <td>-0.0280377</td>\n",
857
       "      <td>-0.0280377</td>\n",
858
       "      <td>-0.0280377</td>\n",
859
       "      <td>-0.0280377</td>\n",
860
       "      <td>-0.0280377</td>\n",
861
       "      <td>-0.0280377</td>\n",
862
       "      <td>-0.0280377</td>\n",
863
       "      <td>-0.0280377</td>\n",
864
       "      <td>-0.0280377</td>\n",
865
       "    </tr>\n",
866
       "    <tr>\n",
867
       "      <th>pct_tissue</th>\n",
868
       "      <td>0.140537</td>\n",
869
       "      <td>0.194378</td>\n",
870
       "      <td>0.0751762</td>\n",
871
       "      <td>0.170967</td>\n",
872
       "      <td>0.320599</td>\n",
873
       "      <td>0.0209999</td>\n",
874
       "      <td>0.305298</td>\n",
875
       "      <td>0.242409</td>\n",
876
       "      <td>0.247257</td>\n",
877
       "      <td>0.262268</td>\n",
878
       "    </tr>\n",
879
       "  </tbody>\n",
880
       "</table>\n",
881
       "</div>"
882
      ],
883
      "text/plain": [
884
       "                                  556142         561465         580918  \\\n",
885
       "Unnamed: 0                        556142         561465         580918   \n",
886
       "BitsAllocated                         16             16             16   \n",
887
       "BitsStored                            16             16             16   \n",
888
       "Columns                              512            512            512   \n",
889
       "HighBit                               15             15             15   \n",
890
       "ImageOrientationPatient_0              1              1              1   \n",
891
       "ImageOrientationPatient_1              0              0              0   \n",
892
       "ImageOrientationPatient_2              0              0              0   \n",
893
       "ImageOrientationPatient_3              0              0              0   \n",
894
       "ImageOrientationPatient_4       0.979925       0.979925       0.979925   \n",
895
       "ImageOrientationPatient_5      -0.199368      -0.199368      -0.199368   \n",
896
       "ImagePositionPatient_0              -125           -125           -125   \n",
897
       "ImagePositionPatient_1          -122.491       -122.491       -122.491   \n",
898
       "ImagePositionPatient_2           166.528        156.323        176.733   \n",
899
       "Modality                              CT             CT             CT   \n",
900
       "PatientID                    ID_160aea75    ID_160aea75    ID_160aea75   \n",
901
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
902
       "PixelRepresentation                    1              1              1   \n",
903
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
904
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
905
       "RescaleIntercept                   -1024          -1024          -1024   \n",
906
       "RescaleSlope                           1              1              1   \n",
907
       "Rows                                 512            512            512   \n",
908
       "SOPInstanceUID              ID_bcdecac1b   ID_beb4aacbe   ID_c554939b0   \n",
909
       "SamplesPerPixel                        1              1              1   \n",
910
       "SeriesInstanceUID          ID_000a935543  ID_000a935543  ID_000a935543   \n",
911
       "StudyID                              NaN            NaN            NaN   \n",
912
       "StudyInstanceUID           ID_41d976e5d3  ID_41d976e5d3  ID_41d976e5d3   \n",
913
       "WindowCenter                          30             30             30   \n",
914
       "WindowCenter_0                       NaN            NaN            NaN   \n",
915
       "WindowCenter_1                       NaN            NaN            NaN   \n",
916
       "WindowWidth                           80             80             80   \n",
917
       "WindowWidth_0                        NaN            NaN            NaN   \n",
918
       "WindowWidth_1                        NaN            NaN            NaN   \n",
919
       "fid                         ID_bcdecac1b   ID_beb4aacbe   ID_c554939b0   \n",
920
       "any                                    0              0              0   \n",
921
       "epidural                               0              0              0   \n",
922
       "intraparenchymal                       0              0              0   \n",
923
       "intraventricular                       0              0              0   \n",
924
       "subarachnoid                           0              0              0   \n",
925
       "subdural                               0              0              0   \n",
926
       "normz                              61.23        51.0251        71.4351   \n",
927
       "xcm                              249.719        249.719        249.719   \n",
928
       "ycm                              249.632        249.632        249.632   \n",
929
       "theta                         -0.0280377     -0.0280377     -0.0280377   \n",
930
       "pct_tissue                      0.140537       0.194378      0.0751762   \n",
931
       "\n",
932
       "                                  584578         617029         617400  \\\n",
933
       "Unnamed: 0                        584578         617029         617400   \n",
934
       "BitsAllocated                         16             16             16   \n",
935
       "BitsStored                            16             16             16   \n",
936
       "Columns                              512            512            512   \n",
937
       "HighBit                               15             15             15   \n",
938
       "ImageOrientationPatient_0              1              1              1   \n",
939
       "ImageOrientationPatient_1              0              0              0   \n",
940
       "ImageOrientationPatient_2              0              0              0   \n",
941
       "ImageOrientationPatient_3              0              0              0   \n",
942
       "ImageOrientationPatient_4       0.979925       0.979925       0.979925   \n",
943
       "ImageOrientationPatient_5      -0.199368      -0.199368      -0.199368   \n",
944
       "ImagePositionPatient_0              -125           -125           -125   \n",
945
       "ImagePositionPatient_1          -122.491       -122.491       -122.491   \n",
946
       "ImagePositionPatient_2            49.171         89.991        186.938   \n",
947
       "Modality                              CT             CT             CT   \n",
948
       "PatientID                    ID_160aea75    ID_160aea75    ID_160aea75   \n",
949
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
950
       "PixelRepresentation                    1              1              1   \n",
951
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
952
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
953
       "RescaleIntercept                   -1024          -1024          -1024   \n",
954
       "RescaleSlope                           1              1              1   \n",
955
       "Rows                                 512            512            512   \n",
956
       "SOPInstanceUID              ID_c6909d6fa   ID_d1ad467b0   ID_d1d25ea8c   \n",
957
       "SamplesPerPixel                        1              1              1   \n",
958
       "SeriesInstanceUID          ID_000a935543  ID_000a935543  ID_000a935543   \n",
959
       "StudyID                              NaN            NaN            NaN   \n",
960
       "StudyInstanceUID           ID_41d976e5d3  ID_41d976e5d3  ID_41d976e5d3   \n",
961
       "WindowCenter                          30             30             30   \n",
962
       "WindowCenter_0                       NaN            NaN            NaN   \n",
963
       "WindowCenter_1                       NaN            NaN            NaN   \n",
964
       "WindowWidth                           80             80             80   \n",
965
       "WindowWidth_0                        NaN            NaN            NaN   \n",
966
       "WindowWidth_1                        NaN            NaN            NaN   \n",
967
       "fid                         ID_c6909d6fa   ID_d1ad467b0   ID_d1d25ea8c   \n",
968
       "any                                    0              0              0   \n",
969
       "epidural                               0              0              0   \n",
970
       "intraparenchymal                       0              0              0   \n",
971
       "intraventricular                       0              0              0   \n",
972
       "subarachnoid                           0              0              0   \n",
973
       "subdural                               0              0              0   \n",
974
       "normz                           -56.1273       -15.3073          81.64   \n",
975
       "xcm                              249.719        249.719        249.719   \n",
976
       "ycm                              249.632        249.632        249.632   \n",
977
       "theta                         -0.0280377     -0.0280377     -0.0280377   \n",
978
       "pct_tissue                      0.170967       0.320599      0.0209999   \n",
979
       "\n",
980
       "                                  645424         661661         665938  \\\n",
981
       "Unnamed: 0                        645424         661661         665938   \n",
982
       "BitsAllocated                         16             16             16   \n",
983
       "BitsStored                            16             16             16   \n",
984
       "Columns                              512            512            512   \n",
985
       "HighBit                               15             15             15   \n",
986
       "ImageOrientationPatient_0              1              1              1   \n",
987
       "ImageOrientationPatient_1              0              0              0   \n",
988
       "ImageOrientationPatient_2              0              0              0   \n",
989
       "ImageOrientationPatient_3              0              0              0   \n",
990
       "ImageOrientationPatient_4       0.979925       0.979925       0.979925   \n",
991
       "ImageOrientationPatient_5      -0.199368      -0.199368      -0.199368   \n",
992
       "ImagePositionPatient_0              -125           -125           -125   \n",
993
       "ImagePositionPatient_1          -122.491       -122.491       -122.491   \n",
994
       "ImagePositionPatient_2           125.708        146.118        74.6834   \n",
995
       "Modality                              CT             CT             CT   \n",
996
       "PatientID                    ID_160aea75    ID_160aea75    ID_160aea75   \n",
997
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
998
       "PixelRepresentation                    1              1              1   \n",
999
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
1000
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
1001
       "RescaleIntercept                   -1024          -1024          -1024   \n",
1002
       "RescaleSlope                           1              1              1   \n",
1003
       "Rows                                 512            512            512   \n",
1004
       "SOPInstanceUID              ID_db59bb95e   ID_e0ed751c2   ID_e262d76e2   \n",
1005
       "SamplesPerPixel                        1              1              1   \n",
1006
       "SeriesInstanceUID          ID_000a935543  ID_000a935543  ID_000a935543   \n",
1007
       "StudyID                              NaN            NaN            NaN   \n",
1008
       "StudyInstanceUID           ID_41d976e5d3  ID_41d976e5d3  ID_41d976e5d3   \n",
1009
       "WindowCenter                          30             30             30   \n",
1010
       "WindowCenter_0                       NaN            NaN            NaN   \n",
1011
       "WindowCenter_1                       NaN            NaN            NaN   \n",
1012
       "WindowWidth                           80             80             80   \n",
1013
       "WindowWidth_0                        NaN            NaN            NaN   \n",
1014
       "WindowWidth_1                        NaN            NaN            NaN   \n",
1015
       "fid                         ID_db59bb95e   ID_e0ed751c2   ID_e262d76e2   \n",
1016
       "any                                    0              0              0   \n",
1017
       "epidural                               0              0              0   \n",
1018
       "intraparenchymal                       0              0              0   \n",
1019
       "intraventricular                       0              0              0   \n",
1020
       "subarachnoid                           0              0              0   \n",
1021
       "subdural                               0              0              0   \n",
1022
       "normz                              20.41          40.82       -30.6149   \n",
1023
       "xcm                              249.719        249.719        249.719   \n",
1024
       "ycm                              249.632        249.632        249.632   \n",
1025
       "theta                         -0.0280377     -0.0280377     -0.0280377   \n",
1026
       "pct_tissue                      0.305298       0.242409       0.247257   \n",
1027
       "\n",
1028
       "                                  748282  \n",
1029
       "Unnamed: 0                        748282  \n",
1030
       "BitsAllocated                         16  \n",
1031
       "BitsStored                            16  \n",
1032
       "Columns                              512  \n",
1033
       "HighBit                               15  \n",
1034
       "ImageOrientationPatient_0              1  \n",
1035
       "ImageOrientationPatient_1              0  \n",
1036
       "ImageOrientationPatient_2              0  \n",
1037
       "ImageOrientationPatient_3              0  \n",
1038
       "ImageOrientationPatient_4       0.979925  \n",
1039
       "ImageOrientationPatient_5      -0.199368  \n",
1040
       "ImagePositionPatient_0              -125  \n",
1041
       "ImagePositionPatient_1          -122.491  \n",
1042
       "ImagePositionPatient_2           141.016  \n",
1043
       "Modality                              CT  \n",
1044
       "PatientID                    ID_160aea75  \n",
1045
       "PhotometricInterpretation    MONOCHROME2  \n",
1046
       "PixelRepresentation                    1  \n",
1047
       "PixelSpacing_0                  0.488281  \n",
1048
       "PixelSpacing_1                  0.488281  \n",
1049
       "RescaleIntercept                   -1024  \n",
1050
       "RescaleSlope                           1  \n",
1051
       "Rows                                 512  \n",
1052
       "SOPInstanceUID              ID_fe77ec61b  \n",
1053
       "SamplesPerPixel                        1  \n",
1054
       "SeriesInstanceUID          ID_000a935543  \n",
1055
       "StudyID                              NaN  \n",
1056
       "StudyInstanceUID           ID_41d976e5d3  \n",
1057
       "WindowCenter                          30  \n",
1058
       "WindowCenter_0                       NaN  \n",
1059
       "WindowCenter_1                       NaN  \n",
1060
       "WindowWidth                           80  \n",
1061
       "WindowWidth_0                        NaN  \n",
1062
       "WindowWidth_1                        NaN  \n",
1063
       "fid                         ID_fe77ec61b  \n",
1064
       "any                                    0  \n",
1065
       "epidural                               0  \n",
1066
       "intraparenchymal                       0  \n",
1067
       "intraventricular                       0  \n",
1068
       "subarachnoid                           0  \n",
1069
       "subdural                               0  \n",
1070
       "normz                            35.7176  \n",
1071
       "xcm                              249.719  \n",
1072
       "ycm                              249.632  \n",
1073
       "theta                         -0.0280377  \n",
1074
       "pct_tissue                      0.262268  "
1075
      ]
1076
     },
1077
     "execution_count": 12,
1078
     "metadata": {},
1079
     "output_type": "execute_result"
1080
    }
1081
   ],
1082
   "source": [
1083
    "extended_df[90:100].T"
1084
   ]
1085
  },
1086
  {
1087
   "cell_type": "code",
1088
   "execution_count": 13,
1089
   "metadata": {},
1090
   "outputs": [
1091
    {
1092
     "data": {
1093
      "text/plain": [
1094
       "752802"
1095
      ]
1096
     },
1097
     "execution_count": 13,
1098
     "metadata": {},
1099
     "output_type": "execute_result"
1100
    }
1101
   ],
1102
   "source": [
1103
    "assert len(df) == len(extended_df)\n",
1104
    "len(df)"
1105
   ]
1106
  },
1107
  {
1108
   "cell_type": "code",
1109
   "execution_count": 14,
1110
   "metadata": {},
1111
   "outputs": [],
1112
   "source": [
1113
    "extended_df.to_csv(f'data/{stage}_train_dicom_diags_norm.csv')"
1114
   ]
1115
  },
1116
  {
1117
   "cell_type": "code",
1118
   "execution_count": 15,
1119
   "metadata": {},
1120
   "outputs": [],
1121
   "source": [
1122
    "df = pd.read_csv(f'data/{stage}_test_dicom.csv')"
1123
   ]
1124
  },
1125
  {
1126
   "cell_type": "code",
1127
   "execution_count": 16,
1128
   "metadata": {},
1129
   "outputs": [],
1130
   "source": [
1131
    "dfg = df.groupby('SeriesInstanceUID')"
1132
   ]
1133
  },
1134
  {
1135
   "cell_type": "code",
1136
   "execution_count": 17,
1137
   "metadata": {},
1138
   "outputs": [
1139
    {
1140
     "data": {
1141
      "text/plain": [
1142
       "3518"
1143
      ]
1144
     },
1145
     "execution_count": 17,
1146
     "metadata": {},
1147
     "output_type": "execute_result"
1148
    }
1149
   ],
1150
   "source": [
1151
    "group_names = list(dfg.groups.keys())\n",
1152
    "len(group_names)"
1153
   ]
1154
  },
1155
  {
1156
   "cell_type": "code",
1157
   "execution_count": 18,
1158
   "metadata": {},
1159
   "outputs": [
1160
    {
1161
     "data": {
1162
      "text/html": [
1163
       "<div>\n",
1164
       "<style scoped>\n",
1165
       "    .dataframe tbody tr th:only-of-type {\n",
1166
       "        vertical-align: middle;\n",
1167
       "    }\n",
1168
       "\n",
1169
       "    .dataframe tbody tr th {\n",
1170
       "        vertical-align: top;\n",
1171
       "    }\n",
1172
       "\n",
1173
       "    .dataframe thead th {\n",
1174
       "        text-align: right;\n",
1175
       "    }\n",
1176
       "</style>\n",
1177
       "<table border=\"1\" class=\"dataframe\">\n",
1178
       "  <thead>\n",
1179
       "    <tr style=\"text-align: right;\">\n",
1180
       "      <th></th>\n",
1181
       "      <th>0</th>\n",
1182
       "      <th>1</th>\n",
1183
       "      <th>2</th>\n",
1184
       "      <th>3</th>\n",
1185
       "      <th>4</th>\n",
1186
       "    </tr>\n",
1187
       "  </thead>\n",
1188
       "  <tbody>\n",
1189
       "    <tr>\n",
1190
       "      <th>Unnamed: 0</th>\n",
1191
       "      <td>0</td>\n",
1192
       "      <td>1</td>\n",
1193
       "      <td>2</td>\n",
1194
       "      <td>3</td>\n",
1195
       "      <td>4</td>\n",
1196
       "    </tr>\n",
1197
       "    <tr>\n",
1198
       "      <th>BitsAllocated</th>\n",
1199
       "      <td>16</td>\n",
1200
       "      <td>16</td>\n",
1201
       "      <td>16</td>\n",
1202
       "      <td>16</td>\n",
1203
       "      <td>16</td>\n",
1204
       "    </tr>\n",
1205
       "    <tr>\n",
1206
       "      <th>BitsStored</th>\n",
1207
       "      <td>12</td>\n",
1208
       "      <td>16</td>\n",
1209
       "      <td>16</td>\n",
1210
       "      <td>16</td>\n",
1211
       "      <td>12</td>\n",
1212
       "    </tr>\n",
1213
       "    <tr>\n",
1214
       "      <th>Columns</th>\n",
1215
       "      <td>512</td>\n",
1216
       "      <td>512</td>\n",
1217
       "      <td>512</td>\n",
1218
       "      <td>512</td>\n",
1219
       "      <td>512</td>\n",
1220
       "    </tr>\n",
1221
       "    <tr>\n",
1222
       "      <th>HighBit</th>\n",
1223
       "      <td>11</td>\n",
1224
       "      <td>15</td>\n",
1225
       "      <td>15</td>\n",
1226
       "      <td>15</td>\n",
1227
       "      <td>11</td>\n",
1228
       "    </tr>\n",
1229
       "    <tr>\n",
1230
       "      <th>ImageOrientationPatient_0</th>\n",
1231
       "      <td>1</td>\n",
1232
       "      <td>1</td>\n",
1233
       "      <td>1</td>\n",
1234
       "      <td>1</td>\n",
1235
       "      <td>1</td>\n",
1236
       "    </tr>\n",
1237
       "    <tr>\n",
1238
       "      <th>ImageOrientationPatient_1</th>\n",
1239
       "      <td>0</td>\n",
1240
       "      <td>0</td>\n",
1241
       "      <td>0</td>\n",
1242
       "      <td>0</td>\n",
1243
       "      <td>0</td>\n",
1244
       "    </tr>\n",
1245
       "    <tr>\n",
1246
       "      <th>ImageOrientationPatient_2</th>\n",
1247
       "      <td>0</td>\n",
1248
       "      <td>0</td>\n",
1249
       "      <td>0</td>\n",
1250
       "      <td>0</td>\n",
1251
       "      <td>0</td>\n",
1252
       "    </tr>\n",
1253
       "    <tr>\n",
1254
       "      <th>ImageOrientationPatient_3</th>\n",
1255
       "      <td>0</td>\n",
1256
       "      <td>0</td>\n",
1257
       "      <td>0</td>\n",
1258
       "      <td>0</td>\n",
1259
       "      <td>0</td>\n",
1260
       "    </tr>\n",
1261
       "    <tr>\n",
1262
       "      <th>ImageOrientationPatient_4</th>\n",
1263
       "      <td>0.981627</td>\n",
1264
       "      <td>0.987688</td>\n",
1265
       "      <td>0.927184</td>\n",
1266
       "      <td>0.986286</td>\n",
1267
       "      <td>1</td>\n",
1268
       "    </tr>\n",
1269
       "    <tr>\n",
1270
       "      <th>ImageOrientationPatient_5</th>\n",
1271
       "      <td>-0.190809</td>\n",
1272
       "      <td>-0.156434</td>\n",
1273
       "      <td>-0.374607</td>\n",
1274
       "      <td>-0.165048</td>\n",
1275
       "      <td>0</td>\n",
1276
       "    </tr>\n",
1277
       "    <tr>\n",
1278
       "      <th>ImagePositionPatient_0</th>\n",
1279
       "      <td>-112</td>\n",
1280
       "      <td>-125</td>\n",
1281
       "      <td>-125</td>\n",
1282
       "      <td>-125</td>\n",
1283
       "      <td>-125</td>\n",
1284
       "    </tr>\n",
1285
       "    <tr>\n",
1286
       "      <th>ImagePositionPatient_1</th>\n",
1287
       "      <td>-9.80803</td>\n",
1288
       "      <td>-127.061</td>\n",
1289
       "      <td>-119.998</td>\n",
1290
       "      <td>-120.286</td>\n",
1291
       "      <td>-16</td>\n",
1292
       "    </tr>\n",
1293
       "    <tr>\n",
1294
       "      <th>ImagePositionPatient_2</th>\n",
1295
       "      <td>267.219</td>\n",
1296
       "      <td>197.807</td>\n",
1297
       "      <td>78.6604</td>\n",
1298
       "      <td>131.737</td>\n",
1299
       "      <td>98.2</td>\n",
1300
       "    </tr>\n",
1301
       "    <tr>\n",
1302
       "      <th>Modality</th>\n",
1303
       "      <td>CT</td>\n",
1304
       "      <td>CT</td>\n",
1305
       "      <td>CT</td>\n",
1306
       "      <td>CT</td>\n",
1307
       "      <td>CT</td>\n",
1308
       "    </tr>\n",
1309
       "    <tr>\n",
1310
       "      <th>PatientID</th>\n",
1311
       "      <td>ID_52c1ab5a</td>\n",
1312
       "      <td>ID_39e44bce</td>\n",
1313
       "      <td>ID_6f87831a</td>\n",
1314
       "      <td>ID_7a7c9c9e</td>\n",
1315
       "      <td>ID_ab5e477f</td>\n",
1316
       "    </tr>\n",
1317
       "    <tr>\n",
1318
       "      <th>PhotometricInterpretation</th>\n",
1319
       "      <td>MONOCHROME2</td>\n",
1320
       "      <td>MONOCHROME2</td>\n",
1321
       "      <td>MONOCHROME2</td>\n",
1322
       "      <td>MONOCHROME2</td>\n",
1323
       "      <td>MONOCHROME2</td>\n",
1324
       "    </tr>\n",
1325
       "    <tr>\n",
1326
       "      <th>PixelRepresentation</th>\n",
1327
       "      <td>0</td>\n",
1328
       "      <td>1</td>\n",
1329
       "      <td>1</td>\n",
1330
       "      <td>1</td>\n",
1331
       "      <td>0</td>\n",
1332
       "    </tr>\n",
1333
       "    <tr>\n",
1334
       "      <th>PixelSpacing_0</th>\n",
1335
       "      <td>0.488281</td>\n",
1336
       "      <td>0.488281</td>\n",
1337
       "      <td>0.488281</td>\n",
1338
       "      <td>0.488281</td>\n",
1339
       "      <td>0.488281</td>\n",
1340
       "    </tr>\n",
1341
       "    <tr>\n",
1342
       "      <th>PixelSpacing_1</th>\n",
1343
       "      <td>0.488281</td>\n",
1344
       "      <td>0.488281</td>\n",
1345
       "      <td>0.488281</td>\n",
1346
       "      <td>0.488281</td>\n",
1347
       "      <td>0.488281</td>\n",
1348
       "    </tr>\n",
1349
       "    <tr>\n",
1350
       "      <th>RescaleIntercept</th>\n",
1351
       "      <td>-1024</td>\n",
1352
       "      <td>-1024</td>\n",
1353
       "      <td>-1024</td>\n",
1354
       "      <td>-1024</td>\n",
1355
       "      <td>-1024</td>\n",
1356
       "    </tr>\n",
1357
       "    <tr>\n",
1358
       "      <th>RescaleSlope</th>\n",
1359
       "      <td>1</td>\n",
1360
       "      <td>1</td>\n",
1361
       "      <td>1</td>\n",
1362
       "      <td>1</td>\n",
1363
       "      <td>1</td>\n",
1364
       "    </tr>\n",
1365
       "    <tr>\n",
1366
       "      <th>Rows</th>\n",
1367
       "      <td>512</td>\n",
1368
       "      <td>512</td>\n",
1369
       "      <td>512</td>\n",
1370
       "      <td>512</td>\n",
1371
       "      <td>512</td>\n",
1372
       "    </tr>\n",
1373
       "    <tr>\n",
1374
       "      <th>SOPInstanceUID</th>\n",
1375
       "      <td>ID_000000e27</td>\n",
1376
       "      <td>ID_000009146</td>\n",
1377
       "      <td>ID_00007b8cb</td>\n",
1378
       "      <td>ID_000134952</td>\n",
1379
       "      <td>ID_000176f2a</td>\n",
1380
       "    </tr>\n",
1381
       "    <tr>\n",
1382
       "      <th>SamplesPerPixel</th>\n",
1383
       "      <td>1</td>\n",
1384
       "      <td>1</td>\n",
1385
       "      <td>1</td>\n",
1386
       "      <td>1</td>\n",
1387
       "      <td>1</td>\n",
1388
       "    </tr>\n",
1389
       "    <tr>\n",
1390
       "      <th>SeriesInstanceUID</th>\n",
1391
       "      <td>ID_4d28912ba6</td>\n",
1392
       "      <td>ID_acabdeee86</td>\n",
1393
       "      <td>ID_d00cee7f0c</td>\n",
1394
       "      <td>ID_a52a0112d5</td>\n",
1395
       "      <td>ID_f552d3b922</td>\n",
1396
       "    </tr>\n",
1397
       "    <tr>\n",
1398
       "      <th>StudyID</th>\n",
1399
       "      <td>NaN</td>\n",
1400
       "      <td>NaN</td>\n",
1401
       "      <td>NaN</td>\n",
1402
       "      <td>NaN</td>\n",
1403
       "      <td>NaN</td>\n",
1404
       "    </tr>\n",
1405
       "    <tr>\n",
1406
       "      <th>StudyInstanceUID</th>\n",
1407
       "      <td>ID_1f6d1e8aeb</td>\n",
1408
       "      <td>ID_4a8d7ec19f</td>\n",
1409
       "      <td>ID_a6ca244172</td>\n",
1410
       "      <td>ID_fa950a03af</td>\n",
1411
       "      <td>ID_965d8b3d8e</td>\n",
1412
       "    </tr>\n",
1413
       "    <tr>\n",
1414
       "      <th>WindowCenter</th>\n",
1415
       "      <td>NaN</td>\n",
1416
       "      <td>30</td>\n",
1417
       "      <td>30</td>\n",
1418
       "      <td>30</td>\n",
1419
       "      <td>NaN</td>\n",
1420
       "    </tr>\n",
1421
       "    <tr>\n",
1422
       "      <th>WindowCenter_0</th>\n",
1423
       "      <td>40</td>\n",
1424
       "      <td>NaN</td>\n",
1425
       "      <td>NaN</td>\n",
1426
       "      <td>NaN</td>\n",
1427
       "      <td>36</td>\n",
1428
       "    </tr>\n",
1429
       "    <tr>\n",
1430
       "      <th>WindowCenter_1</th>\n",
1431
       "      <td>40</td>\n",
1432
       "      <td>NaN</td>\n",
1433
       "      <td>NaN</td>\n",
1434
       "      <td>NaN</td>\n",
1435
       "      <td>36</td>\n",
1436
       "    </tr>\n",
1437
       "    <tr>\n",
1438
       "      <th>WindowWidth</th>\n",
1439
       "      <td>NaN</td>\n",
1440
       "      <td>80</td>\n",
1441
       "      <td>80</td>\n",
1442
       "      <td>80</td>\n",
1443
       "      <td>NaN</td>\n",
1444
       "    </tr>\n",
1445
       "    <tr>\n",
1446
       "      <th>WindowWidth_0</th>\n",
1447
       "      <td>80</td>\n",
1448
       "      <td>NaN</td>\n",
1449
       "      <td>NaN</td>\n",
1450
       "      <td>NaN</td>\n",
1451
       "      <td>80</td>\n",
1452
       "    </tr>\n",
1453
       "    <tr>\n",
1454
       "      <th>WindowWidth_1</th>\n",
1455
       "      <td>80</td>\n",
1456
       "      <td>NaN</td>\n",
1457
       "      <td>NaN</td>\n",
1458
       "      <td>NaN</td>\n",
1459
       "      <td>80</td>\n",
1460
       "    </tr>\n",
1461
       "  </tbody>\n",
1462
       "</table>\n",
1463
       "</div>"
1464
      ],
1465
      "text/plain": [
1466
       "                                       0              1              2  \\\n",
1467
       "Unnamed: 0                             0              1              2   \n",
1468
       "BitsAllocated                         16             16             16   \n",
1469
       "BitsStored                            12             16             16   \n",
1470
       "Columns                              512            512            512   \n",
1471
       "HighBit                               11             15             15   \n",
1472
       "ImageOrientationPatient_0              1              1              1   \n",
1473
       "ImageOrientationPatient_1              0              0              0   \n",
1474
       "ImageOrientationPatient_2              0              0              0   \n",
1475
       "ImageOrientationPatient_3              0              0              0   \n",
1476
       "ImageOrientationPatient_4       0.981627       0.987688       0.927184   \n",
1477
       "ImageOrientationPatient_5      -0.190809      -0.156434      -0.374607   \n",
1478
       "ImagePositionPatient_0              -112           -125           -125   \n",
1479
       "ImagePositionPatient_1          -9.80803       -127.061       -119.998   \n",
1480
       "ImagePositionPatient_2           267.219        197.807        78.6604   \n",
1481
       "Modality                              CT             CT             CT   \n",
1482
       "PatientID                    ID_52c1ab5a    ID_39e44bce    ID_6f87831a   \n",
1483
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
1484
       "PixelRepresentation                    0              1              1   \n",
1485
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
1486
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
1487
       "RescaleIntercept                   -1024          -1024          -1024   \n",
1488
       "RescaleSlope                           1              1              1   \n",
1489
       "Rows                                 512            512            512   \n",
1490
       "SOPInstanceUID              ID_000000e27   ID_000009146   ID_00007b8cb   \n",
1491
       "SamplesPerPixel                        1              1              1   \n",
1492
       "SeriesInstanceUID          ID_4d28912ba6  ID_acabdeee86  ID_d00cee7f0c   \n",
1493
       "StudyID                              NaN            NaN            NaN   \n",
1494
       "StudyInstanceUID           ID_1f6d1e8aeb  ID_4a8d7ec19f  ID_a6ca244172   \n",
1495
       "WindowCenter                         NaN             30             30   \n",
1496
       "WindowCenter_0                        40            NaN            NaN   \n",
1497
       "WindowCenter_1                        40            NaN            NaN   \n",
1498
       "WindowWidth                          NaN             80             80   \n",
1499
       "WindowWidth_0                         80            NaN            NaN   \n",
1500
       "WindowWidth_1                         80            NaN            NaN   \n",
1501
       "\n",
1502
       "                                       3              4  \n",
1503
       "Unnamed: 0                             3              4  \n",
1504
       "BitsAllocated                         16             16  \n",
1505
       "BitsStored                            16             12  \n",
1506
       "Columns                              512            512  \n",
1507
       "HighBit                               15             11  \n",
1508
       "ImageOrientationPatient_0              1              1  \n",
1509
       "ImageOrientationPatient_1              0              0  \n",
1510
       "ImageOrientationPatient_2              0              0  \n",
1511
       "ImageOrientationPatient_3              0              0  \n",
1512
       "ImageOrientationPatient_4       0.986286              1  \n",
1513
       "ImageOrientationPatient_5      -0.165048              0  \n",
1514
       "ImagePositionPatient_0              -125           -125  \n",
1515
       "ImagePositionPatient_1          -120.286            -16  \n",
1516
       "ImagePositionPatient_2           131.737           98.2  \n",
1517
       "Modality                              CT             CT  \n",
1518
       "PatientID                    ID_7a7c9c9e    ID_ab5e477f  \n",
1519
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2  \n",
1520
       "PixelRepresentation                    1              0  \n",
1521
       "PixelSpacing_0                  0.488281       0.488281  \n",
1522
       "PixelSpacing_1                  0.488281       0.488281  \n",
1523
       "RescaleIntercept                   -1024          -1024  \n",
1524
       "RescaleSlope                           1              1  \n",
1525
       "Rows                                 512            512  \n",
1526
       "SOPInstanceUID              ID_000134952   ID_000176f2a  \n",
1527
       "SamplesPerPixel                        1              1  \n",
1528
       "SeriesInstanceUID          ID_a52a0112d5  ID_f552d3b922  \n",
1529
       "StudyID                              NaN            NaN  \n",
1530
       "StudyInstanceUID           ID_fa950a03af  ID_965d8b3d8e  \n",
1531
       "WindowCenter                          30            NaN  \n",
1532
       "WindowCenter_0                       NaN             36  \n",
1533
       "WindowCenter_1                       NaN             36  \n",
1534
       "WindowWidth                           80            NaN  \n",
1535
       "WindowWidth_0                        NaN             80  \n",
1536
       "WindowWidth_1                        NaN             80  "
1537
      ]
1538
     },
1539
     "execution_count": 18,
1540
     "metadata": {},
1541
     "output_type": "execute_result"
1542
    }
1543
   ],
1544
   "source": [
1545
    "df.head(n=5).T"
1546
   ]
1547
  },
1548
  {
1549
   "cell_type": "code",
1550
   "execution_count": 19,
1551
   "metadata": {},
1552
   "outputs": [
1553
    {
1554
     "data": {
1555
      "application/vnd.jupyter.widget-view+json": {
1556
       "model_id": "cce4c6aa74ec4e65adc8cafc672f8ba1",
1557
       "version_major": 2,
1558
       "version_minor": 0
1559
      },
1560
      "text/plain": [
1561
       "HBox(children=(IntProgress(value=0, max=3518), HTML(value='')))"
1562
      ]
1563
     },
1564
     "metadata": {},
1565
     "output_type": "display_data"
1566
    },
1567
    {
1568
     "name": "stdout",
1569
     "output_type": "stream",
1570
     "text": [
1571
      "\n"
1572
     ]
1573
    }
1574
   ],
1575
   "source": [
1576
    "test_image_dir = f'data/unzip/{stage}_test_images'\n",
    "with ProcessPoolExecutor(max_workers=32) as pool:\n",
    "    study_frames = pool.map(norm_study, repeat(test_image_dir), group_names)\n",
    "    extended_df = pd.concat(tqdm(study_frames, total=len(group_names)))"
1579
   ]
1580
  },
1581
  {
1582
   "cell_type": "code",
1583
   "execution_count": 20,
1584
   "metadata": {},
1585
   "outputs": [
1586
    {
1587
     "data": {
1588
      "text/html": [
1589
       "<div>\n",
1590
       "<style scoped>\n",
1591
       "    .dataframe tbody tr th:only-of-type {\n",
1592
       "        vertical-align: middle;\n",
1593
       "    }\n",
1594
       "\n",
1595
       "    .dataframe tbody tr th {\n",
1596
       "        vertical-align: top;\n",
1597
       "    }\n",
1598
       "\n",
1599
       "    .dataframe thead th {\n",
1600
       "        text-align: right;\n",
1601
       "    }\n",
1602
       "</style>\n",
1603
       "<table border=\"1\" class=\"dataframe\">\n",
1604
       "  <thead>\n",
1605
       "    <tr style=\"text-align: right;\">\n",
1606
       "      <th></th>\n",
1607
       "      <th>13820</th>\n",
1608
       "      <th>18901</th>\n",
1609
       "      <th>20132</th>\n",
1610
       "      <th>24764</th>\n",
1611
       "      <th>36224</th>\n",
1612
       "    </tr>\n",
1613
       "  </thead>\n",
1614
       "  <tbody>\n",
1615
       "    <tr>\n",
1616
       "      <th>Unnamed: 0</th>\n",
1617
       "      <td>13820</td>\n",
1618
       "      <td>18901</td>\n",
1619
       "      <td>20132</td>\n",
1620
       "      <td>24764</td>\n",
1621
       "      <td>36224</td>\n",
1622
       "    </tr>\n",
1623
       "    <tr>\n",
1624
       "      <th>BitsAllocated</th>\n",
1625
       "      <td>16</td>\n",
1626
       "      <td>16</td>\n",
1627
       "      <td>16</td>\n",
1628
       "      <td>16</td>\n",
1629
       "      <td>16</td>\n",
1630
       "    </tr>\n",
1631
       "    <tr>\n",
1632
       "      <th>BitsStored</th>\n",
1633
       "      <td>12</td>\n",
1634
       "      <td>12</td>\n",
1635
       "      <td>12</td>\n",
1636
       "      <td>12</td>\n",
1637
       "      <td>12</td>\n",
1638
       "    </tr>\n",
1639
       "    <tr>\n",
1640
       "      <th>Columns</th>\n",
1641
       "      <td>512</td>\n",
1642
       "      <td>512</td>\n",
1643
       "      <td>512</td>\n",
1644
       "      <td>512</td>\n",
1645
       "      <td>512</td>\n",
1646
       "    </tr>\n",
1647
       "    <tr>\n",
1648
       "      <th>HighBit</th>\n",
1649
       "      <td>11</td>\n",
1650
       "      <td>11</td>\n",
1651
       "      <td>11</td>\n",
1652
       "      <td>11</td>\n",
1653
       "      <td>11</td>\n",
1654
       "    </tr>\n",
1655
       "    <tr>\n",
1656
       "      <th>ImageOrientationPatient_0</th>\n",
1657
       "      <td>1</td>\n",
1658
       "      <td>1</td>\n",
1659
       "      <td>1</td>\n",
1660
       "      <td>1</td>\n",
1661
       "      <td>1</td>\n",
1662
       "    </tr>\n",
1663
       "    <tr>\n",
1664
       "      <th>ImageOrientationPatient_1</th>\n",
1665
       "      <td>0</td>\n",
1666
       "      <td>0</td>\n",
1667
       "      <td>0</td>\n",
1668
       "      <td>0</td>\n",
1669
       "      <td>0</td>\n",
1670
       "    </tr>\n",
1671
       "    <tr>\n",
1672
       "      <th>ImageOrientationPatient_2</th>\n",
1673
       "      <td>0</td>\n",
1674
       "      <td>0</td>\n",
1675
       "      <td>0</td>\n",
1676
       "      <td>0</td>\n",
1677
       "      <td>0</td>\n",
1678
       "    </tr>\n",
1679
       "    <tr>\n",
1680
       "      <th>ImageOrientationPatient_3</th>\n",
1681
       "      <td>0</td>\n",
1682
       "      <td>0</td>\n",
1683
       "      <td>0</td>\n",
1684
       "      <td>0</td>\n",
1685
       "      <td>0</td>\n",
1686
       "    </tr>\n",
1687
       "    <tr>\n",
1688
       "      <th>ImageOrientationPatient_4</th>\n",
1689
       "      <td>0.939693</td>\n",
1690
       "      <td>0.939693</td>\n",
1691
       "      <td>0.939693</td>\n",
1692
       "      <td>0.939693</td>\n",
1693
       "      <td>0.939693</td>\n",
1694
       "    </tr>\n",
1695
       "    <tr>\n",
1696
       "      <th>ImageOrientationPatient_5</th>\n",
1697
       "      <td>-0.34202</td>\n",
1698
       "      <td>-0.34202</td>\n",
1699
       "      <td>-0.34202</td>\n",
1700
       "      <td>-0.34202</td>\n",
1701
       "      <td>-0.34202</td>\n",
1702
       "    </tr>\n",
1703
       "    <tr>\n",
1704
       "      <th>ImagePositionPatient_0</th>\n",
1705
       "      <td>-167</td>\n",
1706
       "      <td>-167</td>\n",
1707
       "      <td>-167</td>\n",
1708
       "      <td>-167</td>\n",
1709
       "      <td>-167</td>\n",
1710
       "    </tr>\n",
1711
       "    <tr>\n",
1712
       "      <th>ImagePositionPatient_1</th>\n",
1713
       "      <td>17.8026</td>\n",
1714
       "      <td>17.8026</td>\n",
1715
       "      <td>17.8026</td>\n",
1716
       "      <td>17.8026</td>\n",
1717
       "      <td>17.8026</td>\n",
1718
       "    </tr>\n",
1719
       "    <tr>\n",
1720
       "      <th>ImagePositionPatient_2</th>\n",
1721
       "      <td>340.091</td>\n",
1722
       "      <td>302.953</td>\n",
1723
       "      <td>324.153</td>\n",
1724
       "      <td>387.753</td>\n",
1725
       "      <td>265.891</td>\n",
1726
       "    </tr>\n",
1727
       "    <tr>\n",
1728
       "      <th>Modality</th>\n",
1729
       "      <td>CT</td>\n",
1730
       "      <td>CT</td>\n",
1731
       "      <td>CT</td>\n",
1732
       "      <td>CT</td>\n",
1733
       "      <td>CT</td>\n",
1734
       "    </tr>\n",
1735
       "    <tr>\n",
1736
       "      <th>PatientID</th>\n",
1737
       "      <td>ID_f997418a</td>\n",
1738
       "      <td>ID_f997418a</td>\n",
1739
       "      <td>ID_f997418a</td>\n",
1740
       "      <td>ID_f997418a</td>\n",
1741
       "      <td>ID_f997418a</td>\n",
1742
       "    </tr>\n",
1743
       "    <tr>\n",
1744
       "      <th>PhotometricInterpretation</th>\n",
1745
       "      <td>MONOCHROME2</td>\n",
1746
       "      <td>MONOCHROME2</td>\n",
1747
       "      <td>MONOCHROME2</td>\n",
1748
       "      <td>MONOCHROME2</td>\n",
1749
       "      <td>MONOCHROME2</td>\n",
1750
       "    </tr>\n",
1751
       "    <tr>\n",
1752
       "      <th>PixelRepresentation</th>\n",
1753
       "      <td>0</td>\n",
1754
       "      <td>0</td>\n",
1755
       "      <td>0</td>\n",
1756
       "      <td>0</td>\n",
1757
       "      <td>0</td>\n",
1758
       "    </tr>\n",
1759
       "    <tr>\n",
1760
       "      <th>PixelSpacing_0</th>\n",
1761
       "      <td>0.488281</td>\n",
1762
       "      <td>0.488281</td>\n",
1763
       "      <td>0.488281</td>\n",
1764
       "      <td>0.488281</td>\n",
1765
       "      <td>0.488281</td>\n",
1766
       "    </tr>\n",
1767
       "    <tr>\n",
1768
       "      <th>PixelSpacing_1</th>\n",
1769
       "      <td>0.488281</td>\n",
1770
       "      <td>0.488281</td>\n",
1771
       "      <td>0.488281</td>\n",
1772
       "      <td>0.488281</td>\n",
1773
       "      <td>0.488281</td>\n",
1774
       "    </tr>\n",
1775
       "    <tr>\n",
1776
       "      <th>RescaleIntercept</th>\n",
1777
       "      <td>-1024</td>\n",
1778
       "      <td>-1024</td>\n",
1779
       "      <td>-1024</td>\n",
1780
       "      <td>-1024</td>\n",
1781
       "      <td>-1024</td>\n",
1782
       "    </tr>\n",
1783
       "    <tr>\n",
1784
       "      <th>RescaleSlope</th>\n",
1785
       "      <td>1</td>\n",
1786
       "      <td>1</td>\n",
1787
       "      <td>1</td>\n",
1788
       "      <td>1</td>\n",
1789
       "      <td>1</td>\n",
1790
       "    </tr>\n",
1791
       "    <tr>\n",
1792
       "      <th>Rows</th>\n",
1793
       "      <td>512</td>\n",
1794
       "      <td>512</td>\n",
1795
       "      <td>512</td>\n",
1796
       "      <td>512</td>\n",
1797
       "      <td>512</td>\n",
1798
       "    </tr>\n",
1799
       "    <tr>\n",
1800
       "      <th>SOPInstanceUID</th>\n",
1801
       "      <td>ID_1cf8d2973</td>\n",
1802
       "      <td>ID_2775d5917</td>\n",
1803
       "      <td>ID_29fb61fbb</td>\n",
1804
       "      <td>ID_33ba827f2</td>\n",
1805
       "      <td>ID_4c1ea3745</td>\n",
1806
       "    </tr>\n",
1807
       "    <tr>\n",
1808
       "      <th>SamplesPerPixel</th>\n",
1809
       "      <td>1</td>\n",
1810
       "      <td>1</td>\n",
1811
       "      <td>1</td>\n",
1812
       "      <td>1</td>\n",
1813
       "      <td>1</td>\n",
1814
       "    </tr>\n",
1815
       "    <tr>\n",
1816
       "      <th>SeriesInstanceUID</th>\n",
1817
       "      <td>ID_0018be306d</td>\n",
1818
       "      <td>ID_0018be306d</td>\n",
1819
       "      <td>ID_0018be306d</td>\n",
1820
       "      <td>ID_0018be306d</td>\n",
1821
       "      <td>ID_0018be306d</td>\n",
1822
       "    </tr>\n",
1823
       "    <tr>\n",
1824
       "      <th>StudyID</th>\n",
1825
       "      <td>NaN</td>\n",
1826
       "      <td>NaN</td>\n",
1827
       "      <td>NaN</td>\n",
1828
       "      <td>NaN</td>\n",
1829
       "      <td>NaN</td>\n",
1830
       "    </tr>\n",
1831
       "    <tr>\n",
1832
       "      <th>StudyInstanceUID</th>\n",
1833
       "      <td>ID_16aac16e79</td>\n",
1834
       "      <td>ID_16aac16e79</td>\n",
1835
       "      <td>ID_16aac16e79</td>\n",
1836
       "      <td>ID_16aac16e79</td>\n",
1837
       "      <td>ID_16aac16e79</td>\n",
1838
       "    </tr>\n",
1839
       "    <tr>\n",
1840
       "      <th>WindowCenter</th>\n",
1841
       "      <td>NaN</td>\n",
1842
       "      <td>NaN</td>\n",
1843
       "      <td>NaN</td>\n",
1844
       "      <td>NaN</td>\n",
1845
       "      <td>NaN</td>\n",
1846
       "    </tr>\n",
1847
       "    <tr>\n",
1848
       "      <th>WindowCenter_0</th>\n",
1849
       "      <td>40</td>\n",
1850
       "      <td>40</td>\n",
1851
       "      <td>40</td>\n",
1852
       "      <td>40</td>\n",
1853
       "      <td>40</td>\n",
1854
       "    </tr>\n",
1855
       "    <tr>\n",
1856
       "      <th>WindowCenter_1</th>\n",
1857
       "      <td>40</td>\n",
1858
       "      <td>40</td>\n",
1859
       "      <td>40</td>\n",
1860
       "      <td>40</td>\n",
1861
       "      <td>40</td>\n",
1862
       "    </tr>\n",
1863
       "    <tr>\n",
1864
       "      <th>WindowWidth</th>\n",
1865
       "      <td>NaN</td>\n",
1866
       "      <td>NaN</td>\n",
1867
       "      <td>NaN</td>\n",
1868
       "      <td>NaN</td>\n",
1869
       "      <td>NaN</td>\n",
1870
       "    </tr>\n",
1871
       "    <tr>\n",
1872
       "      <th>WindowWidth_0</th>\n",
1873
       "      <td>80</td>\n",
1874
       "      <td>80</td>\n",
1875
       "      <td>80</td>\n",
1876
       "      <td>80</td>\n",
1877
       "      <td>80</td>\n",
1878
       "    </tr>\n",
1879
       "    <tr>\n",
1880
       "      <th>WindowWidth_1</th>\n",
1881
       "      <td>80</td>\n",
1882
       "      <td>80</td>\n",
1883
       "      <td>80</td>\n",
1884
       "      <td>80</td>\n",
1885
       "      <td>80</td>\n",
1886
       "    </tr>\n",
1887
       "    <tr>\n",
1888
       "      <th>normz</th>\n",
1889
       "      <td>37.1382</td>\n",
1890
       "      <td>0</td>\n",
1891
       "      <td>21.2001</td>\n",
1892
       "      <td>84.8</td>\n",
1893
       "      <td>-37.0619</td>\n",
1894
       "    </tr>\n",
1895
       "    <tr>\n",
1896
       "      <th>xcm</th>\n",
1897
       "      <td>255.127</td>\n",
1898
       "      <td>255.127</td>\n",
1899
       "      <td>255.127</td>\n",
1900
       "      <td>255.127</td>\n",
1901
       "      <td>255.127</td>\n",
1902
       "    </tr>\n",
1903
       "    <tr>\n",
1904
       "      <th>ycm</th>\n",
1905
       "      <td>241.378</td>\n",
1906
       "      <td>241.378</td>\n",
1907
       "      <td>241.378</td>\n",
1908
       "      <td>241.378</td>\n",
1909
       "      <td>241.378</td>\n",
1910
       "    </tr>\n",
1911
       "    <tr>\n",
1912
       "      <th>theta</th>\n",
1913
       "      <td>0.132157</td>\n",
1914
       "      <td>0.132157</td>\n",
1915
       "      <td>0.132157</td>\n",
1916
       "      <td>0.132157</td>\n",
1917
       "      <td>0.132157</td>\n",
1918
       "    </tr>\n",
1919
       "    <tr>\n",
1920
       "      <th>pct_tissue</th>\n",
1921
       "      <td>0.191586</td>\n",
1922
       "      <td>0.271954</td>\n",
1923
       "      <td>0.242237</td>\n",
1924
       "      <td>0.00454712</td>\n",
1925
       "      <td>0.143639</td>\n",
1926
       "    </tr>\n",
1927
       "  </tbody>\n",
1928
       "</table>\n",
1929
       "</div>"
1930
      ],
1931
      "text/plain": [
1932
       "                                   13820          18901          20132  \\\n",
1933
       "Unnamed: 0                         13820          18901          20132   \n",
1934
       "BitsAllocated                         16             16             16   \n",
1935
       "BitsStored                            12             12             12   \n",
1936
       "Columns                              512            512            512   \n",
1937
       "HighBit                               11             11             11   \n",
1938
       "ImageOrientationPatient_0              1              1              1   \n",
1939
       "ImageOrientationPatient_1              0              0              0   \n",
1940
       "ImageOrientationPatient_2              0              0              0   \n",
1941
       "ImageOrientationPatient_3              0              0              0   \n",
1942
       "ImageOrientationPatient_4       0.939693       0.939693       0.939693   \n",
1943
       "ImageOrientationPatient_5       -0.34202       -0.34202       -0.34202   \n",
1944
       "ImagePositionPatient_0              -167           -167           -167   \n",
1945
       "ImagePositionPatient_1           17.8026        17.8026        17.8026   \n",
1946
       "ImagePositionPatient_2           340.091        302.953        324.153   \n",
1947
       "Modality                              CT             CT             CT   \n",
1948
       "PatientID                    ID_f997418a    ID_f997418a    ID_f997418a   \n",
1949
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2    MONOCHROME2   \n",
1950
       "PixelRepresentation                    0              0              0   \n",
1951
       "PixelSpacing_0                  0.488281       0.488281       0.488281   \n",
1952
       "PixelSpacing_1                  0.488281       0.488281       0.488281   \n",
1953
       "RescaleIntercept                   -1024          -1024          -1024   \n",
1954
       "RescaleSlope                           1              1              1   \n",
1955
       "Rows                                 512            512            512   \n",
1956
       "SOPInstanceUID              ID_1cf8d2973   ID_2775d5917   ID_29fb61fbb   \n",
1957
       "SamplesPerPixel                        1              1              1   \n",
1958
       "SeriesInstanceUID          ID_0018be306d  ID_0018be306d  ID_0018be306d   \n",
1959
       "StudyID                              NaN            NaN            NaN   \n",
1960
       "StudyInstanceUID           ID_16aac16e79  ID_16aac16e79  ID_16aac16e79   \n",
1961
       "WindowCenter                         NaN            NaN            NaN   \n",
1962
       "WindowCenter_0                        40             40             40   \n",
1963
       "WindowCenter_1                        40             40             40   \n",
1964
       "WindowWidth                          NaN            NaN            NaN   \n",
1965
       "WindowWidth_0                         80             80             80   \n",
1966
       "WindowWidth_1                         80             80             80   \n",
1967
       "normz                            37.1382              0        21.2001   \n",
1968
       "xcm                              255.127        255.127        255.127   \n",
1969
       "ycm                              241.378        241.378        241.378   \n",
1970
       "theta                           0.132157       0.132157       0.132157   \n",
1971
       "pct_tissue                      0.191586       0.271954       0.242237   \n",
1972
       "\n",
1973
       "                                   24764          36224  \n",
1974
       "Unnamed: 0                         24764          36224  \n",
1975
       "BitsAllocated                         16             16  \n",
1976
       "BitsStored                            12             12  \n",
1977
       "Columns                              512            512  \n",
1978
       "HighBit                               11             11  \n",
1979
       "ImageOrientationPatient_0              1              1  \n",
1980
       "ImageOrientationPatient_1              0              0  \n",
1981
       "ImageOrientationPatient_2              0              0  \n",
1982
       "ImageOrientationPatient_3              0              0  \n",
1983
       "ImageOrientationPatient_4       0.939693       0.939693  \n",
1984
       "ImageOrientationPatient_5       -0.34202       -0.34202  \n",
1985
       "ImagePositionPatient_0              -167           -167  \n",
1986
       "ImagePositionPatient_1           17.8026        17.8026  \n",
1987
       "ImagePositionPatient_2           387.753        265.891  \n",
1988
       "Modality                              CT             CT  \n",
1989
       "PatientID                    ID_f997418a    ID_f997418a  \n",
1990
       "PhotometricInterpretation    MONOCHROME2    MONOCHROME2  \n",
1991
       "PixelRepresentation                    0              0  \n",
1992
       "PixelSpacing_0                  0.488281       0.488281  \n",
1993
       "PixelSpacing_1                  0.488281       0.488281  \n",
1994
       "RescaleIntercept                   -1024          -1024  \n",
1995
       "RescaleSlope                           1              1  \n",
1996
       "Rows                                 512            512  \n",
1997
       "SOPInstanceUID              ID_33ba827f2   ID_4c1ea3745  \n",
1998
       "SamplesPerPixel                        1              1  \n",
1999
       "SeriesInstanceUID          ID_0018be306d  ID_0018be306d  \n",
2000
       "StudyID                              NaN            NaN  \n",
2001
       "StudyInstanceUID           ID_16aac16e79  ID_16aac16e79  \n",
2002
       "WindowCenter                         NaN            NaN  \n",
2003
       "WindowCenter_0                        40             40  \n",
2004
       "WindowCenter_1                        40             40  \n",
2005
       "WindowWidth                          NaN            NaN  \n",
2006
       "WindowWidth_0                         80             80  \n",
2007
       "WindowWidth_1                         80             80  \n",
2008
       "normz                               84.8       -37.0619  \n",
2009
       "xcm                              255.127        255.127  \n",
2010
       "ycm                              241.378        241.378  \n",
2011
       "theta                           0.132157       0.132157  \n",
2012
       "pct_tissue                    0.00454712       0.143639  "
2013
      ]
2014
     },
2015
     "execution_count": 20,
2016
     "metadata": {},
2017
     "output_type": "execute_result"
2018
    }
2019
   ],
2020
   "source": [
2021
    "extended_df.head(n=5).T"
2022
   ]
2023
  },
2024
  {
2025
   "cell_type": "code",
2026
   "execution_count": 21,
2027
   "metadata": {},
2028
   "outputs": [
2029
    {
2030
     "data": {
2031
      "text/plain": [
2032
       "121232"
2033
      ]
2034
     },
2035
     "execution_count": 21,
2036
     "metadata": {},
2037
     "output_type": "execute_result"
2038
    }
2039
   ],
2040
   "source": [
2041
    "n_test_rows = len(df)\n",
    "assert len(extended_df) == n_test_rows\n",
    "n_test_rows"
2043
   ]
2044
  },
2045
  {
2046
   "cell_type": "code",
2047
   "execution_count": 22,
2048
   "metadata": {},
2049
   "outputs": [],
2050
   "source": [
2051
    "norm_csv_path = f'data/{stage}_test_dicom_norm.csv'\n",
    "extended_df.to_csv(norm_csv_path)"
2052
   ]
2053
  }
2054
 ],
2055
 "metadata": {
2056
  "kernelspec": {
2057
   "display_name": "Python 3",
2058
   "language": "python",
2059
   "name": "python3"
2060
  },
2061
  "language_info": {
2062
   "codemirror_mode": {
2063
    "name": "ipython",
2064
    "version": 3
2065
   },
2066
   "file_extension": ".py",
2067
   "mimetype": "text/x-python",
2068
   "name": "python",
2069
   "nbconvert_exporter": "python",
2070
   "pygments_lexer": "ipython3",
2071
   "version": "3.7.3"
2072
  }
2073
 },
2074
 "nbformat": 4,
2075
 "nbformat_minor": 2
2076
}