a b/Serialized/Prepare.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [
8
    {
9
     "name": "stderr",
10
     "output_type": "stream",
11
     "text": [
12
      "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
13
      "  return f(*args, **kwds)\n",
14
      "/home/reina/anaconda3/envs/RSNA/lib/python3.6/importlib/_bootstrap.py:219: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__\n",
15
      "  return f(*args, **kwds)\n"
16
     ]
17
    }
18
   ],
19
   "source": [
20
    "from __future__ import absolute_import\n",
21
    "from __future__ import division\n",
22
    "from __future__ import print_function\n",
23
    "\n",
24
    "import glob, pylab, pandas as pd\n",
25
    "import pydicom, numpy as np\n",
26
    "from os import listdir\n",
27
    "from os.path import isfile, join\n",
28
    "import matplotlib.pylab as plt\n",
29
    "import os\n",
30
    "import seaborn as sns\n",
31
    "import warnings\n",
32
    "warnings.filterwarnings(action='once')\n",
33
    "import pickle\n",
34
    "from tqdm import tqdm, tqdm_notebook\n",
35
    "%load_ext autoreload\n",
36
    "%autoreload 2\n",
37
    "%matplotlib notebook\n",
38
    "from helper import *\n",
39
    "import time\n",
40
    "from IPython.core.interactiveshell import InteractiveShell\n",
41
    "InteractiveShell.ast_node_interactivity = \"all\"\n",
42
    "from defenitions import *"
43
   ]
44
  },
45
  {
46
   "cell_type": "code",
47
   "execution_count": 65,
48
   "metadata": {},
49
   "outputs": [],
50
   "source": [
51
    "train_base_df = pd.read_csv(train_images_dir)"
52
   ]
53
  },
54
  {
55
   "cell_type": "code",
56
   "execution_count": 67,
57
   "metadata": {},
58
   "outputs": [
59
    {
60
     "data": {
61
      "text/plain": [
62
       "4045572"
63
      ]
64
     },
65
     "execution_count": 67,
66
     "metadata": {},
67
     "output_type": "execute_result"
68
    }
69
   ],
70
   "source": [
71
    "train_base_df.shape[0]"
72
   ]
73
  },
74
  {
75
   "cell_type": "code",
76
   "execution_count": 4,
77
   "metadata": {},
78
   "outputs": [
79
    {
80
     "data": {
81
      "text/html": [
82
       "<div>\n",
83
       "<style scoped>\n",
84
       "    .dataframe tbody tr th:only-of-type {\n",
85
       "        vertical-align: middle;\n",
86
       "    }\n",
87
       "\n",
88
       "    .dataframe tbody tr th {\n",
89
       "        vertical-align: top;\n",
90
       "    }\n",
91
       "\n",
92
       "    .dataframe thead th {\n",
93
       "        text-align: right;\n",
94
       "    }\n",
95
       "</style>\n",
96
       "<table border=\"1\" class=\"dataframe\">\n",
97
       "  <thead>\n",
98
       "    <tr style=\"text-align: right;\">\n",
99
       "      <th></th>\n",
100
       "      <th>ID</th>\n",
101
       "      <th>Label</th>\n",
102
       "      <th>Sub_type</th>\n",
103
       "      <th>PatientID</th>\n",
104
       "    </tr>\n",
105
       "  </thead>\n",
106
       "  <tbody>\n",
107
       "    <tr>\n",
108
       "      <th>0</th>\n",
109
       "      <td>ID_28fbab7eb_epidural</td>\n",
110
       "      <td>0.5</td>\n",
111
       "      <td>epidural</td>\n",
112
       "      <td>28fbab7eb</td>\n",
113
       "    </tr>\n",
114
       "    <tr>\n",
115
       "      <th>1</th>\n",
116
       "      <td>ID_28fbab7eb_intraparenchymal</td>\n",
117
       "      <td>0.5</td>\n",
118
       "      <td>intraparenchymal</td>\n",
119
       "      <td>28fbab7eb</td>\n",
120
       "    </tr>\n",
121
       "    <tr>\n",
122
       "      <th>2</th>\n",
123
       "      <td>ID_28fbab7eb_intraventricular</td>\n",
124
       "      <td>0.5</td>\n",
125
       "      <td>intraventricular</td>\n",
126
       "      <td>28fbab7eb</td>\n",
127
       "    </tr>\n",
128
       "    <tr>\n",
129
       "      <th>3</th>\n",
130
       "      <td>ID_28fbab7eb_subarachnoid</td>\n",
131
       "      <td>0.5</td>\n",
132
       "      <td>subarachnoid</td>\n",
133
       "      <td>28fbab7eb</td>\n",
134
       "    </tr>\n",
135
       "    <tr>\n",
136
       "      <th>4</th>\n",
137
       "      <td>ID_28fbab7eb_subdural</td>\n",
138
       "      <td>0.5</td>\n",
139
       "      <td>subdural</td>\n",
140
       "      <td>28fbab7eb</td>\n",
141
       "    </tr>\n",
142
       "  </tbody>\n",
143
       "</table>\n",
144
       "</div>"
145
      ],
146
      "text/plain": [
147
       "                              ID  Label          Sub_type  PatientID\n",
148
       "0          ID_28fbab7eb_epidural    0.5          epidural  28fbab7eb\n",
149
       "1  ID_28fbab7eb_intraparenchymal    0.5  intraparenchymal  28fbab7eb\n",
150
       "2  ID_28fbab7eb_intraventricular    0.5  intraventricular  28fbab7eb\n",
151
       "3      ID_28fbab7eb_subarachnoid    0.5      subarachnoid  28fbab7eb\n",
152
       "4          ID_28fbab7eb_subdural    0.5          subdural  28fbab7eb"
153
      ]
154
     },
155
     "execution_count": 4,
156
     "metadata": {},
157
     "output_type": "execute_result"
158
    }
159
   ],
160
   "source": [
161
    "train_base_df['Sub_type'] = train_base_df['ID'].str.split(\"_\", n = 3, expand = True)[2]\n",
162
    "train_base_df['PatientID'] = train_base_df['ID'].str.split(\"_\", n = 3, expand = True)[1]\n",
163
    "train_base_df.head()"
164
   ]
165
  },
166
  {
167
   "cell_type": "code",
168
   "execution_count": 26,
169
   "metadata": {},
170
   "outputs": [
171
    {
172
     "data": {
173
      "text/plain": [
174
       "array(['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',\n",
175
       "       'subdural', 'any'], dtype=object)"
176
      ]
177
     },
178
     "execution_count": 26,
179
     "metadata": {},
180
     "output_type": "execute_result"
181
    }
182
   ],
183
   "source": [
184
    "sub_types=train_base_df.Sub_type.unique()\n",
185
    "sub_types"
186
   ]
187
  },
188
  {
189
   "cell_type": "code",
190
   "execution_count": 27,
191
   "metadata": {},
192
   "outputs": [
193
    {
194
     "data": {
195
      "application/vnd.jupyter.widget-view+json": {
196
       "model_id": "ae4afd5b0d824567b34dd59b1b81e430",
197
       "version_major": 2,
198
       "version_minor": 0
199
      },
200
      "text/plain": [
201
       "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))"
202
      ]
203
     },
204
     "metadata": {},
205
     "output_type": "display_data"
206
    },
207
    {
208
     "name": "stdout",
209
     "output_type": "stream",
210
     "text": [
211
      "\n"
212
     ]
213
    }
214
   ],
215
   "source": [
216
    "dfs =[]\n",
217
    "for sub_type in tqdm_notebook(sub_types):\n",
218
    "    df = train_base_df[train_base_df.Sub_type==sub_type][['PatientID','Label']].copy()\n",
219
    "    df=df.rename(columns={\"Label\": sub_type}).reset_index(drop=True)\n",
220
    "    dfs.append(df)\n",
221
    "train_df=dfs[0]\n",
222
    "for df in tqdm_notebook(dfs[1:]):\n",
223
    "    train_df=test_df.merge(df,on='PatientID')"
224
   ]
225
  },
226
  {
227
   "cell_type": "code",
228
   "execution_count": 11,
229
   "metadata": {},
230
   "outputs": [
231
    {
232
     "data": {
233
      "application/vnd.jupyter.widget-view+json": {
234
       "model_id": "5fe21e059b0f4fff9decf2a356119816",
235
       "version_major": 2,
236
       "version_minor": 0
237
      },
238
      "text/plain": [
239
       "HBox(children=(IntProgress(value=0, max=78545), HTML(value='')))"
240
      ]
241
     },
242
     "metadata": {},
243
     "output_type": "display_data"
244
    },
245
    {
246
     "name": "stdout",
247
     "output_type": "stream",
248
     "text": [
249
      "\n"
250
     ]
251
    }
252
   ],
253
   "source": [
254
    "PID = np.zeros(train_df.shape[0],dtype=object)\n",
255
    "StudyI = np.zeros(train_df.shape[0],dtype=object)\n",
256
    "SeriesI = np.zeros(train_df.shape[0],dtype=object)\n",
257
    "WindowCenter = np.zeros(train_df.shape[0],dtype=object)\n",
258
    "WindowWidth = np.zeros(train_df.shape[0],dtype=object)\n",
259
    "ImagePositionX = np.zeros(train_df.shape[0],dtype=np.float)\n",
260
    "ImagePositionY = np.zeros(train_df.shape[0],dtype=np.float)\n",
261
    "ImagePositionZ = np.zeros(train_df.shape[0],dtype=np.float)\n",
262
    "\n",
263
    "for i,row in tqdm_notebook(train_df.iterrows(),total=train_df.shape[0]):\n",
264
    "    ds = pydicom.dcmread(train_images_dir + 'ID_{}.dcm'.format(row['PatientID']))\n",
265
    "    SeriesI[i]=ds.SeriesInstanceUID\n",
266
    "    PID[i]=ds.PatientID\n",
267
    "    StudyI[i]=ds.StudyInstanceUID \n",
268
    "    WindowCenter[i]=ds.WindowCenter\n",
269
    "    WindowWidth[i]=ds.WindowWidth\n",
270
    "    ImagePositionX[i]=float(ds.ImagePositionPatient[0])\n",
271
    "    ImagePositionY[i]=float(ds.ImagePositionPatient[1])\n",
272
    "    ImagePositionZ[i]=float(ds.ImagePositionPatient[2])\n",
273
    "train_df['SeriesI']=SeriesI\n",
274
    "train_df['PID']=PID\n",
275
    "train_df['StudyI']=StudyI\n",
276
    "train_df['WindowCenter']=WindowCenter\n",
277
    "train_df['WindowWidth']=WindowWidth\n",
278
    "train_df['ImagePositionZ']=ImagePositionZ\n",
279
    "train_df['ImagePositionX']=ImagePositionX\n",
280
    "train_df['ImagePositionY']=ImagePositionY\n"
281
   ]
282
  },
283
  {
284
   "cell_type": "code",
285
   "execution_count": null,
286
   "metadata": {},
287
   "outputs": [],
288
   "source": [
289
    "train_df.to_csv(data_dir+'train.csv',index=False)"
290
   ]
291
  },
292
  {
293
   "cell_type": "code",
294
   "execution_count": 3,
295
   "metadata": {},
296
   "outputs": [
297
    {
298
     "data": {
299
      "text/html": [
300
       "<div>\n",
301
       "<style scoped>\n",
302
       "    .dataframe tbody tr th:only-of-type {\n",
303
       "        vertical-align: middle;\n",
304
       "    }\n",
305
       "\n",
306
       "    .dataframe tbody tr th {\n",
307
       "        vertical-align: top;\n",
308
       "    }\n",
309
       "\n",
310
       "    .dataframe thead th {\n",
311
       "        text-align: right;\n",
312
       "    }\n",
313
       "</style>\n",
314
       "<table border=\"1\" class=\"dataframe\">\n",
315
       "  <thead>\n",
316
       "    <tr style=\"text-align: right;\">\n",
317
       "      <th></th>\n",
318
       "      <th>ID</th>\n",
319
       "      <th>Label</th>\n",
320
       "    </tr>\n",
321
       "  </thead>\n",
322
       "  <tbody>\n",
323
       "    <tr>\n",
324
       "      <th>0</th>\n",
325
       "      <td>ID_28fbab7eb_epidural</td>\n",
326
       "      <td>0.5</td>\n",
327
       "    </tr>\n",
328
       "    <tr>\n",
329
       "      <th>1</th>\n",
330
       "      <td>ID_28fbab7eb_intraparenchymal</td>\n",
331
       "      <td>0.5</td>\n",
332
       "    </tr>\n",
333
       "    <tr>\n",
334
       "      <th>2</th>\n",
335
       "      <td>ID_28fbab7eb_intraventricular</td>\n",
336
       "      <td>0.5</td>\n",
337
       "    </tr>\n",
338
       "    <tr>\n",
339
       "      <th>3</th>\n",
340
       "      <td>ID_28fbab7eb_subarachnoid</td>\n",
341
       "      <td>0.5</td>\n",
342
       "    </tr>\n",
343
       "    <tr>\n",
344
       "      <th>4</th>\n",
345
       "      <td>ID_28fbab7eb_subdural</td>\n",
346
       "      <td>0.5</td>\n",
347
       "    </tr>\n",
348
       "  </tbody>\n",
349
       "</table>\n",
350
       "</div>"
351
      ],
352
      "text/plain": [
353
       "                              ID  Label\n",
354
       "0          ID_28fbab7eb_epidural    0.5\n",
355
       "1  ID_28fbab7eb_intraparenchymal    0.5\n",
356
       "2  ID_28fbab7eb_intraventricular    0.5\n",
357
       "3      ID_28fbab7eb_subarachnoid    0.5\n",
358
       "4          ID_28fbab7eb_subdural    0.5"
359
      ]
360
     },
361
     "execution_count": 3,
362
     "metadata": {},
363
     "output_type": "execute_result"
364
    }
365
   ],
366
   "source": [
367
    "sample_submission=pd.read_csv(data_dir+'stage_1_sample_submission.csv')\n",
368
    "sample_submission.head()"
369
   ]
370
  },
371
  {
372
   "cell_type": "code",
373
   "execution_count": 4,
374
   "metadata": {},
375
   "outputs": [
376
    {
377
     "data": {
378
      "text/html": [
379
       "<div>\n",
380
       "<style scoped>\n",
381
       "    .dataframe tbody tr th:only-of-type {\n",
382
       "        vertical-align: middle;\n",
383
       "    }\n",
384
       "\n",
385
       "    .dataframe tbody tr th {\n",
386
       "        vertical-align: top;\n",
387
       "    }\n",
388
       "\n",
389
       "    .dataframe thead th {\n",
390
       "        text-align: right;\n",
391
       "    }\n",
392
       "</style>\n",
393
       "<table border=\"1\" class=\"dataframe\">\n",
394
       "  <thead>\n",
395
       "    <tr style=\"text-align: right;\">\n",
396
       "      <th></th>\n",
397
       "      <th>ID</th>\n",
398
       "      <th>Label</th>\n",
399
       "      <th>Sub_type</th>\n",
400
       "      <th>PatientID</th>\n",
401
       "    </tr>\n",
402
       "  </thead>\n",
403
       "  <tbody>\n",
404
       "    <tr>\n",
405
       "      <th>0</th>\n",
406
       "      <td>ID_28fbab7eb_epidural</td>\n",
407
       "      <td>0.5</td>\n",
408
       "      <td>epidural</td>\n",
409
       "      <td>28fbab7eb</td>\n",
410
       "    </tr>\n",
411
       "    <tr>\n",
412
       "      <th>1</th>\n",
413
       "      <td>ID_28fbab7eb_intraparenchymal</td>\n",
414
       "      <td>0.5</td>\n",
415
       "      <td>intraparenchymal</td>\n",
416
       "      <td>28fbab7eb</td>\n",
417
       "    </tr>\n",
418
       "    <tr>\n",
419
       "      <th>2</th>\n",
420
       "      <td>ID_28fbab7eb_intraventricular</td>\n",
421
       "      <td>0.5</td>\n",
422
       "      <td>intraventricular</td>\n",
423
       "      <td>28fbab7eb</td>\n",
424
       "    </tr>\n",
425
       "    <tr>\n",
426
       "      <th>3</th>\n",
427
       "      <td>ID_28fbab7eb_subarachnoid</td>\n",
428
       "      <td>0.5</td>\n",
429
       "      <td>subarachnoid</td>\n",
430
       "      <td>28fbab7eb</td>\n",
431
       "    </tr>\n",
432
       "    <tr>\n",
433
       "      <th>4</th>\n",
434
       "      <td>ID_28fbab7eb_subdural</td>\n",
435
       "      <td>0.5</td>\n",
436
       "      <td>subdural</td>\n",
437
       "      <td>28fbab7eb</td>\n",
438
       "    </tr>\n",
439
       "  </tbody>\n",
440
       "</table>\n",
441
       "</div>"
442
      ],
443
      "text/plain": [
444
       "                              ID  Label          Sub_type  PatientID\n",
445
       "0          ID_28fbab7eb_epidural    0.5          epidural  28fbab7eb\n",
446
       "1  ID_28fbab7eb_intraparenchymal    0.5  intraparenchymal  28fbab7eb\n",
447
       "2  ID_28fbab7eb_intraventricular    0.5  intraventricular  28fbab7eb\n",
448
       "3      ID_28fbab7eb_subarachnoid    0.5      subarachnoid  28fbab7eb\n",
449
       "4          ID_28fbab7eb_subdural    0.5          subdural  28fbab7eb"
450
      ]
451
     },
452
     "execution_count": 4,
453
     "metadata": {},
454
     "output_type": "execute_result"
455
    }
456
   ],
457
   "source": [
458
    "test_base_df=sample_submission.copy()\n",
459
    "test_base_df['Sub_type'] = test_base_df['ID'].str.split(\"_\", n = 3, expand = True)[2]\n",
460
    "test_base_df['PatientID'] = test_base_df['ID'].str.split(\"_\", n = 3, expand = True)[1]\n",
461
    "test_base_df.head()"
462
   ]
463
  },
464
  {
465
   "cell_type": "code",
466
   "execution_count": 234,
467
   "metadata": {},
468
   "outputs": [],
469
   "source": [
470
    "test_ids=test_df.PatientID.unique()"
471
   ]
472
  },
473
  {
474
   "cell_type": "code",
475
   "execution_count": 26,
476
   "metadata": {},
477
   "outputs": [
478
    {
479
     "data": {
480
      "text/plain": [
481
       "array(['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid',\n",
482
       "       'subdural', 'any'], dtype=object)"
483
      ]
484
     },
485
     "execution_count": 26,
486
     "metadata": {},
487
     "output_type": "execute_result"
488
    }
489
   ],
490
   "source": [
491
    "sub_types=test_base_df.Sub_type.unique()\n",
492
    "sub_types"
493
   ]
494
  },
495
  {
496
   "cell_type": "code",
497
   "execution_count": 7,
498
   "metadata": {},
499
   "outputs": [
500
    {
501
     "data": {
502
      "application/vnd.jupyter.widget-view+json": {
503
       "model_id": "f153620a2b61441bb7cb7f1acf392844",
504
       "version_major": 2,
505
       "version_minor": 0
506
      },
507
      "text/plain": [
508
       "HBox(children=(IntProgress(value=0, max=6), HTML(value='')))"
509
      ]
510
     },
511
     "metadata": {},
512
     "output_type": "display_data"
513
    },
514
    {
515
     "name": "stdout",
516
     "output_type": "stream",
517
     "text": [
518
      "\n"
519
     ]
520
    },
521
    {
522
     "data": {
523
      "application/vnd.jupyter.widget-view+json": {
524
       "model_id": "189e8ac701514c2c963161608d43a459",
525
       "version_major": 2,
526
       "version_minor": 0
527
      },
528
      "text/plain": [
529
       "HBox(children=(IntProgress(value=0, max=5), HTML(value='')))"
530
      ]
531
     },
532
     "metadata": {},
533
     "output_type": "display_data"
534
    },
535
    {
536
     "name": "stdout",
537
     "output_type": "stream",
538
     "text": [
539
      "\n"
540
     ]
541
    }
542
   ],
543
   "source": [
544
    "dfs =[]\n",
545
    "for sub_type in tqdm_notebook(sub_types):\n",
546
    "    df = test_base_df[test_base_df.Sub_type==sub_type][['PatientID','Label']].copy()\n",
547
    "    df=df.rename(columns={\"Label\": sub_type}).reset_index(drop=True)\n",
548
    "    dfs.append(df)\n",
549
    "test_df=dfs[0]\n",
550
    "for df in tqdm_notebook(dfs[1:]):\n",
551
    "    test_df=test_df.merge(df,on='PatientID')"
552
   ]
553
  },
554
  {
555
   "cell_type": "code",
556
   "execution_count": 8,
557
   "metadata": {},
558
   "outputs": [
559
    {
560
     "data": {
561
      "text/html": [
562
       "<div>\n",
563
       "<style scoped>\n",
564
       "    .dataframe tbody tr th:only-of-type {\n",
565
       "        vertical-align: middle;\n",
566
       "    }\n",
567
       "\n",
568
       "    .dataframe tbody tr th {\n",
569
       "        vertical-align: top;\n",
570
       "    }\n",
571
       "\n",
572
       "    .dataframe thead th {\n",
573
       "        text-align: right;\n",
574
       "    }\n",
575
       "</style>\n",
576
       "<table border=\"1\" class=\"dataframe\">\n",
577
       "  <thead>\n",
578
       "    <tr style=\"text-align: right;\">\n",
579
       "      <th></th>\n",
580
       "      <th>PatientID</th>\n",
581
       "      <th>epidural</th>\n",
582
       "      <th>intraparenchymal</th>\n",
583
       "      <th>intraventricular</th>\n",
584
       "      <th>subarachnoid</th>\n",
585
       "      <th>subdural</th>\n",
586
       "      <th>any</th>\n",
587
       "    </tr>\n",
588
       "  </thead>\n",
589
       "  <tbody>\n",
590
       "    <tr>\n",
591
       "      <th>0</th>\n",
592
       "      <td>28fbab7eb</td>\n",
593
       "      <td>0.5</td>\n",
594
       "      <td>0.5</td>\n",
595
       "      <td>0.5</td>\n",
596
       "      <td>0.5</td>\n",
597
       "      <td>0.5</td>\n",
598
       "      <td>0.5</td>\n",
599
       "    </tr>\n",
600
       "    <tr>\n",
601
       "      <th>1</th>\n",
602
       "      <td>877923b8b</td>\n",
603
       "      <td>0.5</td>\n",
604
       "      <td>0.5</td>\n",
605
       "      <td>0.5</td>\n",
606
       "      <td>0.5</td>\n",
607
       "      <td>0.5</td>\n",
608
       "      <td>0.5</td>\n",
609
       "    </tr>\n",
610
       "    <tr>\n",
611
       "      <th>2</th>\n",
612
       "      <td>a591477cb</td>\n",
613
       "      <td>0.5</td>\n",
614
       "      <td>0.5</td>\n",
615
       "      <td>0.5</td>\n",
616
       "      <td>0.5</td>\n",
617
       "      <td>0.5</td>\n",
618
       "      <td>0.5</td>\n",
619
       "    </tr>\n",
620
       "    <tr>\n",
621
       "      <th>3</th>\n",
622
       "      <td>42217c898</td>\n",
623
       "      <td>0.5</td>\n",
624
       "      <td>0.5</td>\n",
625
       "      <td>0.5</td>\n",
626
       "      <td>0.5</td>\n",
627
       "      <td>0.5</td>\n",
628
       "      <td>0.5</td>\n",
629
       "    </tr>\n",
630
       "    <tr>\n",
631
       "      <th>4</th>\n",
632
       "      <td>a130c4d2f</td>\n",
633
       "      <td>0.5</td>\n",
634
       "      <td>0.5</td>\n",
635
       "      <td>0.5</td>\n",
636
       "      <td>0.5</td>\n",
637
       "      <td>0.5</td>\n",
638
       "      <td>0.5</td>\n",
639
       "    </tr>\n",
640
       "  </tbody>\n",
641
       "</table>\n",
642
       "</div>"
643
      ],
644
      "text/plain": [
645
       "   PatientID  epidural  intraparenchymal  intraventricular  subarachnoid  \\\n",
646
       "0  28fbab7eb       0.5               0.5               0.5           0.5   \n",
647
       "1  877923b8b       0.5               0.5               0.5           0.5   \n",
648
       "2  a591477cb       0.5               0.5               0.5           0.5   \n",
649
       "3  42217c898       0.5               0.5               0.5           0.5   \n",
650
       "4  a130c4d2f       0.5               0.5               0.5           0.5   \n",
651
       "\n",
652
       "   subdural  any  \n",
653
       "0       0.5  0.5  \n",
654
       "1       0.5  0.5  \n",
655
       "2       0.5  0.5  \n",
656
       "3       0.5  0.5  \n",
657
       "4       0.5  0.5  "
658
      ]
659
     },
660
     "execution_count": 8,
661
     "metadata": {},
662
     "output_type": "execute_result"
663
    }
664
   ],
665
   "source": [
666
    "test_df.head()"
667
   ]
668
  },
669
  {
670
   "cell_type": "code",
671
   "execution_count": 11,
672
   "metadata": {},
673
   "outputs": [
674
    {
675
     "data": {
676
      "application/vnd.jupyter.widget-view+json": {
677
       "model_id": "5fe21e059b0f4fff9decf2a356119816",
678
       "version_major": 2,
679
       "version_minor": 0
680
      },
681
      "text/plain": [
682
       "HBox(children=(IntProgress(value=0, max=78545), HTML(value='')))"
683
      ]
684
     },
685
     "metadata": {},
686
     "output_type": "display_data"
687
    },
688
    {
689
     "name": "stdout",
690
     "output_type": "stream",
691
     "text": [
692
      "\n"
693
     ]
694
    }
695
   ],
696
   "source": [
697
    "PID = np.zeros(test_df.shape[0],dtype=object)\n",
698
    "StudyI = np.zeros(test_df.shape[0],dtype=object)\n",
699
    "SeriesI = np.zeros(test_df.shape[0],dtype=object)\n",
700
    "WindowCenter = np.zeros(test_df.shape[0],dtype=object)\n",
701
    "WindowWidth = np.zeros(test_df.shape[0],dtype=object)\n",
702
    "ImagePositionX = np.zeros(test_df.shape[0],dtype=np.float)\n",
703
    "ImagePositionY = np.zeros(test_df.shape[0],dtype=np.float)\n",
704
    "ImagePositionZ = np.zeros(test_df.shape[0],dtype=np.float)\n",
705
    "\n",
706
    "for i,row in tqdm_notebook(test_df.iterrows(),total=test_df.shape[0]):\n",
707
    "    ds = pydicom.dcmread(test_images_dir + 'ID_{}.dcm'.format(row['PatientID']))\n",
708
    "    SeriesI[i]=ds.SeriesInstanceUID\n",
709
    "    PID[i]=ds.PatientID\n",
710
    "    StudyI[i]=ds.StudyInstanceUID \n",
711
    "    WindowCenter[i]=ds.WindowCenter\n",
712
    "    WindowWidth[i]=ds.WindowWidth\n",
713
    "    ImagePositionX[i]=float(ds.ImagePositionPatient[0])\n",
714
    "    ImagePositionY[i]=float(ds.ImagePositionPatient[1])\n",
715
    "    ImagePositionZ[i]=float(ds.ImagePositionPatient[2])\n",
716
    "test_df['SeriesI']=SeriesI\n",
717
    "test_df['PID']=PID\n",
718
    "test_df['StudyI']=StudyI\n",
719
    "test_df['WindowCenter']=WindowCenter\n",
720
    "test_df['WindowWidth']=WindowWidth\n",
721
    "test_df['ImagePositionZ']=ImagePositionZ\n",
722
    "test_df['ImagePositionX']=ImagePositionX\n",
723
    "test_df['ImagePositionY']=ImagePositionY\n"
724
   ]
725
  },
726
  {
727
   "cell_type": "code",
728
   "execution_count": 23,
729
   "metadata": {},
730
   "outputs": [],
731
   "source": [
732
    "test_df.to_csv(data_dir+'test.csv',index=False)"
733
   ]
734
  },
735
  {
736
   "cell_type": "code",
737
   "execution_count": null,
738
   "metadata": {},
739
   "outputs": [],
740
   "source": []
741
  }
742
 ],
743
 "metadata": {
744
  "kernelspec": {
745
   "display_name": "Python 3",
746
   "language": "python",
747
   "name": "python3"
748
  },
749
  "language_info": {
750
   "codemirror_mode": {
751
    "name": "ipython",
752
    "version": 3
753
   },
754
   "file_extension": ".py",
755
   "mimetype": "text/x-python",
756
   "name": "python",
757
   "nbconvert_exporter": "python",
758
   "pygments_lexer": "ipython3",
759
   "version": "3.6.6"
760
  }
761
 },
762
 "nbformat": 4,
763
 "nbformat_minor": 2
764
}