Diff of /MRNet_EDA.ipynb [000000] .. [dc3c86]

Switch to unified view

a b/MRNet_EDA.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "metadata": {},
7
   "outputs": [],
8
   "source": [
9
    "import numpy as np\n",
10
    "import pandas as pd\n",
11
    "import matplotlib.pyplot as plt\n",
12
    "from pathlib import Path\n",
13
    "from ipywidgets import interact, Dropdown, IntSlider\n",
14
    "\n",
15
    "%matplotlib notebook\n",
16
    "plt.style.use('grayscale')"
17
   ]
18
  },
19
  {
20
   "cell_type": "code",
21
   "execution_count": 2,
22
   "metadata": {},
23
   "outputs": [
24
    {
25
     "name": "stdout",
26
     "output_type": "stream",
27
     "text": [
28
      "\u001b[01;34m..\u001b[00m\r\n",
29
      "├── \u001b[01;34mdata\u001b[00m\r\n",
30
      "│   ├── \u001b[01;34mtrain\u001b[00m\r\n",
31
      "│   │   ├── \u001b[01;34maxial\u001b[00m\r\n",
32
      "│   │   ├── \u001b[01;34mcoronal\u001b[00m\r\n",
33
      "│   │   └── \u001b[01;34msagittal\u001b[00m\r\n",
34
      "│   └── \u001b[01;34mvalid\u001b[00m\r\n",
35
      "│       ├── \u001b[01;34maxial\u001b[00m\r\n",
36
      "│       ├── \u001b[01;34mcoronal\u001b[00m\r\n",
37
      "│       └── \u001b[01;34msagittal\u001b[00m\r\n",
38
      "├── \u001b[01;34mexp\u001b[00m\r\n",
39
      "└── \u001b[01;34mmrnet-fastai\u001b[00m\r\n",
40
      "\r\n",
41
      "11 directories\r\n"
42
     ]
43
    }
44
   ],
45
   "source": [
46
    "! tree -d .."
47
   ]
48
  },
49
  {
50
   "cell_type": "code",
51
   "execution_count": 3,
52
   "metadata": {},
53
   "outputs": [
54
    {
55
     "name": "stdout",
56
     "output_type": "stream",
57
     "text": [
58
      "0000.npy\r\n",
59
      "0001.npy\r\n",
60
      "0002.npy\r\n",
61
      "0003.npy\r\n",
62
      "0004.npy\r\n",
63
      "ls: write error: Broken pipe\r\n"
64
     ]
65
    }
66
   ],
67
   "source": [
68
    "! ls ../data/train/axial | head -n 5"
69
   ]
70
  },
71
  {
72
   "cell_type": "code",
73
   "execution_count": 4,
74
   "metadata": {},
75
   "outputs": [],
76
   "source": [
77
    # Dataset root plus its two split directories, all relative to the
    # notebook's working directory.
    data_path = Path('../data')
    train_path, valid_path = (data_path / split for split in ('train', 'valid'))
80
   ]
81
  },
82
  {
83
   "cell_type": "code",
84
   "execution_count": 5,
85
   "metadata": {},
86
   "outputs": [
87
    {
88
     "name": "stdout",
89
     "output_type": "stream",
90
     "text": [
91
      "          Case\n",
92
      "Abnormal      \n",
93
      "0          217\n",
94
      "1          913\n"
95
     ]
96
    },
97
    {
98
     "data": {
99
      "text/html": [
100
       "<div>\n",
101
       "<style scoped>\n",
102
       "    .dataframe tbody tr th:only-of-type {\n",
103
       "        vertical-align: middle;\n",
104
       "    }\n",
105
       "\n",
106
       "    .dataframe tbody tr th {\n",
107
       "        vertical-align: top;\n",
108
       "    }\n",
109
       "\n",
110
       "    .dataframe thead th {\n",
111
       "        text-align: right;\n",
112
       "    }\n",
113
       "</style>\n",
114
       "<table border=\"1\" class=\"dataframe\">\n",
115
       "  <thead>\n",
116
       "    <tr style=\"text-align: right;\">\n",
117
       "      <th></th>\n",
118
       "      <th>Case</th>\n",
119
       "      <th>Abnormal</th>\n",
120
       "    </tr>\n",
121
       "  </thead>\n",
122
       "  <tbody>\n",
123
       "    <tr>\n",
124
       "      <th>0</th>\n",
125
       "      <td>0000</td>\n",
126
       "      <td>1</td>\n",
127
       "    </tr>\n",
128
       "    <tr>\n",
129
       "      <th>1</th>\n",
130
       "      <td>0001</td>\n",
131
       "      <td>1</td>\n",
132
       "    </tr>\n",
133
       "    <tr>\n",
134
       "      <th>2</th>\n",
135
       "      <td>0002</td>\n",
136
       "      <td>1</td>\n",
137
       "    </tr>\n",
138
       "    <tr>\n",
139
       "      <th>3</th>\n",
140
       "      <td>0003</td>\n",
141
       "      <td>1</td>\n",
142
       "    </tr>\n",
143
       "    <tr>\n",
144
       "      <th>4</th>\n",
145
       "      <td>0004</td>\n",
146
       "      <td>1</td>\n",
147
       "    </tr>\n",
148
       "  </tbody>\n",
149
       "</table>\n",
150
       "</div>"
151
      ],
152
      "text/plain": [
153
       "   Case  Abnormal\n",
154
       "0  0000         1\n",
155
       "1  0001         1\n",
156
       "2  0002         1\n",
157
       "3  0003         1\n",
158
       "4  0004         1"
159
      ]
160
     },
161
     "execution_count": 5,
162
     "metadata": {},
163
     "output_type": "execute_result"
164
    }
165
   ],
166
   "source": [
167
    # The label file has no header row, so column names are supplied here.
    # 'Case' is read as text, which keeps ids such as "0000" zero-padded.
    train_abnl = pd.read_csv(
        data_path / 'train-abnormal.csv',
        header=None,
        names=['Case', 'Abnormal'],
        dtype=dict(Case=str, Abnormal=np.int64),
    )
    # Number of cases per label value, then a preview of the first rows.
    print(train_abnl.groupby('Abnormal').count())
    train_abnl.head()
172
   ]
173
  },
174
  {
175
   "cell_type": "code",
176
   "execution_count": 6,
177
   "metadata": {},
178
   "outputs": [
179
    {
180
     "name": "stdout",
181
     "output_type": "stream",
182
     "text": [
183
      "          Case\n",
184
      "ACL_tear      \n",
185
      "0          922\n",
186
      "1          208\n"
187
     ]
188
    },
189
    {
190
     "data": {
191
      "text/html": [
192
       "<div>\n",
193
       "<style scoped>\n",
194
       "    .dataframe tbody tr th:only-of-type {\n",
195
       "        vertical-align: middle;\n",
196
       "    }\n",
197
       "\n",
198
       "    .dataframe tbody tr th {\n",
199
       "        vertical-align: top;\n",
200
       "    }\n",
201
       "\n",
202
       "    .dataframe thead th {\n",
203
       "        text-align: right;\n",
204
       "    }\n",
205
       "</style>\n",
206
       "<table border=\"1\" class=\"dataframe\">\n",
207
       "  <thead>\n",
208
       "    <tr style=\"text-align: right;\">\n",
209
       "      <th></th>\n",
210
       "      <th>Case</th>\n",
211
       "      <th>ACL_tear</th>\n",
212
       "    </tr>\n",
213
       "  </thead>\n",
214
       "  <tbody>\n",
215
       "    <tr>\n",
216
       "      <th>0</th>\n",
217
       "      <td>0000</td>\n",
218
       "      <td>0</td>\n",
219
       "    </tr>\n",
220
       "    <tr>\n",
221
       "      <th>1</th>\n",
222
       "      <td>0001</td>\n",
223
       "      <td>1</td>\n",
224
       "    </tr>\n",
225
       "    <tr>\n",
226
       "      <th>2</th>\n",
227
       "      <td>0002</td>\n",
228
       "      <td>0</td>\n",
229
       "    </tr>\n",
230
       "    <tr>\n",
231
       "      <th>3</th>\n",
232
       "      <td>0003</td>\n",
233
       "      <td>0</td>\n",
234
       "    </tr>\n",
235
       "    <tr>\n",
236
       "      <th>4</th>\n",
237
       "      <td>0004</td>\n",
238
       "      <td>0</td>\n",
239
       "    </tr>\n",
240
       "  </tbody>\n",
241
       "</table>\n",
242
       "</div>"
243
      ],
244
      "text/plain": [
245
       "   Case  ACL_tear\n",
246
       "0  0000         0\n",
247
       "1  0001         1\n",
248
       "2  0002         0\n",
249
       "3  0003         0\n",
250
       "4  0004         0"
251
      ]
252
     },
253
     "execution_count": 6,
254
     "metadata": {},
255
     "output_type": "execute_result"
256
    }
257
   ],
258
   "source": [
259
    # The label file has no header row, so column names are supplied here.
    # 'Case' is read as text, which keeps ids such as "0000" zero-padded.
    train_acl = pd.read_csv(
        data_path / 'train-acl.csv',
        header=None,
        names=['Case', 'ACL_tear'],
        dtype=dict(Case=str, ACL_tear=np.int64),
    )
    # Number of cases per label value, then a preview of the first rows.
    print(train_acl.groupby('ACL_tear').count())
    train_acl.head()
264
   ]
265
  },
266
  {
267
   "cell_type": "code",
268
   "execution_count": 7,
269
   "metadata": {},
270
   "outputs": [
271
    {
272
     "name": "stdout",
273
     "output_type": "stream",
274
     "text": [
275
      "               Case\n",
276
      "Meniscus_tear      \n",
277
      "0               733\n",
278
      "1               397\n"
279
     ]
280
    },
281
    {
282
     "data": {
283
      "text/html": [
284
       "<div>\n",
285
       "<style scoped>\n",
286
       "    .dataframe tbody tr th:only-of-type {\n",
287
       "        vertical-align: middle;\n",
288
       "    }\n",
289
       "\n",
290
       "    .dataframe tbody tr th {\n",
291
       "        vertical-align: top;\n",
292
       "    }\n",
293
       "\n",
294
       "    .dataframe thead th {\n",
295
       "        text-align: right;\n",
296
       "    }\n",
297
       "</style>\n",
298
       "<table border=\"1\" class=\"dataframe\">\n",
299
       "  <thead>\n",
300
       "    <tr style=\"text-align: right;\">\n",
301
       "      <th></th>\n",
302
       "      <th>Case</th>\n",
303
       "      <th>Meniscus_tear</th>\n",
304
       "    </tr>\n",
305
       "  </thead>\n",
306
       "  <tbody>\n",
307
       "    <tr>\n",
308
       "      <th>0</th>\n",
309
       "      <td>0000</td>\n",
310
       "      <td>0</td>\n",
311
       "    </tr>\n",
312
       "    <tr>\n",
313
       "      <th>1</th>\n",
314
       "      <td>0001</td>\n",
315
       "      <td>1</td>\n",
316
       "    </tr>\n",
317
       "    <tr>\n",
318
       "      <th>2</th>\n",
319
       "      <td>0002</td>\n",
320
       "      <td>0</td>\n",
321
       "    </tr>\n",
322
       "    <tr>\n",
323
       "      <th>3</th>\n",
324
       "      <td>0003</td>\n",
325
       "      <td>1</td>\n",
326
       "    </tr>\n",
327
       "    <tr>\n",
328
       "      <th>4</th>\n",
329
       "      <td>0004</td>\n",
330
       "      <td>0</td>\n",
331
       "    </tr>\n",
332
       "  </tbody>\n",
333
       "</table>\n",
334
       "</div>"
335
      ],
336
      "text/plain": [
337
       "   Case  Meniscus_tear\n",
338
       "0  0000              0\n",
339
       "1  0001              1\n",
340
       "2  0002              0\n",
341
       "3  0003              1\n",
342
       "4  0004              0"
343
      ]
344
     },
345
     "execution_count": 7,
346
     "metadata": {},
347
     "output_type": "execute_result"
348
    }
349
   ],
350
   "source": [
351
    # The label file has no header row, so column names are supplied here.
    # 'Case' is read as text, which keeps ids such as "0000" zero-padded.
    train_meniscus = pd.read_csv(
        data_path / 'train-meniscus.csv',
        header=None,
        names=['Case', 'Meniscus_tear'],
        dtype=dict(Case=str, Meniscus_tear=np.int64),
    )
    # Number of cases per label value, then a preview of the first rows.
    print(train_meniscus.groupby('Meniscus_tear').count())
    train_meniscus.head()
356
   ]
357
  },
358
  {
359
   "cell_type": "code",
360
   "execution_count": 8,
361
   "metadata": {},
362
   "outputs": [],
363
   "source": [
364
    def load_one_stack(case, data_path=train_path, plane='coronal'):
        """Load the saved array for one case in one imaging plane.

        Args:
            case: Case identifier; it is formatted into the file name
                ``<case>.npy``.
            data_path: Directory containing one sub-directory per plane
                (defaults to ``train_path``).
            plane: Name of the plane sub-directory to read from.

        Returns:
            Whatever ``np.load`` returns for ``data_path/plane/<case>.npy``.
        """
        stack_file = '{}.npy'.format(case)
        return np.load(data_path / plane / stack_file)
367
    "\n",
368
    def load_stacks(case, data_path=train_path, planes=('coronal', 'sagittal', 'axial')):
        """Load the stacks of one case for several imaging planes.

        Args:
            case: Case identifier, passed through to ``load_one_stack``.
            data_path: Directory containing one sub-directory per plane
                (defaults to ``train_path``).
            planes: Plane names to load. The default keeps the original
                behaviour of loading coronal, sagittal and axial, in that order.

        Returns:
            dict mapping each requested plane name to the array returned by
            ``load_one_stack`` for that plane, in the order given by ``planes``.
        """
        return {
            plane: load_one_stack(case, data_path, plane=plane)
            for plane in planes
        }
374
    "\n",
375
    def load_partial_stacks(case, data_path=train_path, slice_limit=None):
        """Load all three planes of a case, keeping at most ``slice_limit`` central slices.

        Args:
            case: Case identifier, passed through to ``load_one_stack``.
            data_path: Directory containing one sub-directory per plane
                (defaults to ``train_path``).
            slice_limit: Maximum number of slices (along the first axis) to keep
                per plane. ``None`` or ``0`` disables the limit and returns the
                full stacks via ``load_stacks``.

        Returns:
            dict mapping 'coronal', 'sagittal' and 'axial' to arrays. A stack
            with no more than ``slice_limit`` slices is returned whole; a longer
            one is cut down to exactly ``slice_limit`` slices taken around its
            middle slice.

        Raises:
            ValueError: if ``slice_limit`` is negative.
        """
        # A falsy limit (None or 0) means "no limit".
        if not slice_limit:
            return load_stacks(case, data_path)
        if slice_limit < 0:
            raise ValueError(
                'slice_limit must be non-negative, got {}'.format(slice_limit))

        x = {}
        for plane in ('coronal', 'sagittal', 'axial'):
            data = load_one_stack(case, data_path, plane)
            n_slices = data.shape[0]
            if slice_limit >= n_slices:
                x[plane] = data
            else:
                # Start half a window before the middle slice and take exactly
                # slice_limit slices. The earlier form, mid +/- slice_limit // 2,
                # dropped one slice for every odd limit (and returned nothing
                # for slice_limit == 1). Even limits give the same window as before.
                lower = n_slices // 2 - slice_limit // 2
                x[plane] = data[lower:lower + slice_limit, :, :]
        return x
391
    "    "
392
   ]
393
  },
394
  {
395
   "cell_type": "code",
396
   "execution_count": 9,
397
   "metadata": {},
398
   "outputs": [
399
    {
400
     "name": "stdout",
401
     "output_type": "stream",
402
     "text": [
403
      "(36, 256, 256)\n",
404
      "255\n"
405
     ]
406
    }
407
   ],
408
   "source": [
409
    # Take the first listed case and inspect its stack in the default plane:
    # print the array shape and its largest value, one per line.
    case = train_abnl['Case'][0]
    x = load_one_stack(case)
    print(x.shape, x.max(), sep='\n')
413
   ]
414
  },
415
  {
416
   "cell_type": "code",
417
   "execution_count": 10,
418
   "metadata": {},
419
   "outputs": [
420
    {
421
     "data": {
422
      "text/plain": [
423
       "dict_keys(['coronal', 'sagittal', 'axial'])"
424
      ]
425
     },
426
     "execution_count": 10,
427
     "metadata": {},
428
     "output_type": "execute_result"
429
    }
430
   ],
431
   "source": [
432
    "x = load_stacks(case)\n",
433
    "x.keys()"
434
   ]
435
  },
436
  {
437
   "cell_type": "code",
438
   "execution_count": 11,
439
   "metadata": {},
440
   "outputs": [],
441
   "source": [
442
    class KneePlot():
        """Interactive slice browser for a dict of image stacks.

        A dropdown selects the plane and a slider selects the slice index
        along the first axis of that plane's array.
        """

        def __init__(self, x: dict, figsize=(10, 10)):
            """Store the stacks and record how many slices each plane has.

            Args:
                x: dict mapping plane name to an array that is indexed as
                    ``array[slice, :, :]``.
                figsize: Figure size handed to ``plt.subplots``.
            """
            self.x = x
            self.planes = list(x.keys())
            self.slice_nums = {plane: self.x[plane].shape[0] for plane in self.planes}
            self.figsize = figsize
            # Figure drawn by the most recent callback, closed before the next draw.
            self._fig = None

        def _plot_slices(self, plane, im_slice):
            """Draw slice ``im_slice`` of ``plane`` in a fresh figure."""
            # Every widget change runs this callback. Figures created through
            # pyplot stay registered until closed, so close the previous one
            # instead of letting them accumulate. getattr keeps this safe for
            # instances built without __init__.
            previous = getattr(self, '_fig', None)
            if previous is not None:
                plt.close(previous)
            self._fig, ax = plt.subplots(1, 1, figsize=self.figsize)
            ax.imshow(self.x[plane][im_slice, :, :])
            plt.show()

        def draw(self):
            """Build the plane dropdown and slice slider and hook them to the plot."""
            planes_widget = Dropdown(options=self.planes)
            plane_init = self.planes[0]
            slice_init = self.slice_nums[plane_init] - 1
            slices_widget = IntSlider(min=0, max=slice_init, value=slice_init//2)

            def update_slices_widget(*args):
                # Planes can have different slice counts: re-range the slider
                # for the newly selected plane and move it to the middle.
                slices_widget.max = self.slice_nums[planes_widget.value] - 1
                slices_widget.value = slices_widget.max // 2

            planes_widget.observe(update_slices_widget, 'value')
            interact(self._plot_slices, plane=planes_widget, im_slice=slices_widget)

        def resize(self, figsize):
            """Set the figure size used by subsequent draws."""
            self.figsize = figsize
466
   ]
467
  },
468
  {
469
   "cell_type": "code",
470
   "execution_count": 12,
471
   "metadata": {},
472
   "outputs": [
473
    {
474
     "data": {
475
      "application/vnd.jupyter.widget-view+json": {
476
       "model_id": "78a08edf6b3d4417b695f1b118188a9c",
477
       "version_major": 2,
478
       "version_minor": 0
479
      },
480
      "text/plain": [
481
       "interactive(children=(Dropdown(description='plane', options=('coronal', 'sagittal', 'axial'), value='coronal')…"
482
      ]
483
     },
484
     "metadata": {},
485
     "output_type": "display_data"
486
    }
487
   ],
488
   "source": [
489
    "plot = KneePlot(x)\n",
490
    "plot.draw()"
491
   ]
492
  },
493
  {
494
   "cell_type": "code",
495
   "execution_count": null,
496
   "metadata": {},
497
   "outputs": [],
498
   "source": []
499
  }
500
 ],
501
 "metadata": {
502
  "kernelspec": {
503
   "display_name": "Python 3",
504
   "language": "python",
505
   "name": "python3"
506
  },
507
  "language_info": {
508
   "codemirror_mode": {
509
    "name": "ipython",
510
    "version": 3
511
   },
512
   "file_extension": ".py",
513
   "mimetype": "text/x-python",
514
   "name": "python",
515
   "nbconvert_exporter": "python",
516
   "pygments_lexer": "ipython3",
517
   "version": "3.7.2"
518
  }
519
 },
520
 "nbformat": 4,
521
 "nbformat_minor": 2
522
}