Diff of /MRNet_EDA.ipynb [000000] .. [dc3c86]

Switch to side-by-side view

--- a
+++ b/MRNet_EDA.ipynb
@@ -0,0 +1,522 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "from pathlib import Path\n",
+    "from ipywidgets import interact, Dropdown, IntSlider\n",
+    "\n",
+    "%matplotlib notebook\n",
+    "plt.style.use('grayscale')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[01;34m..\u001b[00m\r\n",
+      "├── \u001b[01;34mdata\u001b[00m\r\n",
+      "│   ├── \u001b[01;34mtrain\u001b[00m\r\n",
+      "│   │   ├── \u001b[01;34maxial\u001b[00m\r\n",
+      "│   │   ├── \u001b[01;34mcoronal\u001b[00m\r\n",
+      "│   │   └── \u001b[01;34msagittal\u001b[00m\r\n",
+      "│   └── \u001b[01;34mvalid\u001b[00m\r\n",
+      "│       ├── \u001b[01;34maxial\u001b[00m\r\n",
+      "│       ├── \u001b[01;34mcoronal\u001b[00m\r\n",
+      "│       └── \u001b[01;34msagittal\u001b[00m\r\n",
+      "├── \u001b[01;34mexp\u001b[00m\r\n",
+      "└── \u001b[01;34mmrnet-fastai\u001b[00m\r\n",
+      "\r\n",
+      "11 directories\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Show only the directories (-d) found under the parent folder.\n",
+    "! tree -d .."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0000.npy\r\n",
+      "0001.npy\r\n",
+      "0002.npy\r\n",
+      "0003.npy\r\n",
+      "0004.npy\r\n",
+      "ls: write error: Broken pipe\r\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Peek at the first five file names in the axial training folder.\n",
+    "! ls ../data/train/axial | head -n 5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Root of the dataset and the two split folders beneath it.\n",
+    "data_path = Path('../data')\n",
+    "train_path, valid_path = (data_path / split for split in ('train', 'valid'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "          Case\n",
+      "Abnormal      \n",
+      "0          217\n",
+      "1          913\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Case</th>\n",
+       "      <th>Abnormal</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0000</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0001</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0002</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0003</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0004</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Case  Abnormal\n",
+       "0  0000         1\n",
+       "1  0001         1\n",
+       "2  0002         1\n",
+       "3  0003         1\n",
+       "4  0004         1"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Column names are supplied here because the file is read with header=None;\n",
+    "# 'Case' is read as text so the zero-padded ids (e.g. '0000') are kept.\n",
+    "train_abnl = pd.read_csv(\n",
+    "    data_path / 'train-abnormal.csv',\n",
+    "    header=None,\n",
+    "    names=['Case', 'Abnormal'],\n",
+    "    dtype={'Case': str, 'Abnormal': np.int64},\n",
+    ")\n",
+    "# Number of cases per label value, followed by a preview of the first rows.\n",
+    "print(train_abnl.groupby('Abnormal').count())\n",
+    "train_abnl.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "          Case\n",
+      "ACL_tear      \n",
+      "0          922\n",
+      "1          208\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Case</th>\n",
+       "      <th>ACL_tear</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0001</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0002</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0003</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0004</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Case  ACL_tear\n",
+       "0  0000         0\n",
+       "1  0001         1\n",
+       "2  0002         0\n",
+       "3  0003         0\n",
+       "4  0004         0"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Column names are supplied here because the file is read with header=None;\n",
+    "# 'Case' is read as text so the zero-padded ids (e.g. '0000') are kept.\n",
+    "train_acl = pd.read_csv(\n",
+    "    data_path / 'train-acl.csv',\n",
+    "    header=None,\n",
+    "    names=['Case', 'ACL_tear'],\n",
+    "    dtype={'Case': str, 'ACL_tear': np.int64},\n",
+    ")\n",
+    "# Number of cases per label value, followed by a preview of the first rows.\n",
+    "print(train_acl.groupby('ACL_tear').count())\n",
+    "train_acl.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "               Case\n",
+      "Meniscus_tear      \n",
+      "0               733\n",
+      "1               397\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Case</th>\n",
+       "      <th>Meniscus_tear</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0001</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0002</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0003</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0004</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Case  Meniscus_tear\n",
+       "0  0000              0\n",
+       "1  0001              1\n",
+       "2  0002              0\n",
+       "3  0003              1\n",
+       "4  0004              0"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Column names are supplied here because the file is read with header=None;\n",
+    "# 'Case' is read as text so the zero-padded ids (e.g. '0000') are kept.\n",
+    "train_meniscus = pd.read_csv(\n",
+    "    data_path / 'train-meniscus.csv',\n",
+    "    header=None,\n",
+    "    names=['Case', 'Meniscus_tear'],\n",
+    "    dtype={'Case': str, 'Meniscus_tear': np.int64},\n",
+    ")\n",
+    "# Number of cases per label value, followed by a preview of the first rows.\n",
+    "print(train_meniscus.groupby('Meniscus_tear').count())\n",
+    "train_meniscus.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_one_stack(case, data_path=train_path, plane='coronal'):\n",
+    "    \"\"\"Load the saved ``.npy`` array for one case and one plane.\n",
+    "\n",
+    "    The file is looked up at ``data_path/plane/<case>.npy`` and the result\n",
+    "    of ``np.load`` is returned unchanged.\n",
+    "    \"\"\"\n",
+    "    stack_file = data_path / plane / f'{case}.npy'\n",
+    "    stack = np.load(stack_file)\n",
+    "    return stack\n",
+    "\n",
+    "def load_stacks(case, data_path=train_path):\n",
+    "    \"\"\"Load the coronal, sagittal and axial stacks of one case.\n",
+    "\n",
+    "    Returns a dict keyed by plane name, filled in that order, where each\n",
+    "    value is whatever ``load_one_stack`` returns for that plane.\n",
+    "    \"\"\"\n",
+    "    return {\n",
+    "        plane: load_one_stack(case, data_path, plane=plane)\n",
+    "        for plane in ('coronal', 'sagittal', 'axial')\n",
+    "    }\n",
+    "\n",
+    "def load_partial_stacks(case, data_path=train_path, slice_limit=None):\n",
+    "    \"\"\"Load the three planes of a case, keeping at most ``slice_limit`` slices.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    case : str\n",
+    "        Case identifier used to build the ``<case>.npy`` file name.\n",
+    "    data_path : Path\n",
+    "        Folder that contains one sub-folder per plane.\n",
+    "    slice_limit : int or None\n",
+    "        Maximum number of slices (first array axis) to keep per plane.\n",
+    "        ``None`` or ``0`` returns the full stacks via ``load_stacks``.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    dict\n",
+    "        Plane name -> array. A stack longer than ``slice_limit`` is cut to\n",
+    "        exactly ``slice_limit`` consecutive slices around its middle slice;\n",
+    "        shorter stacks are returned whole.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    ValueError\n",
+    "        If ``slice_limit`` is negative.\n",
+    "    \"\"\"\n",
+    "    if not slice_limit:\n",
+    "        return load_stacks(case, data_path)\n",
+    "    if slice_limit < 0:\n",
+    "        raise ValueError('slice_limit must be a positive integer or None')\n",
+    "\n",
+    "    x = {}\n",
+    "    for plane in ('coronal', 'sagittal', 'axial'):\n",
+    "        data = load_one_stack(case, data_path, plane)\n",
+    "        n_slices = data.shape[0]\n",
+    "        if slice_limit < n_slices:\n",
+    "            # Derive the upper bound from the lower one so the window always\n",
+    "            # holds exactly slice_limit slices. Using mid +/- limit // 2 loses\n",
+    "            # a slice for odd limits and yields nothing for slice_limit=1.\n",
+    "            lower = n_slices // 2 - slice_limit // 2\n",
+    "            data = data[lower:lower + slice_limit]\n",
+    "        x[plane] = data\n",
+    "    return x\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(36, 256, 256)\n",
+      "255\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Sanity-check one stack for the first listed case, using the loader's\n",
+    "# default plane: print its shape and its largest value.\n",
+    "case = train_abnl['Case'][0]\n",
+    "x = load_one_stack(case)\n",
+    "print(x.shape, x.max(), sep='\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "dict_keys(['coronal', 'sagittal', 'axial'])"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load every plane for the same case; the result is a dict keyed by plane name.\n",
+    "x = load_stacks(case)\n",
+    "x.keys()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class KneePlot:\n",
+    "    \"\"\"Interactive slice browser for a dictionary of image stacks.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    x : dict\n",
+    "        Maps a plane name to an array indexed as ``[slice, row, col]``.\n",
+    "    figsize : tuple\n",
+    "        Size handed to ``plt.subplots`` for every rendered slice.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, x: dict, figsize=(10, 10)):\n",
+    "        self.x = x\n",
+    "        self.planes = list(x.keys())\n",
+    "        # Number of slices along the first axis of each stack.\n",
+    "        self.slice_nums = {plane: self.x[plane].shape[0] for plane in self.planes}\n",
+    "        self.figsize = figsize\n",
+    "        # Figure created by the latest _plot_slices call, closed before the next one.\n",
+    "        self._fig = None\n",
+    "\n",
+    "    def _plot_slices(self, plane, im_slice):\n",
+    "        \"\"\"Show slice ``im_slice`` of the stack stored under ``plane``.\"\"\"\n",
+    "        # interact() calls this on every widget change. Close the figure from\n",
+    "        # the previous call so pyplot does not keep piling up open figures.\n",
+    "        previous = getattr(self, '_fig', None)\n",
+    "        if previous is not None:\n",
+    "            plt.close(previous)\n",
+    "        fig, ax = plt.subplots(1, 1, figsize=self.figsize)\n",
+    "        self._fig = fig\n",
+    "        ax.imshow(self.x[plane][im_slice, :, :])\n",
+    "        plt.show()\n",
+    "\n",
+    "    def draw(self):\n",
+    "        \"\"\"Display a plane dropdown and a slice slider wired to the plot.\n",
+    "\n",
+    "        Raises\n",
+    "        ------\n",
+    "        ValueError\n",
+    "            If the plot was built from an empty dictionary.\n",
+    "        \"\"\"\n",
+    "        if not self.planes:\n",
+    "            raise ValueError('KneePlot has no planes to draw')\n",
+    "        planes_widget = Dropdown(options=self.planes)\n",
+    "        plane_init = self.planes[0]\n",
+    "        slice_init = self.slice_nums[plane_init] - 1\n",
+    "        slices_widget = IntSlider(min=0, max=slice_init, value=slice_init//2)\n",
+    "\n",
+    "        def update_slices_widget(*args):\n",
+    "            # Stacks can differ in length, so re-fit the slider range to the\n",
+    "            # newly selected plane and jump back to its middle slice.\n",
+    "            slices_widget.max = self.slice_nums[planes_widget.value] - 1\n",
+    "            slices_widget.value = slices_widget.max // 2\n",
+    "\n",
+    "        planes_widget.observe(update_slices_widget, 'value')\n",
+    "        interact(self._plot_slices, plane=planes_widget, im_slice=slices_widget)\n",
+    "\n",
+    "    def resize(self, figsize):\n",
+    "        \"\"\"Set the figure size used by subsequent plots.\"\"\"\n",
+    "        self.figsize = figsize\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "78a08edf6b3d4417b695f1b118188a9c",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "interactive(children=(Dropdown(description='plane', options=('coronal', 'sagittal', 'axial'), value='coronal')…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Build the viewer from the loaded stacks and show its interactive widgets.\n",
+    "plot = KneePlot(x)\n",
+    "plot.draw()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}