[dc3c86]: / MRNet_EDA.ipynb

Download this file

523 lines (522 with data), 13.8 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from pathlib import Path\n",
    "from ipywidgets import interact, Dropdown, IntSlider\n",
    "\n",
    "%matplotlib notebook\n",
    "plt.style.use('grayscale')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[01;34m..\u001b[00m\r\n",
      "├── \u001b[01;34mdata\u001b[00m\r\n",
      "│   ├── \u001b[01;34mtrain\u001b[00m\r\n",
      "│   │   ├── \u001b[01;34maxial\u001b[00m\r\n",
      "│   │   ├── \u001b[01;34mcoronal\u001b[00m\r\n",
      "│   │   └── \u001b[01;34msagittal\u001b[00m\r\n",
      "│   └── \u001b[01;34mvalid\u001b[00m\r\n",
      "│       ├── \u001b[01;34maxial\u001b[00m\r\n",
      "│       ├── \u001b[01;34mcoronal\u001b[00m\r\n",
      "│       └── \u001b[01;34msagittal\u001b[00m\r\n",
      "├── \u001b[01;34mexp\u001b[00m\r\n",
      "└── \u001b[01;34mmrnet-fastai\u001b[00m\r\n",
      "\r\n",
      "11 directories\r\n"
     ]
    }
   ],
   "source": [
    "! tree -d .."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0000.npy\r\n",
      "0001.npy\r\n",
      "0002.npy\r\n",
      "0003.npy\r\n",
      "0004.npy\r\n",
      "ls: write error: Broken pipe\r\n"
     ]
    }
   ],
   "source": [
    "! ls ../data/train/axial | head -n 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset root and its train/valid splits, relative to the notebook's directory.\n",
    "data_path = Path('../data')\n",
    "train_path, valid_path = data_path / 'train', data_path / 'valid'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Case\n",
      "Abnormal      \n",
      "0          217\n",
      "1          913\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Case</th>\n",
       "      <th>Abnormal</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0000</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0001</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0002</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0003</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0004</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Case  Abnormal\n",
       "0  0000         1\n",
       "1  0001         1\n",
       "2  0002         1\n",
       "3  0003         1\n",
       "4  0004         1"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The label file has no header row. Case is read as str so ids such as '0000'\n",
    "# keep their zero padding.\n",
    "train_abnl = pd.read_csv(\n",
    "    data_path / 'train-abnormal.csv',\n",
    "    header=None,\n",
    "    names=['Case', 'Abnormal'],\n",
    "    dtype={'Case': str, 'Abnormal': np.int64},\n",
    ")\n",
    "# Number of cases per label value.\n",
    "print(train_abnl.groupby('Abnormal').count())\n",
    "train_abnl.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          Case\n",
      "ACL_tear      \n",
      "0          922\n",
      "1          208\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Case</th>\n",
       "      <th>ACL_tear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0001</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0003</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0004</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Case  ACL_tear\n",
       "0  0000         0\n",
       "1  0001         1\n",
       "2  0002         0\n",
       "3  0003         0\n",
       "4  0004         0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The label file has no header row. Case is read as str so ids such as '0000'\n",
    "# keep their zero padding.\n",
    "train_acl = pd.read_csv(\n",
    "    data_path / 'train-acl.csv',\n",
    "    header=None,\n",
    "    names=['Case', 'ACL_tear'],\n",
    "    dtype={'Case': str, 'ACL_tear': np.int64},\n",
    ")\n",
    "# Number of cases per label value.\n",
    "print(train_acl.groupby('ACL_tear').count())\n",
    "train_acl.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "               Case\n",
      "Meniscus_tear      \n",
      "0               733\n",
      "1               397\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Case</th>\n",
       "      <th>Meniscus_tear</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0000</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0001</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0002</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0003</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0004</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Case  Meniscus_tear\n",
       "0  0000              0\n",
       "1  0001              1\n",
       "2  0002              0\n",
       "3  0003              1\n",
       "4  0004              0"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The label file has no header row. Case is read as str so ids such as '0000'\n",
    "# keep their zero padding.\n",
    "train_meniscus = pd.read_csv(\n",
    "    data_path / 'train-meniscus.csv',\n",
    "    header=None,\n",
    "    names=['Case', 'Meniscus_tear'],\n",
    "    dtype={'Case': str, 'Meniscus_tear': np.int64},\n",
    ")\n",
    "# Number of cases per label value.\n",
    "print(train_meniscus.groupby('Meniscus_tear').count())\n",
    "train_meniscus.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_one_stack(case, data_path=train_path, plane='coronal'):\n",
    "    '''Load the saved array of one case for a single imaging plane.\n",
    "\n",
    "    The file read is data_path/plane/{case}.npy; the result of np.load is\n",
    "    returned unchanged.\n",
    "    '''\n",
    "    stack_file = data_path / plane / f'{case}.npy'\n",
    "    return np.load(stack_file)\n",
    "\n",
    "def load_stacks(case, data_path=train_path):\n",
    "    '''Load the coronal, sagittal and axial stacks of one case.\n",
    "\n",
    "    Returns a dict keyed by plane name (in that order); each value is whatever\n",
    "    load_one_stack returns for that plane.\n",
    "    '''\n",
    "    return {\n",
    "        view: load_one_stack(case, data_path, plane=view)\n",
    "        for view in ('coronal', 'sagittal', 'axial')\n",
    "    }\n",
    "\n",
    "def load_partial_stacks(case, data_path=train_path, slice_limit=None):\n",
    "    '''Load the three plane stacks of a case, keeping at most slice_limit central slices.\n",
    "\n",
    "    Args:\n",
    "        case: case identifier used to build the file name, e.g. '0000'.\n",
    "        data_path: directory holding one sub-directory per plane.\n",
    "        slice_limit: maximum number of slices (first axis) to keep per plane.\n",
    "            A falsy value (None or 0) returns the full stacks.\n",
    "\n",
    "    Returns:\n",
    "        dict mapping 'coronal', 'sagittal' and 'axial' to arrays. A stack with\n",
    "        more than slice_limit slices is cropped to exactly slice_limit slices\n",
    "        centred on its middle slice; shorter stacks are returned whole.\n",
    "    '''\n",
    "    if not slice_limit:\n",
    "        return load_stacks(case, data_path)\n",
    "    x = {}\n",
    "    for plane in ('coronal', 'sagittal', 'axial'):\n",
    "        data = load_one_stack(case, data_path, plane)\n",
    "        n_slices = data.shape[0]\n",
    "        if slice_limit >= n_slices:\n",
    "            x[plane] = data\n",
    "        else:\n",
    "            # The window end is lower + slice_limit rather than mid + slice_limit // 2:\n",
    "            # the latter drops one slice for odd limits and yields an empty stack\n",
    "            # for slice_limit=1. Even limits select the same slices as before.\n",
    "            lower = n_slices // 2 - slice_limit // 2\n",
    "            x[plane] = data[lower:lower + slice_limit, :, :]\n",
    "    return x\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(36, 256, 256)\n",
      "255\n"
     ]
    }
   ],
   "source": [
    "case = train_abnl.Case[0]\n",
    "x = load_one_stack(case)\n",
    "print(x.shape)\n",
    "print(x.max())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['coronal', 'sagittal', 'axial'])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = load_stacks(case)\n",
    "x.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "class KneePlot():\n",
    "    '''Interactive slice viewer for a dict of image stacks.\n",
    "\n",
    "    x maps a plane name to an array whose first axis indexes slices. draw()\n",
    "    shows a plane dropdown and a slice slider and re-renders on every change.\n",
    "    '''\n",
    "\n",
    "    def __init__(self, x: dict, figsize=(10, 10)):\n",
    "        self.x = x\n",
    "        self.planes = list(x.keys())\n",
    "        # Slice count per plane; used to bound the slider in draw().\n",
    "        self.slice_nums = {name: stack.shape[0] for name, stack in x.items()}\n",
    "        self.figsize = figsize\n",
    "\n",
    "    def _plot_slices(self, plane, im_slice):\n",
    "        '''Render slice im_slice of the given plane in a new figure.'''\n",
    "        fig, ax = plt.subplots(1, 1, figsize=self.figsize)\n",
    "        ax.imshow(self.x[plane][im_slice, :, :])\n",
    "        plt.show()\n",
    "\n",
    "    def draw(self):\n",
    "        '''Build the plane/slice widgets and hook them up to _plot_slices.'''\n",
    "        plane_picker = Dropdown(options=self.planes)\n",
    "        first_plane = self.planes[0]\n",
    "        last_index = self.slice_nums[first_plane] - 1\n",
    "        slice_picker = IntSlider(min=0, max=last_index, value=last_index // 2)\n",
    "\n",
    "        def sync_slider_to_plane(*args):\n",
    "            # Planes can have different slice counts: re-bound the slider for the\n",
    "            # newly selected plane and move it back to the middle slice.\n",
    "            slice_picker.max = self.slice_nums[plane_picker.value] - 1\n",
    "            slice_picker.value = slice_picker.max // 2\n",
    "\n",
    "        # Registered before interact() so the slider is re-bounded ahead of the redraw.\n",
    "        plane_picker.observe(sync_slider_to_plane, 'value')\n",
    "        interact(self._plot_slices, plane=plane_picker, im_slice=slice_picker)\n",
    "\n",
    "    def resize(self, figsize):\n",
    "        '''Set the figure size used by subsequent renders.'''\n",
    "        self.figsize = figsize\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "78a08edf6b3d4417b695f1b118188a9c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(Dropdown(description='plane', options=('coronal', 'sagittal', 'axial'), value='coronal')…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plot = KneePlot(x)\n",
    "plot.draw()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}