--- a +++ b/MRNet_EDA.ipynb @@ -0,0 +1,522 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from pathlib import Path\n", + "from ipywidgets import interact, Dropdown, IntSlider\n", + "\n", + "%matplotlib notebook\n", + "plt.style.use('grayscale')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[01;34m..\u001b[00m\r\n", + "├── \u001b[01;34mdata\u001b[00m\r\n", + "│ ├── \u001b[01;34mtrain\u001b[00m\r\n", + "│ │ ├── \u001b[01;34maxial\u001b[00m\r\n", + "│ │ ├── \u001b[01;34mcoronal\u001b[00m\r\n", + "│ │ └── \u001b[01;34msagittal\u001b[00m\r\n", + "│ └── \u001b[01;34mvalid\u001b[00m\r\n", + "│ ├── \u001b[01;34maxial\u001b[00m\r\n", + "│ ├── \u001b[01;34mcoronal\u001b[00m\r\n", + "│ └── \u001b[01;34msagittal\u001b[00m\r\n", + "├── \u001b[01;34mexp\u001b[00m\r\n", + "└── \u001b[01;34mmrnet-fastai\u001b[00m\r\n", + "\r\n", + "11 directories\r\n" + ] + } + ], + "source": [ + "! tree -d .." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0000.npy\r\n", + "0001.npy\r\n", + "0002.npy\r\n", + "0003.npy\r\n", + "0004.npy\r\n", + "ls: write error: Broken pipe\r\n" + ] + } + ], + "source": [ + "! ls ../data/train/axial | head -n 5" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "data_path = Path('../data')\n", + "train_path = data_path/'train'\n", + "valid_path = data_path/'valid'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Case\n", + "Abnormal \n", + "0 217\n", + "1 913\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Case</th>\n", + " <th>Abnormal</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0000</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0001</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0002</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0003</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0004</td>\n", + " <td>1</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Case Abnormal\n", + "0 0000 1\n", + "1 0001 1\n", + "2 0002 1\n", + "3 0003 1\n", + "4 0004 1" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_abnl = pd.read_csv(data_path/'train-abnormal.csv', header=None,\n", + " names=['Case', 'Abnormal'], \n", + " dtype={'Case': str, 'Abnormal': np.int64})\n", + "print(train_abnl.groupby('Abnormal').count())\n", + "train_abnl.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Case\n", + "ACL_tear \n", + "0 922\n", + "1 208\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Case</th>\n", + " <th>ACL_tear</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0000</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0001</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0002</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0003</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0004</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Case ACL_tear\n", + "0 0000 0\n", + "1 0001 1\n", + "2 0002 0\n", + "3 0003 0\n", + "4 0004 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_acl = pd.read_csv(data_path/'train-acl.csv', header=None,\n", + " names=['Case', 'ACL_tear'], \n", + " dtype={'Case': str, 'ACL_tear': np.int64})\n", + "print(train_acl.groupby('ACL_tear').count())\n", + "train_acl.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Case\n", + "Meniscus_tear \n", + "0 733\n", + "1 397\n" + ] + }, + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>Case</th>\n", + " <th>Meniscus_tear</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0000</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>0001</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>0002</td>\n", + " <td>0</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>0003</td>\n", + " <td>1</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>0004</td>\n", + " <td>0</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " Case Meniscus_tear\n", + "0 0000 0\n", + "1 0001 1\n", + "2 0002 0\n", + "3 0003 1\n", + "4 0004 0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_meniscus = pd.read_csv(data_path/'train-meniscus.csv', header=None,\n", + " names=['Case', 'Meniscus_tear'], \n", + " dtype={'Case': str, 'Meniscus_tear': np.int64})\n", + "print(train_meniscus.groupby('Meniscus_tear').count())\n", + "train_meniscus.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def load_one_stack(case, data_path=train_path, plane='coronal'):\n", + " fpath = data_path/plane/'{}.npy'.format(case)\n", + " return np.load(fpath)\n", + "\n", + "def load_stacks(case, data_path=train_path):\n", + " x = {}\n", + " planes = ['coronal', 'sagittal', 'axial']\n", + " for i, plane in enumerate(planes):\n", + " x[plane] = load_one_stack(case, data_path, plane=plane)\n", + " return x\n", + "\n", + "def load_partial_stacks(case, data_path=train_path, slice_limit=None):\n", + " x = {}\n", + " planes = ['coronal', 'sagittal', 'axial']\n", + " if not slice_limit:\n", + " return load_stacks(case, data_path)\n", + " else:\n", + " for i, plane in enumerate(planes):\n", + " data = load_one_stack(case, data_path, plane)\n", + " if slice_limit >= data.shape[0]:\n", + " x[plane] = data\n", + " else:\n", + " mid_slice = data.shape[0] // 2\n", + " lower = mid_slice - (slice_limit // 2)\n", + " upper = mid_slice + (slice_limit // 2)\n", + " x[plane] = data[lower:upper, :, :]\n", + " return x\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(36, 256, 256)\n", + "255\n" + ] + } + ], + "source": [ + "case = train_abnl.Case[0]\n", + "x = load_one_stack(case)\n", + "print(x.shape)\n", + "print(x.max())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['coronal', 'sagittal', 'axial'])" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x = load_stacks(case)\n", + "x.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class KneePlot():\n", + " def __init__(self, x: dict, figsize=(10, 10)):\n", + " self.x = x\n", + " self.planes = list(x.keys())\n", + " self.slice_nums = {plane: self.x[plane].shape[0] for plane in self.planes}\n", + " self.figsize = figsize\n", + " \n", + " def _plot_slices(self, plane, im_slice): \n", + " fig, ax = plt.subplots(1, 1, figsize=self.figsize)\n", + " ax.imshow(self.x[plane][im_slice, :, :])\n", + " plt.show()\n", + " \n", + " def draw(self):\n", + " planes_widget = Dropdown(options=self.planes)\n", + " plane_init = self.planes[0]\n", + " slice_init = self.slice_nums[plane_init] - 1\n", + " slices_widget = IntSlider(min=0, max=slice_init, value=slice_init//2)\n", + " def update_slices_widget(*args):\n", + " slices_widget.max = self.slice_nums[planes_widget.value] - 1\n", + " slices_widget.value = slices_widget.max // 2\n", + " planes_widget.observe(update_slices_widget, 'value')\n", + " interact(self._plot_slices, plane=planes_widget, im_slice=slices_widget)\n", + " \n", + " def resize(self, figsize): self.figsize = figsize\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "78a08edf6b3d4417b695f1b118188a9c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "interactive(children=(Dropdown(description='plane', options=('coronal', 'sagittal', 'axial'), value='coronal')…" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot = KneePlot(x)\n", + "plot.draw()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}