--- a
+++ b/nbs/RSNA_EfficientNet_B4.ipynb
@@ -0,0 +1,609 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## EfficientNet B4 model\n",
+    "\n",
+    "**Because the Kaggle GPU quota is only 30 hours per week and each training run takes 15+ hours, this notebook can't be committed (that would exceed the quota); instead, download the CSV files and submit them manually.**\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install EfficientNet and the iterative-stratification package\n",
+    "!pip install efficientnet\n",
+    "!pip install iterative-stratification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import efficientnet.keras as efn\n",
+    "from iterstrat.ml_stratifiers import MultilabelStratifiedShuffleSplit"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import pydicom\n",
+    "import os\n",
+    "import collections\n",
+    "import sys\n",
+    "import glob\n",
+    "import random\n",
+    "import cv2\n",
+    "import tensorflow as tf\n",
+    "import multiprocessing\n",
+    "\n",
+    "from math import ceil, floor\n",
+    "from copy import deepcopy\n",
+    "from tqdm import tqdm\n",
+    "from imgaug import augmenters as iaa\n",
+    "\n",
+    "import keras\n",
+    "import keras.backend as K\n",
+    "from keras.callbacks import Callback, ModelCheckpoint\n",
+    "from keras.layers import Dense, Flatten, Dropout\n",
+    "from keras.models import Model, load_model\n",
+    "from keras.utils import Sequence\n",
+    "from keras.losses import binary_crossentropy\n",
+    "from keras.optimizers import Adam"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Model Parameters Setup"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0",
+    "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a"
+   },
+   "outputs": [],
+   "source": [
+    "# Set the random seeds for reproducibility\n",
+    "seed = 42\n",
+    "np.random.seed(seed)\n",
+    "tf.random.set_seed(seed)\n",
+    "\n",
+    "input_image_width = 256\n",
+    "input_image_height = 256\n",
+    "input_image_shape = (input_image_height, input_image_width, 3)\n",
+    "\n",
+    "test_size = 0.01\n",
+    "batch_size = 16\n",
+    "train_batch_size = 16\n",
+    "valid_batch_size = 32\n",
+    "\n",
+    "# Paths\n",
+    "path = '../input/rsna-intracranial-hemorrhage-detection/rsna-intracranial-hemorrhage-detection/'\n",
+    "train_img_path = path + 'stage_2_train/'\n",
+    "test_img_path = path + 'stage_2_test/'\n",
+    "\n",
+    "# Dataset filenames\n",
+    "train_dataset_fns = path + 'stage_2_train.csv'\n",
+    "test_dataset_fns = path + 'stage_2_sample_submission.csv'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# CSV row indices of known duplicate images; dropped in train_dataset_loader\n",
+    "dup_image_list = [56346, 56347, 56348, 56349,\n",
+    "                  56350, 56351, 1171830, 1171831,\n",
+    "                  1171832, 1171833, 1171834, 1171835,\n",
+    "                  3705312, 3705313, 3705314, 3705315,\n",
+    "                  3705316, 3705317, 3842478, 3842479,\n",
+    "                  3842480, 3842481, 3842482, 3842483]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Load the dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_dataset_loader(filename):\n",
+    "    df = pd.read_csv(filename)\n",
+    "    # \"ID\" looks like \"ID_xxxxxxxxx_<diagnosis>\": the first 12 characters\n",
+    "    # identify the image, everything after the underscore is the diagnosis.\n",
+    "    df[\"Image\"] = df[\"ID\"].str.slice(stop=12)\n",
+    "    df[\"Diagnosis\"] = df[\"ID\"].str.slice(start=13)\n",
+    "    df = df.drop(index=dup_image_list)\n",
+    "    df = df.reset_index(drop=True)\n",
+    "    df = df.loc[:, [\"Label\", \"Diagnosis\", \"Image\"]]\n",
+    "    # Pivot to one row per image, one label column per diagnosis\n",
+    "    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)\n",
+    "    return df\n",
+    "\n",
+    "\n",
+    "def test_dataset_loader(filename):\n",
+    "    df = pd.read_csv(filename)\n",
+    "    df[\"Image\"] = df[\"ID\"].str.slice(stop=12)\n",
+    "    df[\"Diagnosis\"] = df[\"ID\"].str.slice(start=13)\n",
+    "    df = df.loc[:, [\"Label\", \"Diagnosis\", \"Image\"]]\n",
+    "    df = df.set_index(['Image', 'Diagnosis']).unstack(level=-1)\n",
+    "    return df"
+   ]
+  },
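+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A minimal sketch (with a made-up image id) of what the loaders above do to the `ID` column:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch with a hypothetical id: slice(stop=12) keeps the 12-character\n",
+    "# image id, slice(start=13) keeps the diagnosis, and unstack() pivots\n",
+    "# to one row per image with one label column per diagnosis.\n",
+    "demo = pd.DataFrame({\"ID\": [\"ID_000012eaf_epidural\", \"ID_000012eaf_any\"],\n",
+    "                     \"Label\": [0, 1]})\n",
+    "demo[\"Image\"] = demo[\"ID\"].str.slice(stop=12)\n",
+    "demo[\"Diagnosis\"] = demo[\"ID\"].str.slice(start=13)\n",
+    "print(demo.set_index([\"Image\", \"Diagnosis\"]).unstack(level=-1))"
+   ]
+  },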
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df = train_dataset_loader(train_dataset_fns)\n",
+    "test_df = test_dataset_loader(test_dataset_fns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Data EDA and Cleaning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def correct_dcm(dcm):\n",
+    "    # Fix DICOMs whose 12-bit pixel data was stored with the wrong\n",
+    "    # RescaleIntercept: shift by 1000 and wrap values above 4096.\n",
+    "    x = dcm.pixel_array + 1000\n",
+    "    px_mode = 4096\n",
+    "    x[x >= px_mode] = x[x >= px_mode] - px_mode\n",
+    "    dcm.PixelData = x.tobytes()\n",
+    "    dcm.RescaleIntercept = -1000\n",
+    "\n",
+    "def window_image(dcm, window_center, window_width):\n",
+    "    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):\n",
+    "        correct_dcm(dcm)\n",
+    "    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept\n",
+    "\n",
+    "    # Resize (width == height here, so the (H, W) order doesn't matter)\n",
+    "    img = cv2.resize(img, input_image_shape[:2], interpolation=cv2.INTER_LINEAR)\n",
+    "\n",
+    "    img_min = window_center - window_width // 2\n",
+    "    img_max = window_center + window_width // 2\n",
+    "    img = np.clip(img, img_min, img_max)\n",
+    "    return img\n",
+    "\n",
+    "def bsb_window(dcm):\n",
+    "    # Brain / subdural / soft-tissue windows stacked as three channels\n",
+    "    brain_img = window_image(dcm, 40, 80)\n",
+    "    subdural_img = window_image(dcm, 80, 200)\n",
+    "    soft_img = window_image(dcm, 40, 380)\n",
+    "\n",
+    "    brain_img = (brain_img - 0) / 80\n",
+    "    subdural_img = (subdural_img - (-20)) / 200\n",
+    "    soft_img = (soft_img - (-150)) / 380\n",
+    "    bsb_img = np.array([brain_img, subdural_img, soft_img]).transpose(1, 2, 0)\n",
+    "    return bsb_img\n",
+    "\n",
+    "def _read(path, SHAPE):\n",
+    "    dcm = pydicom.dcmread(path)\n",
+    "    try:\n",
+    "        img = bsb_window(dcm)\n",
+    "    except Exception:\n",
+    "        # Unreadable/corrupt DICOMs fall back to an all-zero image\n",
+    "        img = np.zeros(SHAPE)\n",
+    "    return img"
+   ]
+  },
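+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For reference, the clip ranges implied by the three (center, width) settings in `bsb_window`: each channel is clipped to `[center - width//2, center + width//2]` and rescaled to `[0, 1]`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Window bounds used by bsb_window: clip to [lo, hi], then (img - lo) / width\n",
+    "for name, center, width in [(\"brain\", 40, 80),\n",
+    "                            (\"subdural\", 80, 200),\n",
+    "                            (\"soft\", 40, 380)]:\n",
+    "    lo, hi = center - width // 2, center + width // 2\n",
+    "    print(f\"{name}: clip to [{lo}, {hi}], rescale as (img - {lo}) / {width}\")"
+   ]
+  },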
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def window_with_correction(dcm, window_center, window_width):\n",
+    "    if (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100):\n",
+    "        correct_dcm(dcm)\n",
+    "    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept\n",
+    "    img_min = window_center - window_width // 2\n",
+    "    img_max = window_center + window_width // 2\n",
+    "    img = np.clip(img, img_min, img_max)\n",
+    "    return img\n",
+    "\n",
+    "def window_without_correction(dcm, window_center, window_width):\n",
+    "    img = dcm.pixel_array * dcm.RescaleSlope + dcm.RescaleIntercept\n",
+    "    img_min = window_center - window_width // 2\n",
+    "    img_max = window_center + window_width // 2\n",
+    "    img = np.clip(img, img_min, img_max)\n",
+    "    return img\n",
+    "\n",
+    "def window_testing(img, window):\n",
+    "    brain_img = window(img, 40, 80)\n",
+    "    subdural_img = window(img, 80, 200)\n",
+    "    soft_img = window(img, 40, 380)\n",
+    "\n",
+    "    brain_img = (brain_img - 0) / 80\n",
+    "    subdural_img = (subdural_img - (-20)) / 200\n",
+    "    soft_img = (soft_img - (-150)) / 380\n",
+    "    bsb_img = np.array([brain_img, subdural_img, soft_img]).transpose(1, 2, 0)\n",
+    "\n",
+    "    return bsb_img\n",
+    "\n",
+    "\n",
+    "# Example of a \"bad data point\", i.e. one where\n",
+    "# (dcm.BitsStored == 12) and (dcm.PixelRepresentation == 0) and (int(dcm.RescaleIntercept) > -100)\n",
+    "import matplotlib.pyplot as plt\n",
+    "dicom = pydicom.dcmread(train_img_path + train_df.index[101] + \".dcm\")\n",
+    "\n",
+    "fig, ax = plt.subplots(1, 2)\n",
+    "\n",
+    "ax[0].imshow(window_testing(dicom, window_without_correction), cmap=plt.cm.bone);\n",
+    "ax[0].set_title(\"original\")\n",
+    "ax[1].imshow(window_testing(dicom, window_with_correction), cmap=plt.cm.bone);\n",
+    "ax[1].set_title(\"corrected\");"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Random image augmentation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Image augmentation\n",
+    "sometimes = lambda aug: iaa.Sometimes(0.25, aug)\n",
+    "\n",
+    "augmentation = iaa.Sequential([iaa.Fliplr(0.25),\n",
+    "                               iaa.Flipud(0.10),\n",
+    "                               iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),\n",
+    "                               iaa.Sometimes(0.5, iaa.GaussianBlur(sigma=(0, 0.5))),\n",
+    "                               # Strengthen or weaken the contrast in each image\n",
+    "                               iaa.ContrastNormalization((0.75, 1.5)),\n",
+    "                               sometimes(iaa.Crop(px=(0, 25), keep_size=True, sample_independently=False))\n",
+    "                              ], random_order=True)\n",
+    "\n",
+    "\n",
+    "# Generators\n",
+    "class DataGenerator_Train(keras.utils.Sequence):\n",
+    "    def __init__(self, dataset, labels, batch_size = batch_size, image_shape = input_image_shape, image_path = train_img_path, augment = False, *args, **kwargs):\n",
+    "        self.dataset = dataset\n",
+    "        self.ids = dataset.index\n",
+    "        self.labels = labels\n",
+    "        self.batch_size = batch_size\n",
+    "        self.image_shape = image_shape\n",
+    "        self.image_path = image_path  # was hard-coded to train_img_path; honour the argument\n",
+    "        self.augment = augment\n",
+    "        self.on_epoch_end()\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return int(ceil(len(self.ids) / self.batch_size))\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]\n",
+    "        X, Y = self.__data_generation(indices)\n",
+    "        return X, Y\n",
+    "\n",
+    "    def augmentor(self, image):\n",
+    "        augment_img = augmentation\n",
+    "        image_aug = augment_img.augment_image(image)\n",
+    "        return image_aug\n",
+    "\n",
+    "    def on_epoch_end(self):\n",
+    "        self.indices = np.arange(len(self.ids))\n",
+    "        np.random.shuffle(self.indices)\n",
+    "\n",
+    "    def __data_generation(self, indices):\n",
+    "        X = np.empty((self.batch_size, *self.image_shape))\n",
+    "        Y = np.empty((self.batch_size, 6), dtype=np.float32)\n",
+    "\n",
+    "        for i, index in enumerate(indices):\n",
+    "            ID = self.ids[index]\n",
+    "            image = _read(self.image_path+ID+\".dcm\", self.image_shape)\n",
+    "            if self.augment:\n",
+    "                X[i,] = self.augmentor(image)\n",
+    "            else:\n",
+    "                X[i,] = image\n",
+    "            Y[i,] = self.labels.iloc[index].values\n",
+    "        return X, Y\n",
+    "\n",
+    "class DataGenerator_Test(keras.utils.Sequence):\n",
+    "    def __init__(self, dataset, labels, batch_size = batch_size, image_shape = input_image_shape, image_path = test_img_path, *args, **kwargs):\n",
+    "        self.dataset = dataset\n",
+    "        self.ids = dataset.index\n",
+    "        self.labels = labels\n",
+    "        self.batch_size = batch_size\n",
+    "        self.image_shape = image_shape\n",
+    "        self.image_path = image_path\n",
+    "        self.on_epoch_end()\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        return int(ceil(len(self.ids) / self.batch_size))\n",
+    "\n",
+    "    def __getitem__(self, index):\n",
+    "        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]\n",
+    "        X = self.__data_generation(indices)\n",
+    "        return X\n",
+    "\n",
+    "    def on_epoch_end(self):\n",
+    "        self.indices = np.arange(len(self.ids))\n",
+    "\n",
+    "    def __data_generation(self, indices):\n",
+    "        # Note: the last batch is padded to a full batch_size; the extra\n",
+    "        # rows are trimmed off again in predictions() below.\n",
+    "        X = np.empty((self.batch_size, *self.image_shape))\n",
+    "\n",
+    "        for i, index in enumerate(indices):\n",
+    "            ID = self.ids[index]\n",
+    "            image = _read(self.image_path+ID+\".dcm\", self.image_shape)\n",
+    "            X[i,] = image\n",
+    "        return X"
+   ]
+  },
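+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A quick smoke test for the train generator (a sketch: it assumes the stage_2_train DICOMs are available under `train_img_path`) — pull one batch and check the shapes:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Smoke test: one batch should be (train_batch_size, 256, 256, 3) images\n",
+    "# and (train_batch_size, 6) labels.\n",
+    "sample_gen = DataGenerator_Train(train_df.iloc[:train_batch_size],\n",
+    "                                 train_df.iloc[:train_batch_size],\n",
+    "                                 train_batch_size, input_image_shape,\n",
+    "                                 augment=False)\n",
+    "X_batch, Y_batch = sample_gen[0]\n",
+    "print(X_batch.shape, Y_batch.shape)"
+   ]
+  },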
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "- oversample the minority class 'epidural' by appending its positive rows a second time"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Oversampling\n",
+    "epidural_df = train_df[train_df.Label['epidural'] == 1]\n",
+    "train_oversample_df = pd.concat([train_df, epidural_df])\n",
+    "train_df = train_oversample_df\n",
+    "\n",
+    "# Summary\n",
+    "print('Train Shape: {}'.format(train_df.shape))\n",
+    "print('Test Shape: {}'.format(test_df.shape))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### EfficientNet model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predictions(test_df, model):\n",
+    "    test_preds = model.predict_generator(DataGenerator_Test(test_df, None, 5, input_image_shape, test_img_path), verbose = 1)\n",
+    "    # Trim the padding rows added by the generator's last batch\n",
+    "    return test_preds[:test_df.shape[0]]\n",
+    "\n",
+    "def ModelCheckpointFull(model_name):\n",
+    "    return ModelCheckpoint(model_name,\n",
+    "                           monitor = 'val_loss',\n",
+    "                           verbose = 1,\n",
+    "                           save_best_only = False,\n",
+    "                           save_weights_only = True,\n",
+    "                           mode = 'min',\n",
+    "                           period = 1)\n",
+    "\n",
+    "# Create Model\n",
+    "def create_model():\n",
+    "    K.clear_session()\n",
+    "\n",
+    "    base_model = efn.EfficientNetB4(weights = 'imagenet', include_top = False, pooling = 'avg', input_shape = input_image_shape)\n",
+    "    x = base_model.output\n",
+    "    x = Dropout(0.2)(x)\n",
+    "    y_pred = Dense(6, activation = 'sigmoid')(x)\n",
+    "\n",
+    "    return Model(inputs = base_model.input, outputs = y_pred)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Multilabel Train/Valid Dataset Split"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Submission placeholder\n",
+    "submission_predictions = []\n",
+    "\n",
+    "Multi_Stratified_split = MultilabelStratifiedShuffleSplit(n_splits = 10, test_size = test_size, random_state = seed)\n",
+    "X = train_df.index\n",
+    "Y = train_df.Label.values\n",
+    "\n",
+    "# Take the first split as the train/valid indices\n",
+    "Multi_Stratified_splits = next(Multi_Stratified_split.split(X, Y))\n",
+    "train_idx = Multi_Stratified_splits[0]\n",
+    "valid_idx = Multi_Stratified_splits[1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Repeated training passes over the same stratified split;\n",
+    "# the model weights are reloaded at the start of each pass.\n",
+    "for epoch in range(0, 4):\n",
+    "    print('=========== EPOCH {}'.format(epoch))\n",
+    "\n",
+    "    # Shuffle the train data\n",
+    "    np.random.shuffle(train_idx)\n",
+    "    print(train_idx[:5])\n",
+    "    print(valid_idx[:5])\n",
+    "\n",
+    "    # Create data generators for train and valid\n",
+    "    data_generator_train = DataGenerator_Train(train_df.iloc[train_idx],\n",
+    "                                               train_df.iloc[train_idx],\n",
+    "                                               train_batch_size,\n",
+    "                                               input_image_shape,\n",
+    "                                               augment = True)\n",
+    "    data_generator_val = DataGenerator_Train(train_df.iloc[valid_idx],\n",
+    "                                             train_df.iloc[valid_idx],\n",
+    "                                             valid_batch_size,\n",
+    "                                             input_image_shape,\n",
+    "                                             augment = False)\n",
+    "\n",
+    "    # Create the model\n",
+    "    model = create_model()\n",
+    "\n",
+    "    # Train all layers\n",
+    "    for base_layer in model.layers[:-1]:\n",
+    "        base_layer.trainable = True\n",
+    "    # Each pass covers a sixth of the training data\n",
+    "    steps = int(len(data_generator_train) / 6)\n",
+    "    LR = 0.0001\n",
+    "\n",
+    "    if epoch != 0:\n",
+    "        # Resume from the previous pass's weights\n",
+    "        model.load_weights('model.h5')\n",
+    "\n",
+    "    model.compile(optimizer = Adam(learning_rate = LR),\n",
+    "                  loss = 'binary_crossentropy',\n",
+    "                  metrics = ['acc', tf.keras.metrics.AUC()])\n",
+    "\n",
+    "    # Train the model\n",
+    "    model.fit_generator(generator = data_generator_train,\n",
+    "                        validation_data = data_generator_val,\n",
+    "                        steps_per_epoch = steps,\n",
+    "                        epochs = 1,\n",
+    "                        callbacks = [ModelCheckpointFull('model.h5')],\n",
+    "                        verbose = 1)\n",
+    "\n",
+    "    # From the second pass onward, predict on the test set and\n",
+    "    # keep the predictions for ensembling\n",
+    "    if epoch >= 1:\n",
+    "        preds = predictions(test_df, model)\n",
+    "        submission_predictions.append(preds)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Ensemble and average all submission_predictions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Weighted average: the weight 2**i doubles with each later (better-trained) pass\n",
+    "test_df.iloc[:, :] = np.average(submission_predictions, axis = 0, weights = [2**i for i in range(len(submission_predictions))])\n",
+    "test_df = test_df.stack().reset_index()\n",
+    "test_df.insert(loc = 0, column = 'ID', value = test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n",
+    "test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n",
+    "test_df.to_csv('submission.csv', index = False)\n",
+    "print(test_df.head(12))"
+   ]
+  },
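+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Since later passes have trained on more data, the `2**i` weights let them dominate the average; e.g. three saved prediction sets get normalized weights 1/7, 2/7 and 4/7. A quick check of that arithmetic:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quick check of the exponential ensemble weights (illustration only):\n",
+    "# with 3 prediction sets, [2**0, 2**1, 2**2] = [1, 2, 4] normalizes\n",
+    "# to [1/7, 2/7, 4/7].\n",
+    "w = np.array([2**i for i in range(3)], dtype=float)\n",
+    "print(w / w.sum())"
+   ]
+  },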
"valid_idx = Multi_Stratified_splits[1]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Loop through Folds of Multi Label Stratified Split\n", + "\n", + "for epoch in range(0, 4):\n", + " print('=========== EPOCH {}'.format(epoch))\n", + "\n", + " # Shuffle Train data\n", + " np.random.shuffle(train_idx)\n", + " print(train_idx[:5]) \n", + " print(valid_idx[:5])\n", + "\n", + " # Create Data Generators for Train and Valid\n", + " data_generator_train = DataGenerator_Train(train_df.iloc[train_idx], \n", + " train_df.iloc[train_idx], \n", + " train_batch_size, \n", + " input_image_shape,\n", + " augment = True)\n", + " data_generator_val = DataGenerator_Train(train_df.iloc[valid_idx], \n", + " train_df.iloc[valid_idx], \n", + " valid_batch_size, \n", + " input_image_shape,\n", + " augment = False)\n", + "\n", + " # Create Model\n", + " model = create_model()\n", + " \n", + " # Full Training Model\n", + " for base_layer in model.layers[:-1]:\n", + " base_layer.trainable = True\n", + " steps = int(len(data_generator_train) / 6)\n", + " LR = 0.0001\n", + "\n", + " if epoch != 0:\n", + " # Load Model Weights\n", + " model.load_weights('model.h5') \n", + "\n", + " model.compile(optimizer = Adam(learning_rate = LR), \n", + " loss = 'binary_crossentropy',\n", + " metrics = ['acc', tf.keras.metrics.AUC()])\n", + " \n", + " # Train Model\n", + " model.fit_generator(generator = data_generator_train,\n", + " validation_data = data_generator_val,\n", + " steps_per_epoch = steps,\n", + " epochs = 1,\n", + " callbacks = [ModelCheckpointFull('model.h5')],\n", + " verbose = 1)\n", + " \n", + " # Starting with the 6th epoch we create predictions for the test set on each epoch\n", + " if epoch >= 1:\n", + " preds = predictions(test_df, model)\n", + " submission_predictions.append(preds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ensemble and average all submission_predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_df.iloc[:, :] = np.average(submission_predictions, axis = 0, weights = [2**i for i in range(len(submission_predictions))])\n", + "test_df = test_df.stack().reset_index()\n", + "test_df.insert(loc = 0, column = 'ID', value = test_df['Image'].astype(str) + \"_\" + test_df['Diagnosis'])\n", + "test_df = test_df.drop([\"Image\", \"Diagnosis\"], axis=1)\n", + "test_df.to_csv('submission.csv', index = False)\n", + "print(test_df.head(12))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import FileLink, FileLinks\n", + "FileLink('submission.csv')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}