[d301d9]: / Notebooks / Training the R-CNN.ipynb

Download this file

347 lines (347 with data), 9.9 kB

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "kernelspec": {
      "display_name": "Python [conda env:work] *",
      "language": "python",
      "name": "conda-env-work-py"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.3"
    },
    "colab": {
      "name": "Training the R-CNN.ipynb",
      "provenance": [],
      "collapsed_sections": []
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "WemaJ8K34tFi",
        "colab_type": "text"
      },
      "source": [
        "# **Importing Dependencies**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xZM4YQYP2I_0",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import pickle\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "import tensorflow as tf\n",
        "\n",
        "from tensorflow.keras import Sequential\n",
        "from tensorflow.keras.layers import LSTM, Bidirectional, TimeDistributed, Dense, Dropout, BatchNormalization, Activation\n",
        "from tensorflow.keras.optimizers import Adam\n",
        "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n",
        "\n",
        "from tensorflow.python.client import device_lib\n",
        "from tensorflow.compat.v1 import ConfigProto\n",
        "from tensorflow.compat.v1 import InteractiveSession"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "AgpkgeGMxBXl",
        "colab_type": "text"
      },
      "source": [
        "# **Allowing for Parallelized Model Training**\n",
        "By default, TensorFlow allocates all available GPU memory to the current training process. By enabling memory growth, however, we can train multiple models in parallel."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "emimSYUx2JAK",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Let TensorFlow grow GPU memory on demand instead of reserving all of it up front\n",
        "gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n",
        "\n",
        "try:\n",
        "    # Nothing happens here when no GPU was found (empty list)\n",
        "    for gpu in gpus:\n",
        "        tf.config.experimental.set_memory_growth(gpu, True)\n",
        "except RuntimeError as err:\n",
        "    # Report the problem and carry on with the rest of the notebook\n",
        "    print(err)\n",
        "\n",
        "# Same allow-growth option for the TF1-compat interactive session\n",
        "config = ConfigProto()\n",
        "config.gpu_options.allow_growth = True\n",
        "session = InteractiveSession(config=config)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "sh7VEM-qsYk3",
        "colab_type": "text"
      },
      "source": [
        "# **Loading Data & Labels**\n",
        "Here, we load the features previously extracted by the CNN and their corresponding labels."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "f_N4BDsb2JAO",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Features previously extracted by the CNN\n",
        "data_array = np.load(\"rcnn-data-array.npy\")\n",
        "\n",
        "# Corresponding labels, with a dummy dimension inserted at axis 2\n",
        "# so that they can be fed to our R-CNN\n",
        "label_array = np.expand_dims(np.load(\"rcnn-label-array.npy\"), axis=2)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "W9hHmPG22JAd",
        "colab_type": "code",
        "colab": {},
        "outputId": "c07217cd-f344-45c8-8764-5b47ca989cc6"
      },
      "source": [
        "# Share of positive (hemorrhage) labels in the data: roughly 19%,\n",
        "# i.e. the classes are imbalanced.\n",
        "np.mean(label_array)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.1935311552247036"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ghsRNPHUu8sO",
        "colab_type": "text"
      },
      "source": [
        "## **Building the Model**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "lV8VISV62JAs",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "with tf.device(\"/GPU:1\"):\n",
        "    model = Sequential()\n",
        "\n",
        "    model.add(Bidirectional(LSTM(100, return_sequences=True), input_shape=(slices_per_patient, 8192)))\n",
        "    model.add(TimeDistributed(Dropout(0.5)))\n",
        "    model.add(TimeDistributed(Dense(100, use_bias=False)))\n",
        "    model.add(TimeDistributed(BatchNormalization()))\n",
        "    model.add(TimeDistributed(Activation(\"relu\")))\n",
        "    model.add(TimeDistributed(Dropout(0.5)))\n",
        "    model.add(TimeDistributed(Dense(100, use_bias=False)))\n",
        "    model.add(TimeDistributed(BatchNormalization()))\n",
        "    model.add(TimeDistributed(Activation(\"relu\")))\n",
        "    model.add(TimeDistributed(Dense(1, activation=\"sigmoid\")))\n",
        "    \n",
        "    model.compile(loss=\"binary_crossentropy\", optimizer=Adam(lr=0.0001), metrics=model_metrics)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "hq1J88JEvCfs",
        "colab_type": "text"
      },
      "source": [
        "## **Fitting the Model**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZDdbBtY82JAq",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Training setup\n",
        "model_metrics = [\"accuracy\",\n",
        "                 tensorflow.keras.metrics.Precision(),\n",
        "                 tensorflow.keras.metrics.Recall(),\n",
        "                 tensorflow.keras.metrics.AUC()]\n",
        "\n",
        "slices_per_patient = 24\n",
        "batch_size = 100\n",
        "epochs = 100\n",
        "model_name = \"bi-rcnn-model\"\n",
        "\n",
        "checkpoint = ModelCheckpoint(\"RCNN_models/bi-rcnn-model-{epoch:03d}-{val_recall:.2f}.hdf5\", \n",
        "                             \"val_accuracy\",\n",
        "                             save_best_only=False, verbose=2)\n",
        "\n",
        "stopping = EarlyStopping(\"val_accuracy\", patience = 10, verbose = 2, mode = \"max\")\n",
        "\n",
        "callbacks = [stopping, checkpoint]"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "scrolled": true,
        "id": "dT5yePJZ2JAv",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Model fitting\n",
        "history = model.fit(data_array, label_array,\n",
        "                    batch_size=batch_size,\n",
        "                    epochs=epochs,\n",
        "                    validation_split=0.2,\n",
        "                    callbacks=callbacks,\n",
        "                    verbose=2)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NDt7vp3sr9Lf",
        "colab_type": "text"
      },
      "source": [
        "# **Plotting Development of Loss, AUC, Recall**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5wubkccr2JAx",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Persist the per-epoch training history so it can be reloaded later\n",
        "history_path = \"RNN_histories/bi-rcnn-{}.pkl\".format(model_name)\n",
        "\n",
        "with open(history_path, \"wb\") as history_file:\n",
        "    pickle.dump(history.history, history_file)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "3ZZdmq0P0t5z",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "def plot_metric(metric):\n",
        "    \"\"\"Plot the training and validation curves of one metric over the epochs.\n",
        "\n",
        "    Reads the per-epoch values from the global `history` object, saves the\n",
        "    figure as a PNG under RNN_plots/ and then displays it.\n",
        "\n",
        "    Args:\n",
        "        metric: Key in `history.history`, e.g. \"loss\", \"auc\" or \"recall\". The\n",
        "            matching \"val_\" + metric key must be present as well.\n",
        "\n",
        "    Raises:\n",
        "        KeyError: If `metric` or its \"val_\" counterpart was not recorded.\n",
        "    \"\"\"\n",
        "    val_metric = \"val_\" + metric\n",
        "\n",
        "    plt.figure(figsize=(10, 6))\n",
        "    plt.plot(history.history[metric], label=metric)\n",
        "    plt.plot(history.history[val_metric], label=val_metric)\n",
        "    plt.xlabel(\"epoch\")\n",
        "    plt.ylabel(metric)\n",
        "    # Separate model name and metric; plain concatenation rendered e.g. \"bi-rcnn-modelloss\"\n",
        "    plt.title(\"{}: {}\".format(model_name, metric))\n",
        "    plt.legend()\n",
        "    plt.tight_layout()\n",
        "    # Save before show(), while the figure is still the current one\n",
        "    plt.savefig(\"RNN_plots/bi-rcnn-{}_{}.png\".format(model_name, metric), dpi=300)\n",
        "    plt.show()"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "-rLp37bE2JA7",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Training vs. validation loss per epoch\n",
        "plot_metric(\"loss\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "VSr_TibG2JBA",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Training vs. validation recall per epoch\n",
        "plot_metric(\"recall\")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "K-4-XAxT2JBD",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Training vs. validation AUC per epoch\n",
        "plot_metric(\"auc\")"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}