Switch to side-by-side view

--- a
+++ b/Notebooks/Training the R-CNN.ipynb
@@ -0,0 +1,347 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python [conda env:work] *",
+      "language": "python",
+      "name": "conda-env-work-py"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.3"
+    },
+    "colab": {
+      "name": "Training the R-CNN.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WemaJ8K34tFi",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Importing Dependencies**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "xZM4YQYP2I_0",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import pickle\n",
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "\n",
+        "import tensorflow as tf\n",
+        "\n",
+        "from tensorflow.keras import Sequential\n",
+        "from tensorflow.keras.layers import LSTM, Bidirectional, TimeDistributed, Dense, Dropout, BatchNormalization, Activation\n",
+        "from tensorflow.keras.optimizers import Adam\n",
+        "from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n",
+        "\n",
+        "from tensorflow.python.client import device_lib\n",
+        "from tensorflow.compat.v1 import ConfigProto\n",
+        "from tensorflow.compat.v1 import InteractiveSession"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AgpkgeGMxBXl",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Allowing for Parallelized Model Training**\n",
+        "By default, TensorFlow allocates all available GPU memory to the current training process. By enabling memory growth, however, we can train multiple models in parallel."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "emimSYUx2JAK",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "gpus = tf.config.experimental.list_physical_devices(\"GPU\")\n",
+        "\n",
+        "if gpus:\n",
+        "    try:\n",
+        "        for gpu in gpus:\n",
+        "            tf.config.experimental.set_memory_growth(gpu, True)\n",
+        "    except RuntimeError as e:\n",
+        "        print(e)\n",
+        "\n",
+        "config = ConfigProto()\n",
+        "config.gpu_options.allow_growth = True\n",
+        "session = InteractiveSession(config=config)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "sh7VEM-qsYk3",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Loading Data & Labels**\n",
+        "Here, we load the features previously extracted by the CNN and their corresponding labels."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "f_N4BDsb2JAO",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "data_array = np.load(\"rcnn-data-array.npy\")\n",
+        "label_array = np.load(\"rcnn-label-array.npy\")\n",
+        "\n",
+        "# Add a dummy dimension for label_array to be fed to our R-CNN\n",
+        "label_array = np.expand_dims(label_array, axis=2)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "W9hHmPG22JAd",
+        "colab_type": "code",
+        "colab": {},
+        "outputId": "c07217cd-f344-45c8-8764-5b47ca989cc6"
+      },
+      "source": [
+        "# We check the percentage of hemorrhages present in the data: It is ~19%.\n",
+        "np.average(label_array)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "0.1935311552247036"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 4
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ghsRNPHUu8sO",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Building the Model**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "lV8VISV62JAs",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "with tf.device(\"/GPU:1\"):\n",
+        "    model = Sequential()\n",
+        "\n",
+        "    model.add(Bidirectional(LSTM(100, return_sequences=True), input_shape=(slices_per_patient, 8192)))\n",
+        "    model.add(TimeDistributed(Dropout(0.5)))\n",
+        "    model.add(TimeDistributed(Dense(100, use_bias=False)))\n",
+        "    model.add(TimeDistributed(BatchNormalization()))\n",
+        "    model.add(TimeDistributed(Activation(\"relu\")))\n",
+        "    model.add(TimeDistributed(Dropout(0.5)))\n",
+        "    model.add(TimeDistributed(Dense(100, use_bias=False)))\n",
+        "    model.add(TimeDistributed(BatchNormalization()))\n",
+        "    model.add(TimeDistributed(Activation(\"relu\")))\n",
+        "    model.add(TimeDistributed(Dense(1, activation=\"sigmoid\")))\n",
+        "    \n",
+        "    model.compile(loss=\"binary_crossentropy\", optimizer=Adam(lr=0.0001), metrics=model_metrics)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hq1J88JEvCfs",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Fitting the Model**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "ZDdbBtY82JAq",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Training setup\n",
+        "model_metrics = [\"accuracy\",\n",
+        "                 tensorflow.keras.metrics.Precision(),\n",
+        "                 tensorflow.keras.metrics.Recall(),\n",
+        "                 tensorflow.keras.metrics.AUC()]\n",
+        "\n",
+        "slices_per_patient = 24\n",
+        "batch_size = 100\n",
+        "epochs = 100\n",
+        "model_name = \"bi-rcnn-model\"\n",
+        "\n",
+        "checkpoint = ModelCheckpoint(\"RCNN_models/bi-rcnn-model-{epoch:03d}-{val_recall:.2f}.hdf5\", \n",
+        "                             \"val_accuracy\",\n",
+        "                             save_best_only=False, verbose=2)\n",
+        "\n",
+        "stopping = EarlyStopping(\"val_accuracy\", patience = 10, verbose = 2, mode = \"max\")\n",
+        "\n",
+        "callbacks = [stopping, checkpoint]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "scrolled": true,
+        "id": "dT5yePJZ2JAv",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Model fitting\n",
+        "history = model.fit(data_array, label_array,\n",
+        "                    batch_size=batch_size,\n",
+        "                    epochs=epochs,\n",
+        "                    validation_split=0.2,\n",
+        "                    callbacks=callbacks,\n",
+        "                    verbose=2)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NDt7vp3sr9Lf",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Plotting Development of Loss, AUC, Recall**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5wubkccr2JAx",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "with open(\"RNN_histories/bi-rcnn-{}.pkl\".format(model_name), \"wb\") as f:\n",
+        "    pickle.dump(history.history, f)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "3ZZdmq0P0t5z",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Takes in one of \"loss\", \"auc\", \"recall\"\n",
+        "# Plots the development of the corresponding metric\n",
+        "def plot_metric(metric):\n",
+        "  plt.figure(figsize=(10, 6))\n",
+        "  plt.plot(history.history[metric], label=metric)\n",
+        "  plt.plot(history.history[\"val_\" + metric], label=\"val_\" + metric)\n",
+        "  plt.xlabel(\"epoch\")\n",
+        "  plt.ylabel(metric)\n",
+        "  plt.title(model_name + metric)\n",
+        "  plt.legend()\n",
+        "  plt.tight_layout()\n",
+        "  plt.savefig(\"RNN_plots/bi-rcnn-{}_\".format(model_name) + metric + \".png\", dpi=300)\n",
+        "  plt.show()\n",
+        "\n",
+        "  pass"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "-rLp37bE2JA7",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Plot of train and test loss\n",
+        "plot_metric(\"loss\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "VSr_TibG2JBA",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Plot of train and test recall\n",
+        "plot_metric(\"recall\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "K-4-XAxT2JBD",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Plot of train and test AUC\n",
+        "plot_metric(\"auc\")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file