[d301d9]: / Notebooks / Training the CNN.ipynb

Download this file

442 lines (441 with data), 13.4 kB

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "kernelspec": {
      "display_name": "Python [conda env:datasci] *",
      "language": "python",
      "name": "conda-env-datasci-py"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.5"
    },
    "colab": {
      "name": "Training the CNN.ipynb",
      "provenance": [],
      "collapsed_sections": []
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "efCvb66QStgP",
        "colab_type": "text"
      },
      "source": [
        "*To run this notebook, please provide the following four file paths:*"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Iukqj1IzStyn",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Directories holding the training images and the test (validation) images\n",
        "path_to_train = '/path/to/train/images'\n",
        "path_to_test = '/path/to/test/images'\n",
        "\n",
        "# CSV file listing each image file name ('ID') with its binary label ('any')\n",
        "path_to_labels = '/path/to/labels.csv'\n",
        "\n",
        "# File name template handed to ModelCheckpoint; the {epoch} and {val_accuracy}\n",
        "# placeholders are filled in each time a checkpoint is written\n",
        "path_to_save_model = '/path/to/save/model/to/cnn-checkpoint-{epoch:02d}-{val_accuracy:.2f}.hdf5'"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "XqBOX3Y4MWs3",
        "colab_type": "text"
      },
      "source": [
        "## **Installing & Importing Dependencies**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jnq7FwiHKZwY",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# %pip (unlike !pip) installs into the environment of the running kernel,\n",
        "# so the package is importable from this notebook afterwards\n",
        "%pip install keras"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OTnWqAwiKZxb",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import tensorflow as tf\n",
        "import pandas as pd\n",
        "\n",
        "from keras import applications\n",
        "from keras import optimizers\n",
        "from keras import backend as k \n",
        "\n",
        "from keras.models import Sequential, Model\n",
        "from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D\n",
        "from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping\n",
        "from keras.preprocessing.image import ImageDataGenerator"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "N60M6zljKZxs",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Fail fast, with an explanatory message, if training cannot run on a GPU\n",
        "assert tf.test.is_gpu_available(), 'TensorFlow cannot see a GPU on this machine'\n",
        "assert tf.test.is_built_with_cuda(), 'This TensorFlow build was compiled without CUDA support'"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "PIErVpE6NHx6",
        "colab_type": "text"
      },
      "source": [
        "## **Reading in Data Labels**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "SGUEeB-cKZx3",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# flow_from_dataframe requires string-typed columns, so every column is cast\n",
        "# to str right after the CSV is loaded\n",
        "labels_df = pd.read_csv(path_to_labels).astype(str)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "scrolled": true,
        "id": "qShGxNkPKZx7",
        "colab_type": "code",
        "colab": {},
        "outputId": "f6cf0aa8-a383-440a-e4ed-13499e3669af"
      },
      "source": [
        "# Inspect the DataFrame containing the labels\n",
        "labels_df.head(3)"
      ],
      "execution_count": 0,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Unnamed: 0</th>\n",
              "      <th>ID</th>\n",
              "      <th>any</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>ID_000039fa0.png</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>ID_00005679d.png</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2</td>\n",
              "      <td>ID_00008ce3c.png</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "  Unnamed: 0                ID any\n",
              "0          0  ID_000039fa0.png   0\n",
              "1          1  ID_00005679d.png   0\n",
              "2          2  ID_00008ce3c.png   0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 19
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ElzPsVvaODOk",
        "colab_type": "text"
      },
      "source": [
        "## **Building Transfer-Learning Model**"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "qFQ7sFZjx_ba",
        "colab_type": "text"
      },
      "source": [
        "*As our pretrained model, we choose VGG19 with ImageNet weights. Note that include_top = False (otherwise, we would be including VGG19's final 1000-node dense softmax prediction layer)*"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "vh0b4EQtKZyN",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Convolutional base only (no classifier head), initialised with ImageNet weights\n",
        "model = applications.VGG19(\n",
        "    include_top=False,\n",
        "    weights=\"imagenet\",\n",
        "    input_shape=(128, 128, 3),\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "tszxYuOwyURR",
        "colab_type": "text"
      },
      "source": [
        "*Freeze the first three convolutional blocks and leave the last two unfrozen. This way, we transfer the model's knowledge of low-level features (like edges and angles) while allowing it to learn new high-level features (like hemorrhages).*"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "BruLVtwJKZ0g",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Layers at indices 0-11 are excluded from training; all later layers stay trainable\n",
        "for frozen_layer in model.layers[:12]:\n",
        "    frozen_layer.trainable = False"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "g_Kb-J_Qylgz",
        "colab_type": "text"
      },
      "source": [
        "*We append our own custom layers to the end of VGG19. Note that, for binary classification, a single sigmoid output node is equivalent to a two-node softmax output layer.*"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Z7uPdX-8c5eU",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Classifier head stacked on the flattened VGG19 feature maps\n",
        "x = Flatten()(model.output)\n",
        "x = Dense(1000, activation='relu')(x)\n",
        "x = Dropout(0.5)(x)\n",
        "x = Dense(1000, activation='relu')(x)\n",
        "\n",
        "# A single sigmoid unit produces the binary prediction\n",
        "output = Dense(1, activation='sigmoid')(x)\n",
        "\n",
        "# End-to-end model: from the VGG19 input through to the new prediction unit\n",
        "custom_model = Model(inputs=model.input, outputs=output)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "X49CwEDJy7tN",
        "colab_type": "text"
      },
      "source": [
        "*Compiling the model. The primary metric we care about is recall (that is, the CNN's ability to correctly detect intracranial hemorrhages).*"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "xECXyzILnWTa",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Binary cross-entropy loss, Adam with a small learning rate, and four tracked metrics\n",
        "custom_model.compile(\n",
        "    optimizer=optimizers.Adam(lr=1e-4),\n",
        "    loss='binary_crossentropy',\n",
        "    metrics=[\n",
        "        'accuracy',\n",
        "        tf.keras.metrics.Recall(),\n",
        "        tf.keras.metrics.AUC(),\n",
        "        tf.keras.metrics.Precision(),\n",
        "    ],\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "u7BWA55koyNp",
        "colab_type": "text"
      },
      "source": [
        "## **Creating Train & Test Generators**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "fgTeFSx5olWu",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Training images: pixel values scaled by 1/255, plus random augmentation\n",
        "# (rotation, shifts, zoom, horizontal flips) applied as the images are read\n",
        "train_datagen = ImageDataGenerator(\n",
        "    rescale=1.0 / 255,\n",
        "    rotation_range=30,\n",
        "    width_shift_range=0.3,\n",
        "    height_shift_range=0.3,\n",
        "    zoom_range=0.3,\n",
        "    horizontal_flip=True,\n",
        "    fill_mode=\"nearest\",\n",
        ")\n",
        "\n",
        "# Test images: the same rescaling only, no augmentation\n",
        "test_datagen = ImageDataGenerator(rescale=1.0 / 255)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "7-zf69uioyiE",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# The images are not sorted into class-specific folders; file names and labels\n",
        "# come from the labels DataFrame instead. Both generators share these settings.\n",
        "flow_kwargs = dict(dataframe=labels_df,\n",
        "                   x_col='ID',\n",
        "                   y_col='any',\n",
        "                   target_size=(128, 128),\n",
        "                   class_mode='binary')\n",
        "\n",
        "train_generator = train_datagen.flow_from_dataframe(directory=path_to_train, **flow_kwargs)\n",
        "test_generator = test_datagen.flow_from_dataframe(directory=path_to_test, **flow_kwargs)"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_Xh6lg1tszMo",
        "colab_type": "text"
      },
      "source": [
        "## **Fitting the Model**"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "HpcKttO0qdEe",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Callbacks\n",
        "# The validation value of the 'accuracy' metric is logged as 'val_accuracy' -- the same\n",
        "# key the checkpoint file name template formats -- so both callbacks monitor that key.\n",
        "# (Monitoring a key that is never logged makes EarlyStopping warn and never stop.)\n",
        "checkpoint = ModelCheckpoint(path_to_save_model, monitor='val_accuracy', verbose=1,\n",
        "                             save_best_only=False, save_weights_only=False, mode='auto', period=1)\n",
        "\n",
        "# Stop training once validation accuracy has not improved for 3 consecutive epochs\n",
        "early_stopper = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=3, verbose=1, mode='auto')"
      ],
      "execution_count": 0,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "5e5xPMF2KZ1h",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "# Train for at most 50 epochs, validating on the test generator;\n",
        "# the callbacks write a checkpoint every epoch and may end training early\n",
        "custom_model.fit_generator(\n",
        "    train_generator,\n",
        "    epochs=50,\n",
        "    validation_data=test_generator,\n",
        "    callbacks=[checkpoint, early_stopper],\n",
        ")"
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}