--- a
+++ b/Notebooks/Windowing PNGs.ipynb
@@ -0,0 +1,318 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python [conda env:datasci] *",
+      "language": "python",
+      "name": "conda-env-datasci-py"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.7.5"
+    },
+    "colab": {
+      "name": "Preprocess PNGs.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hd7jDitGM5c_",
+        "colab_type": "text"
+      },
+      "source": [
+        "*To run this notebook, please provide the following three paths:*"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qYAbTAbyNB9T",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Images to be windowed\n",
+        "# NOTE: each path in this cell is joined to a file name by plain string concatenation\n",
+        "# in the windowing loop, so it must end with a path separator\n",
+        "path_to_images = '/path/to/pngs/'\n",
+        "\n",
+        "# Where to store windowed images\n",
+        "path_to_train = '/path/to/Windowed-PNGs-train/'\n",
+        "path_to_test = '/path/to/Windowed-PNGs-test/'"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8ieLQlGg7Q-v",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Installing & Importing Dependencies**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Dykmkdjv6Z4s",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# %pip (unlike !pip) always installs into the environment of the running kernel\n",
+        "%pip install imageio\n",
+        "%pip install pillow"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "AmdeoFBU6Z4z",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "from PIL import Image\n",
+        "\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "import imageio\n",
+        "import pickle\n",
+        "import glob\n",
+        "import random"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NJNrsZR19sv_",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Reading in File Names**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "QNKov8ka6Z43",
+        "colab_type": "code",
+        "colab": {},
+        "outputId": "e4414b7d-b5a4-4d65-c8f6-d81774ebaec1"
+      },
+      "source": [
+        "# NOTE(review): hard-coded absolute local path -- adjust it to wherever the pickle lives on your machine\n",
+        "# NOTE(review): unpickling can execute arbitrary code; only load this file if it comes from a trusted source\n",
+        "# Presumably a dict mapping each patient to that patient's ordered slice names\n",
+        "# (it is consumed via .items() / .values() further down) -- TODO confirm\n",
+        "patient_df = pd.read_pickle('C:\\\\Users\\\\Administrator\\\\Downloads\\\\ordered_slices_by_patient.pkl')\n",
+        "\n",
+        "# We find the total number of patients (usually, a patient has something like 30-50 associated PNGs, i.e., CT slices of their brain)\n",
+        "len(patient_df)"
+      ],
+      "execution_count": 0,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "17079"
+            ]
+          },
+          "metadata": {
+            "tags": []
+          },
+          "execution_count": 24
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vOo0KXLl96iA",
+        "colab_type": "text"
+      },
+      "source": [
+        "## **Randomly Subsample 2,500 Patients**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "2u5YbNzX-qHj",
+        "colab_type": "text"
+      },
+      "source": [
+        "For **faster prototyping**, we randomly subsample 2500 patients. This still yields more than enough images to successfully train and evaluate our models."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "BZC9CQmD6Z5A",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Fixed seed so that re-running the notebook selects the same patients\n",
+        "SUBSAMPLE_SEED = 42\n",
+        "NB_PATIENTS = 2500\n",
+        "\n",
+        "# random.sample() needs a sequence: passing the dict view directly is deprecated\n",
+        "# since Python 3.9 and raises a TypeError from Python 3.11 on, hence the list()\n",
+        "patient_items = list(patient_df.items())\n",
+        "\n",
+        "# Never ask for more patients than are available\n",
+        "nb_sampled = min(NB_PATIENTS, len(patient_items))\n",
+        "fewer_images = dict(random.Random(SUBSAMPLE_SEED).sample(patient_items, k = nb_sampled))"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "aA_i5-Ou6Z5K",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Persist the sampled patient subset so that the exact same selection can be reloaded later\n",
+        "with open(\"ordered_slices_by_patient_randsubset.pkl\", \"wb\") as subset_file:\n",
+        "    pickle.dump(fewer_images, subset_file)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5Fl464Bg6Z5Q",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Flatten the per-patient entries into one NumPy array of image names\n",
+        "# (from here on, an image is no longer associated with a given patient)\n",
+        "slices_per_patient = list(fewer_images.values())\n",
+        "fewer_images_flat = np.concatenate(slices_per_patient)\n",
+        "nb_ims = fewer_images_flat.shape[0]"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XriaSmloAbMf",
+        "colab_type": "text"
+      },
+      "source": [
+        "# **Windowing**"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "QOkRcXutCuYU",
+        "colab_type": "text"
+      },
+      "source": [
+        "When examining brain CT scans, radiologists rarely look at the raw images (they appear mostly gray to the human eye). Instead, they use so-called \"windows\"—simple transformations of the raw data that serve to highlight structures of different densities in the human brain. The three most common windows for hemorrhage detection are the **bone, brain, and subdural windows**.\n",
+        "\n",
+        "These are also the three windows that we apply to help our model detect hemorrhages. Specifically, we read in grayscale, one-channel PNGs and turn them into **RGB**, three-channel PNGs where each channel contains one specific window."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "aih_4cwJ6Z5X",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# NOTE: The code in this cell is from\n",
+        "# https://github.com/darraghdog/rsna/blob/master/scripts/prepare_meta_dicom.py\n",
+        "\n",
+        "# Generic windowing helper: a window is fully described by its center and its width\n",
+        "def apply_window(image, center, width):\n",
+        "    \"\"\"Return a copy of `image` clamped to [center - width // 2, center + width // 2].\n",
+        "\n",
+        "    The array passed in is left unmodified.\n",
+        "    \"\"\"\n",
+        "    half_width = width // 2\n",
+        "    lower_bound, upper_bound = center - half_width, center + half_width\n",
+        "\n",
+        "    windowed = image.copy()\n",
+        "    windowed[windowed < lower_bound] = lower_bound\n",
+        "    windowed[windowed > upper_bound] = upper_bound\n",
+        "    return windowed\n",
+        "\n",
+        "# This function contains our specific windowing policy:\n",
+        "# Namely, we perform brain, subdural, and bone windowing, then we concatenate these three windows\n",
+        "def apply_window_policy(image):\n",
+        "    \"\"\"Stack the brain, subdural, and bone windows of a 2-D image into an (H, W, 3) array.\n",
+        "\n",
+        "    Each window is clamped via `apply_window`, rescaled to [0, 1] by subtracting the\n",
+        "    window's lower bound and dividing by its width, and finally mean-centered.\n",
+        "    \"\"\"\n",
+        "    # Work in floating point: on unsigned-integer input (e.g. uint8 PNGs) the negative\n",
+        "    # window bounds are not representable, which makes NumPy >= 2 raise an OverflowError\n",
+        "    image = np.asarray(image, dtype=np.float64)\n",
+        "\n",
+        "    # (center, width) of the brain, subdural, and bone window, in channel order\n",
+        "    windows = [(40, 80), (80, 200), (40, 380)]\n",
+        "\n",
+        "    channels = []\n",
+        "    for center, width in windows:\n",
+        "        # Same lower bound that apply_window clamps to\n",
+        "        lower_bound = center - width // 2\n",
+        "        channel = (apply_window(image, center, width) - lower_bound) / width\n",
+        "        channels.append(channel - channel.mean())\n",
+        "\n",
+        "    # Stacking yields (3, H, W); move the channel axis last\n",
+        "    return np.array(channels).transpose(1,2,0)"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "XQSVi-rbXI47",
+        "colab_type": "text"
+      },
+      "source": [
+        "Performing the actual windowing:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Lzg-CbLg6Z5l",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "# Number of leading images written to the test folder; all later ones go to the training folder\n",
+        "NB_TEST_IMAGES = 16500\n",
+        "\n",
+        "\n",
+        "def rescale_to_uint8(array):\n",
+        "    \"\"\"Linearly map the values of `array` onto [0, 255] and cast them to uint8.\"\"\"\n",
+        "    lowest = array.min()\n",
+        "    value_range = array.max() - lowest\n",
+        "    if value_range == 0:\n",
+        "        # Constant image: avoid the 0/0 division and return an all-zero image\n",
+        "        return np.zeros(array.shape, dtype=np.uint8)\n",
+        "    # Divide by the full range (max - min), not by max: the windowed channels are\n",
+        "    # mean-centered, so min is negative and dividing by max alone would push values\n",
+        "    # above 255, which then wrap around in the uint8 cast\n",
+        "    return ((array - lowest) / value_range * 255.0).astype(np.uint8)\n",
+        "\n",
+        "\n",
+        "# Iterate over all PNGs to window them\n",
+        "for i, image in enumerate(fewer_images_flat):\n",
+        "\n",
+        "    # Only a missing input PNG is skipped; any later failure (e.g. a missing output\n",
+        "    # folder) is raised instead of being reported as a skipped image\n",
+        "    try:\n",
+        "        # Load PNG as NumPy array\n",
+        "        raw_im = imageio.imread(path_to_images + image + '.png')\n",
+        "    except FileNotFoundError:\n",
+        "        print('Skipping', image)\n",
+        "        continue\n",
+        "    print(i, 'out of {} loaded'.format(nb_ims))\n",
+        "\n",
+        "    # Window PNG\n",
+        "    windowed_image = apply_window_policy(raw_im)\n",
+        "    print(i, 'out of {} windowed'.format(nb_ims))\n",
+        "\n",
+        "    # Rescale the image to have pixel values in range [0, 255] & convert to uint8\n",
+        "    rescaled_image = rescale_to_uint8(windowed_image)\n",
+        "    print('Rescaled image {} out of {}'.format(i, nb_ims))\n",
+        "\n",
+        "    # Turn NumPy array into PNG again\n",
+        "    final_im = Image.fromarray(rescaled_image)\n",
+        "\n",
+        "    # Use 16,500 images for testing and the rest for training purposes (this is approximately a 70/30 split)\n",
+        "    target_dir = path_to_test if i < NB_TEST_IMAGES else path_to_train\n",
+        "    final_im.save(target_dir + image + '.png')\n",
+        "    print('Saved image {} out of {}'.format(i + 1, nb_ims))"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file