--- a
+++ b/Notebook/Week 2/preprocessing_2.ipynb
@@ -0,0 +1,325 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import os\n",
+    "import click\n",
+    "import glob\n",
+    "import cv2\n",
+    "import pydicom\n",
+    "from tqdm import tqdm\n",
+    "from joblib import delayed, Parallel\n",
+    "import random\n",
+    "from PIL import Image\n",
+    "import pydicom\n",
+    "from scipy import ndimage\n",
+    "import pydicom\n",
+    "from skimage import exposure\n",
+    "\n",
+    "base_url = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/'\n",
+    "TRAIN_DIR = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/stage_2_train'\n",
+    "TEST_DIR = '/home/ubuntu/kaggle/rsna-intracranial-hemorrhage-detection/stage_2_test'\n",
+    "os.listdir(base_url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CropHead(object):\n",
+    "    def __init__(self, offset=10):\n",
+    "        \"\"\"\n",
+    "        Crops the head by labelling the objects in an image and keeping the second largest object (the largest object\n",
+    "        is the background). This method removes most of the headrest\n",
+    "\n",
+    "        Originally made as a image transform for use with PyTorch, but too slow to run on the fly :(\n",
+    "        :param offset: Pixel offset to apply to the crop so that it isn't too tight\n",
+    "        \"\"\"\n",
+    "        self.offset = offset\n",
+    "\n",
+    "    def crop_extents(self, img):\n",
+    "        try:\n",
+    "            if type(img) != np.array:\n",
+    "                img_array = np.array(img)\n",
+    "            else:\n",
+    "                img_array = img\n",
+    "\n",
+    "            labeled_blobs, number_of_blobs = ndimage.label(img_array)\n",
+    "            blob_sizes = np.bincount(labeled_blobs.flatten())\n",
+    "            head_blob = labeled_blobs == np.argmax(blob_sizes[1:]) + 1  # The number of the head blob\n",
+    "            head_blob = np.max(head_blob, axis=-1)\n",
+    "\n",
+    "            mask = head_blob == 0\n",
+    "            rows = np.flatnonzero((~mask).sum(axis=1))\n",
+    "            cols = np.flatnonzero((~mask).sum(axis=0))\n",
+    "\n",
+    "            x_min = max([rows.min() - self.offset, 0])\n",
+    "            x_max = min([rows.max() + self.offset + 1, img_array.shape[0]])\n",
+    "            y_min = max([cols.min() - self.offset, 0])\n",
+    "            y_max = min([cols.max() + self.offset + 1, img_array.shape[1]])\n",
+    "\n",
+    "            return x_min, x_max, y_min, y_max\n",
+    "        except ValueError:\n",
+    "            return 0, 0, -1, -1\n",
+    "\n",
+    "    def __call__(self, img):\n",
+    "        \"\"\"\n",
+    "        Crops a CT image to so that as much black area is removed as possible\n",
+    "        :param img: PIL image\n",
+    "        :return: Cropped image\n",
+    "        \"\"\"\n",
+    "\n",
+    "        x_min, x_max, y_min, y_max = self.crop_extents(img)\n",
+    "\n",
+    "        try:\n",
+    "            if type(img) != np.array:\n",
+    "                img_array = np.array(img)\n",
+    "            else:\n",
+    "                img_array = img\n",
+    "\n",
+    "            return Image.fromarray(np.uint8(img_array[x_min:x_max, y_min:y_max]))\n",
+    "        except ValueError:\n",
+    "            return img\n",
+    "\n",
+    "    def __repr__(self):\n",
+    "        return self.__class__.__name__ + '(offset={})'.format(self.offset)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "crop_head = CropHead()"
+   ]
+  },
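+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Quick sanity check of CropHead on a synthetic image (illustrative only, not part of the\n",
+    "# original pipeline): a bright square on a black background should be cropped to roughly\n",
+    "# the square plus the 10-pixel offset.\n",
+    "demo = np.zeros((128, 128, 3), dtype=np.uint8)\n",
+    "demo[40:90, 30:80, :] = 255\n",
+    "x_min, x_max, y_min, y_max = crop_head.crop_extents(demo > 0)\n",
+    "print((x_min, x_max, y_min, y_max))  # expected (30, 100, 20, 90)"
+   ]
+  },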
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sigmoid(x):\n",
+    "    return 1 / (1 + np.exp(-x))\n",
+    "\n",
+    "\n",
+    "def linear_windowing(img, window_width, window_length):\n",
+    "    \"\"\"\n",
+    "    Applies a linear window on an array\n",
+    "    :param img: Image array (in Hounsfield units)\n",
+    "    :param window_width:\n",
+    "    :param window_length:\n",
+    "    :return:\n",
+    "    \"\"\"\n",
+    "    if window_width and window_length:\n",
+    "        lower = window_length - (window_width / 2)\n",
+    "        upper = window_length + (window_width / 2)\n",
+    "        img = np.clip(img, lower, upper)\n",
+    "        img = (img - lower) / (upper - lower)\n",
+    "        return (img*255).astype(np.uint8)\n",
+    "    else:\n",
+    "        return img\n",
+    "\n",
+    "\n",
+    "def sigmoid_windowing(img, window_width, window_length, u=255, epsilon=255):\n",
+    "    \"\"\"\n",
+    "    Applies a sigmoid window on an array\n",
+    "    From Practical Window Setting Optimization for Medical Image Deep Learning https://arxiv.org/pdf/1812.00572.pdf\n",
+    "    :param img: Image array (in Hounsfield units)\n",
+    "    :param window_width:\n",
+    "    :param window_length:\n",
+    "    :param u:\n",
+    "    :param epsilon:\n",
+    "    :return:\n",
+    "    \"\"\"\n",
+    "    if window_width and window_length:\n",
+    "        weight = (2 / window_width) * np.log((u / epsilon) - 1)\n",
+    "        bias = (-2 * window_length / window_width) * np.log((u / epsilon) - 1)\n",
+    "        img = u * sigmoid(weight * img + bias)\n",
+    "        return img.astype(np.uint8)\n",
+    "    else:\n",
+    "        return img\n"
+   ]
+  },
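+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Small check of the windowing functions on a few Hounsfield values (illustrative only):\n",
+    "# with the brain window (width 80, level 40), values at or below 0 HU map to 0 and values\n",
+    "# at or above 80 HU map to 255.\n",
+    "hu = np.array([-1000, 0, 40, 80, 500], dtype=np.float32)\n",
+    "print(linear_windowing(hu, 80, 40))                     # [  0   0 127 255 255]\n",
+    "print(sigmoid_windowing(hu, 80, 40, u=255, epsilon=1))  # smooth version of the same mapping"
+   ]
+  },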
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_dicom(dcm, default_window=False):\n",
+    "    \"\"\"\n",
+    "    Converts a DICOM object to a 16-bit Numpy array (in Housnfield units) or a uint8 image if the default window is used\n",
+    "    :param dcm: DICOM Object\n",
+    "    :param default_window: Flag to use the window settings specified in the metadata\n",
+    "    :return: Numpy array in either int16 or uint8\n",
+    "    \"\"\"\n",
+    "\n",
+    "    try:\n",
+    "        if dcm.BitsStored == 12 and dcm.PixelRepresentation == 0 and dcm.RescaleIntercept > -100:\n",
+    "            x = dcm.pixel_array + 1000\n",
+    "            px_mode = 4096\n",
+    "            x[x >= px_mode] = x[x >= px_mode] - px_mode\n",
+    "            dcm.PixelData = x.tobytes()\n",
+    "            dcm.RescaleIntercept = -1000\n",
+    "\n",
+    "        pixels = dcm.pixel_array.astype(np.float32) * dcm.RescaleSlope + dcm.RescaleIntercept\n",
+    "    except ValueError as e:\n",
+    "        print(\"ValueError with\", dcm.SOPInstanceUID, e)\n",
+    "        return np.zeros((512, 512))\n",
+    "\n",
+    "    # Pad the image if it isn't square\n",
+    "    if pixels.shape[0] != pixels.shape[1]:\n",
+    "        (a, b) = pixels.shape\n",
+    "        if a > b:\n",
+    "            padding = ((0, 0), ((a - b) // 2, (a - b) // 2))\n",
+    "        else:\n",
+    "            padding = (((b - a) // 2, (b - a) // 2), (0, 0))\n",
+    "        pixels = np.pad(pixels, padding, mode='constant', constant_values=0)\n",
+    "        # Return image windows as per the metadata parameters\n",
+    "    if default_window:\n",
+    "        width = dcm.WindowWidth\n",
+    "        if type(width) != pydicom.valuerep.DSfloat:\n",
+    "            width = width[0]\n",
+    "\n",
+    "        level = dcm.WindowCenter\n",
+    "        if type(level) != pydicom.valuerep.DSfloat:\n",
+    "            level = level[0]\n",
+    "\n",
+    "        img_windowed = linear_windowing(pixels, width, level)\n",
+    "        return img_windowed\n",
+    "    # Return array Hounsfield units only\n",
+    "    else:\n",
+    "        return pixels.astype(np.int16)\n"
+   ]
+  },
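+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sanity check on a single training DICOM (assumes TRAIN_DIR contains .dcm files):\n",
+    "# the raw output should be a square int16 array in Hounsfield units.\n",
+    "sample_file = os.listdir(TRAIN_DIR)[0]\n",
+    "sample_dcm = pydicom.dcmread(os.path.join(TRAIN_DIR, sample_file))\n",
+    "hu_img = prepare_dicom(sample_dcm)\n",
+    "print(sample_file, hu_img.shape, hu_img.dtype, hu_img.min(), hu_img.max())"
+   ]
+  },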
+  {
+   "cell_type": "code",
+   "execution_count": 126,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_png(dataset, folder_name, channels=(0, 1, 2), crop=False):\n",
+    "    \"\"\"\n",
+    "    Create PNG images using 3 specified window settings\n",
+    "    :param dataset: One of \"train\", \"test_stage_1\" or \"test_stage_2\"\n",
+    "    :param folder_name: Name of the output folder\n",
+    "    :param channels: Tuple to specifiy what windows to use for RGB channels\n",
+    "    :param crop: Flag to crop image to only the head\n",
+    "    :return:\n",
+    "    \"\"\"\n",
+    "\n",
+    "    start = time()\n",
+    "\n",
+    "    image_dirs = {\n",
+    "        \"train\": os.path.join(base_path, \"stage_2_train\"),\n",
+    "        \"test_stage_2\": os.path.join(base_path, \"stage_2_test\")\n",
+    "    }\n",
+    "\n",
+    "    windows = [\n",
+    "        (None, None),  # No windowing\n",
+    "        (80, 40),  # Brain\n",
+    "        (200, 80),  # Subdural\n",
+    "        (40, 40),  # Stroke\n",
+    "        (2800, 600),  # Temporal bone\n",
+    "        (380, 40),  # Soft tissue\n",
+    "        (2000, 600),  # Bone\n",
+    "    ]\n",
+    "\n",
+    "    output_path = os.path.join(data_path, \"png\", dataset, f\"{folder_name}\")\n",
+    "    crop_head = CropHead()\n",
+    "\n",
+    "    if not os.path.exists(output_path):\n",
+    "        os.makedirs(output_path)\n",
+    "\n",
+    "    for image_name in tqdm(os.listdir(image_dirs[dataset])):\n",
+    "        ds = pydicom.dcmread(os.path.join(image_dirs[dataset], image_name))\n",
+    "\n",
+    "        rgb = []\n",
+    "        for c in channels:\n",
+    "            if c == 0:\n",
+    "                ch = prepare_dicom(ds, default_window=False)\n",
+    "            else:\n",
+    "                ch = prepare_dicom(ds)\n",
+    "                ch = linear_windowing(ch, windows[c][0], windows[c][1])\n",
+    "            rgb.append(ch)\n",
+    "\n",
+    "        img = np.stack(rgb, -1)\n",
+    "\n",
+    "        if crop:\n",
+    "            x_min, x_max, y_min, y_max = crop_head.crop_extents(img > 0)\n",
+    "            img = img[x_min:x_max, y_min:y_max]\n",
+    "\n",
+    "            if img.shape[0] == 0 or img.shape[1] == 0:\n",
+    "                img = np.zeros(shape=(512, 512, 3), dtype=np.uint8)\n",
+    "\n",
+    "        im = Image.fromarray(img.astype(np.uint8))\n",
+    "        im.save(os.path.join(output_path, image_name[:-4] + \".png\"))\n",
+    "\n",
+    "    print(\"Done in\", (time() - start) // 60, \"minutes\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prepare 3 window images (brain-subdural-bone)\n",
+    "prepare_png(\"train\", \"brain-subdural-bone\", channels=(1, 2, 6), crop=True)\n",
+    "#prepare_png(\"test_stage_1\", \"brain-subdural-bone\", channels=(1, 2, 6), crop=True)\n"
+   ]
+  },
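+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch (not part of the original pipeline): the per-image work in prepare_png\n",
+    "# is independent, so it could be parallelised with the joblib imports above. _convert_one is\n",
+    "# a hypothetical helper that mirrors the body of the loop in prepare_png.\n",
+    "def _convert_one(image_path, output_path, channels=(1, 2, 6), crop=True):\n",
+    "    windows = [(None, None), (80, 40), (200, 80), (40, 40), (2800, 600), (380, 40), (2000, 600)]\n",
+    "    ds = pydicom.dcmread(image_path)\n",
+    "\n",
+    "    rgb = []\n",
+    "    for c in channels:\n",
+    "        ch = prepare_dicom(ds)\n",
+    "        if c != 0:\n",
+    "            ch = linear_windowing(ch, windows[c][0], windows[c][1])\n",
+    "        rgb.append(ch)\n",
+    "    img = np.stack(rgb, -1)\n",
+    "\n",
+    "    if crop:\n",
+    "        x_min, x_max, y_min, y_max = CropHead().crop_extents(img > 0)\n",
+    "        img = img[x_min:x_max, y_min:y_max]\n",
+    "        if img.shape[0] == 0 or img.shape[1] == 0:\n",
+    "            img = np.zeros(shape=(512, 512, 3), dtype=np.uint8)\n",
+    "\n",
+    "    name = os.path.basename(image_path)[:-4] + \".png\"\n",
+    "    Image.fromarray(img.astype(np.uint8)).save(os.path.join(output_path, name))\n",
+    "\n",
+    "# Example usage (assumes the output directory already exists):\n",
+    "# out_dir = os.path.join(base_url, \"png\", \"train\", \"brain-subdural-bone\")\n",
+    "# Parallel(n_jobs=4)(delayed(_convert_one)(os.path.join(TRAIN_DIR, f), out_dir)\n",
+    "#                    for f in os.listdir(TRAIN_DIR))"
+   ]
+  },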
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}