--- a
+++ b/.ipynb_checkpoints/3d-ircadb-01_util-checkpoint.ipynb
@@ -0,0 +1,378 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os, glob\n",
+    "import sys\n",
+    "import copy \n",
+    "import pydicom\n",
+    "import scipy\n",
+    "import scipy.misc\n",
+    "import numpy as np\n",
+    "import cv2\n",
+    "import imageio\n",
+    "from scipy.ndimage import rotate\n",
+    "from PIL import Image\n",
+    "from zipfile import ZipFile"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Download the 3D-IRCADb-01 dataset from https://www.ircad.fr/research/3d-ircadb-01/ into a directory named `Dataset` in the root of the repo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Directory that holds the downloaded 3D-IRCADb-01 archives; the extraction cells below read this variable.\n",
+    "dataset_path = \"Dataset\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#ONLY DO THIS STEP IF YOU'VE DOWNLOADED THE ENTIRE DATASET AT ONCE NOT PATIENT BY PATIENT\n",
+    "# The full archive is optional: when the patients were downloaded one by one it is absent,\n",
+    "# so skip the step instead of failing with FileNotFoundError (keeps Restart & Run All working).\n",
+    "full_archive_path = os.path.join(dataset_path, \"3Dircadb1.zip\")\n",
+    "if os.path.isfile(full_archive_path):\n",
+    "    try:\n",
+    "        with ZipFile(full_archive_path, 'r') as zipObj:\n",
+    "            zipObj.extractall(dataset_path)\n",
+    "    except Exception as error:\n",
+    "        # Best-effort: report an unreadable or corrupt archive and carry on.\n",
+    "        print(error)\n",
+    "else:\n",
+    "    print(\"Skipping: \" + full_archive_path + \" not found\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Where the downloaded archives live; point this elsewhere if the dataset is stored in another directory\n",
+    "dataset_path = \"Dataset\"\n",
+    "\n",
+    "# STEP 1: UNPACK EVERY .zip FOUND DIRECTLY INSIDE dataset_path (ONE PER PATIENT)\n",
+    "top_level_paths = [os.path.join(dataset_path, entry) for entry in os.listdir(dataset_path)]\n",
+    "for archive_path in top_level_paths:\n",
+    "    if not archive_path.endswith(\".zip\"):\n",
+    "        continue\n",
+    "    with ZipFile(archive_path, 'r') as archive:\n",
+    "        try:\n",
+    "            archive.extractall(dataset_path)\n",
+    "        except Exception as error:\n",
+    "            # Extraction problems are reported and the remaining archives are still processed\n",
+    "            print(error)\n",
+    "\n",
+    "# STEP 2: INSIDE EACH EXTRACTED PATIENT DIRECTORY, UNPACK THE SLICE AND MASK ARCHIVES IN PLACE\n",
+    "for entry in os.listdir(dataset_path):\n",
+    "    patient_dir = os.path.join(dataset_path, entry)\n",
+    "    if not os.path.isdir(patient_dir):\n",
+    "        continue\n",
+    "    for archive_name in (\"PATIENT_DICOM.zip\", \"MASKS_DICOM.zip\"):\n",
+    "        nested_archive = os.path.join(patient_dir, archive_name)\n",
+    "        if os.path.isfile(nested_archive):\n",
+    "            with ZipFile(nested_archive, 'r') as archive:\n",
+    "                archive.extractall(patient_dir)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# CHANGE DATASET STRUCTURE\n",
+    "# PREFIX FILES IN THE DIRECTORY WITH THE PATIENT'S ID\n",
+    "# The patient id is the text that follows 'db' in the folder name\n",
+    "# (presumably folders look like '3Dircadb1.7' -> id '1.7'; verify against the extracted data).\n",
+    "for folder in os.listdir(dataset_path):\n",
+    "    folder_path = os.path.join(dataset_path, folder)\n",
+    "    name_parts = folder.split('db')\n",
+    "    # Skip plain files and any folder without a 'db' marker, for which no id can be derived\n",
+    "    # (indexing the split result would raise IndexError).\n",
+    "    if not os.path.isdir(folder_path) or len(name_parts) < 2:\n",
+    "        continue\n",
+    "    prefix = name_parts[1] + '_'\n",
+    "    for subfolder in os.listdir(folder_path):\n",
+    "        subfolder_path = os.path.join(folder_path, subfolder)\n",
+    "        if not os.path.isdir(subfolder_path):\n",
+    "            continue\n",
+    "        for sub_sub in os.listdir(subfolder_path):\n",
+    "            sub_sub_path = os.path.join(subfolder_path, sub_sub)\n",
+    "            if os.path.isdir(sub_sub_path):\n",
+    "                for image in os.listdir(sub_sub_path):\n",
+    "                    # Entries that already carry the prefix are left alone, so re-running\n",
+    "                    # the cell does not stack the id twice.\n",
+    "                    if not image.startswith(prefix):\n",
+    "                        os.rename(os.path.join(sub_sub_path, image), os.path.join(sub_sub_path, prefix + image))\n",
+    "            # Rename the entry itself (a slice file or a mask directory) after its content.\n",
+    "            if not sub_sub.startswith(prefix):\n",
+    "                os.rename(sub_sub_path, os.path.join(subfolder_path, prefix + sub_sub))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "directory_to_move = \"PATIENT_DICOM\"\n",
+    "desired_scans_path = os.path.join(\"train\", \"patients\")\n",
+    "desired_masks_path = os.path.join(\"train\", \"masks\")\n",
+    "\n",
+    "# makedirs(exist_ok=True) creates \"train\" and its sub-directories in one call and stays silent\n",
+    "# when they already exist, so re-running the cell prints no spurious errors.\n",
+    "for target_dir in (desired_scans_path, desired_masks_path, os.path.join(desired_masks_path, \"merged_livertumors\")):\n",
+    "    os.makedirs(target_dir, exist_ok=True)\n",
+    "\n",
+    "# Flatten every patient's scans and masks into the shared training directories.\n",
+    "for patient_path in os.listdir(dataset_path):\n",
+    "    patient_full_path = os.path.join(dataset_path, patient_path)\n",
+    "    if not os.path.isdir(patient_full_path):\n",
+    "        continue\n",
+    "    for source_name, target_dir in ((directory_to_move, desired_scans_path), (\"MASKS_DICOM\", desired_masks_path)):\n",
+    "        source_dir = os.path.join(patient_full_path, source_name)\n",
+    "        if not os.path.isdir(source_dir):\n",
+    "            continue\n",
+    "        for entry in os.listdir(source_dir):\n",
+    "            # os.replace is used for scans and masks alike: unlike os.rename it also\n",
+    "            # overwrites an existing destination file on Windows.\n",
+    "            os.replace(os.path.join(source_dir, entry), os.path.join(target_dir, entry))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# COUNT THE NUMBER OF SCANS FOR 1 PATIENT TO APPEND ON THEM\n",
+    "def count_scans_startwith(directory, prefix):\n",
+    "    \"\"\"Return how many entries of `directory` have a name starting with `prefix` + '_'.\"\"\"\n",
+    "    wanted = prefix + '_'\n",
+    "    return sum(1 for entry in os.listdir(directory) if entry.startswith(wanted))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# MERGE ALL LIVERTUMORS IN 1 MASK\n",
+    "def merge_livertumors(scans_path, masks_path, empty_shape=(512, 512)):\n",
+    "    \"\"\"Write one merged tumor mask per scan to masks_path/merged_livertumors/<scan>.jpg.\n",
+    "\n",
+    "    For every file name in `scans_path`, the same-named DICOM slice is read from each\n",
+    "    `<patient_id>_livertumor*` directory under `masks_path` and the slices are OR-ed\n",
+    "    together. The patient id is the part of the scan name before the first underscore.\n",
+    "\n",
+    "    Args:\n",
+    "        scans_path: directory whose file names identify the slices to process.\n",
+    "        masks_path: directory holding the per-structure mask directories.\n",
+    "        empty_shape: shape of the all-zero mask written when no tumor directory matches\n",
+    "            the patient (defaults to 512x512).\n",
+    "\n",
+    "    Note: the masks are stored as JPEG, which is lossy, so readers should treat any\n",
+    "    non-zero pixel as tumor instead of testing for exact values.\n",
+    "    \"\"\"\n",
+    "    output_dir = os.path.join(masks_path, 'merged_livertumors')\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "    # The set of mask directories does not change while merging, so list it once.\n",
+    "    mask_dirs = os.listdir(masks_path)\n",
+    "\n",
+    "    for scan in os.listdir(scans_path):\n",
+    "        tumor_prefix = scan.split('_')[0] + '_livertumor'\n",
+    "        tumor_volume = None\n",
+    "        for mask_dir in mask_dirs:\n",
+    "            if not mask_dir.startswith(tumor_prefix):\n",
+    "                continue\n",
+    "            # Binarise every slice the same way (any positive pixel is tumor) so that a single\n",
+    "            # tumor and several OR-ed tumors produce the same kind of 0/255 mask.\n",
+    "            current_tumor = pydicom.dcmread(os.path.join(masks_path, mask_dir, scan)).pixel_array > 0\n",
+    "            tumor_volume = current_tumor if tumor_volume is None else np.logical_or(tumor_volume, current_tumor)\n",
+    "\n",
+    "        if tumor_volume is None:\n",
+    "            tumor_volume = np.zeros(empty_shape, dtype=bool)\n",
+    "\n",
+    "        # Vectorised scaling of the boolean mask to 0/255 for saving as an 8-bit image.\n",
+    "        im = Image.fromarray(tumor_volume.astype(np.uint8) * 255)\n",
+    "        im.save(os.path.join(output_dir, scan+'.jpg'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# REFLECT IMAGE AND MASK TO AUGMENT DATA\n",
+    "def reflect_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count):\n",
+    "    \"\"\"Save left-right mirrored copies of a scan, its liver mask and its tumor mask.\n",
+    "\n",
+    "    The two DICOM objects are deep-copied before their PixelData is replaced, so the\n",
+    "    inputs are not modified. All three outputs share the base name\n",
+    "    `<patient_id>_image_<patient_imgs_count>_augref`; the tumor mask gets a `.jpg`\n",
+    "    extension and is written under `masks_path/merged_livertumors`.\n",
+    "    \"\"\"\n",
+    "    def mirrored_copy(dataset):\n",
+    "        # Deep copy, then overwrite the pixel bytes with the horizontally flipped array\n",
+    "        clone = copy.deepcopy(dataset)\n",
+    "        clone.PixelData = np.fliplr(clone.pixel_array).tobytes()\n",
+    "        return clone\n",
+    "\n",
+    "    out_name = patient_id+'_image_'+str(patient_imgs_count)+'_augref'\n",
+    "\n",
+    "    mirrored_img = mirrored_copy(src_img)\n",
+    "    mirrored_copy(src_liver_mask).save_as(os.path.join(liver_mask_path, out_name))\n",
+    "\n",
+    "    mirrored_tumor = np.fliplr(src_mask)\n",
+    "    mirrored_img.save_as(os.path.join(train_path, out_name))\n",
+    "\n",
+    "    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', out_name+'.jpg'), mirrored_tumor.astype(np.uint8))\n",
+    "                    \n",
+    "    \n",
+    "# ROTATE IMAGE AND MASK TO AUGMENT DATA\n",
+    "def rotate_dicom(src_img, src_mask, src_liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle):\n",
+    "    \"\"\"Save copies of a scan, its liver mask and its tumor mask rotated by `angle` degrees.\n",
+    "\n",
+    "    The rotation keeps the original array shape (`reshape=False`). The two DICOM objects\n",
+    "    are deep-copied before their PixelData is replaced, so the inputs are not modified.\n",
+    "    All three outputs share the base name `<patient_id>_image_<patient_imgs_count>_augrot`;\n",
+    "    the tumor mask gets a `.jpg` extension and is written under\n",
+    "    `masks_path/merged_livertumors`.\n",
+    "\n",
+    "    The scan keeps scipy's default spline interpolation. Both masks are rotated with\n",
+    "    `order=0` (nearest neighbour) so that no interpolated in-between values or spline\n",
+    "    overshoot are introduced at the mask borders.\n",
+    "    NOTE(review): this assumes the liver DICOM holds a label mask rather than intensities - confirm.\n",
+    "    \"\"\"\n",
+    "    out_name = patient_id+'_image_'+str(patient_imgs_count)+'_augrot'\n",
+    "\n",
+    "    rotated_img = copy.deepcopy(src_img)\n",
+    "    rotated_img.PixelData = rotate(rotated_img.pixel_array, angle, reshape=False).tobytes()\n",
+    "\n",
+    "    rotated_liver_mask = copy.deepcopy(src_liver_mask)\n",
+    "    rotated_liver_mask.PixelData = rotate(rotated_liver_mask.pixel_array, angle, reshape=False, order=0).tobytes()\n",
+    "    rotated_liver_mask.save_as(os.path.join(liver_mask_path, out_name))\n",
+    "\n",
+    "    rotated_mask = rotate(src_mask, angle, reshape=False, order=0)\n",
+    "\n",
+    "    rotated_img.save_as(os.path.join(train_path, out_name))\n",
+    "\n",
+    "    rotated_mask = rotated_mask.astype(np.uint8)\n",
+    "    imageio.imwrite(os.path.join(masks_path, 'merged_livertumors', out_name+'.jpg'), rotated_mask)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# AUGMENT THE MASKS WITH TUMORS TO FIX CLASS IMBALANCING\n",
+    "def augment_dicom(train_path, masks_path):\n",
+    "    \"\"\"Create one mirrored and 35 rotated copies of every scan whose merged mask shows a tumor.\n",
+    "\n",
+    "    For each file in `train_path` the mask `masks_path/merged_livertumors/<scan>.jpg` is read.\n",
+    "    Scans with a non-empty mask are passed, with the liver mask from\n",
+    "    `masks_path/<patient_id>_liver/<scan>`, to `reflect_dicom` once and to `rotate_dicom`\n",
+    "    for every angle from 10 to 350 degrees in steps of 10.\n",
+    "\n",
+    "    Scans are skipped when they are already augmented (`_augref` / `_augrot` suffix), so\n",
+    "    re-running does not augment augmented data, and when their mask cannot be read.\n",
+    "    \"\"\"\n",
+    "    rotation_angles = range(10, 360, 10)\n",
+    "    augmented_suffixes = ('_augref', '_augrot')\n",
+    "    # os.listdir returns a fresh list, i.e. a snapshot: files added below are not visited.\n",
+    "    train_files = os.listdir(train_path)\n",
+    "\n",
+    "    for scan in train_files:\n",
+    "        if scan.endswith(augmented_suffixes):\n",
+    "            continue\n",
+    "\n",
+    "        mask_path = os.path.join(masks_path, 'merged_livertumors', scan+'.jpg')\n",
+    "        # Read as a single channel so the augmented masks are written as 2-D images.\n",
+    "        tumor_mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)\n",
+    "        # cv2.imread returns None when the file is missing or unreadable.\n",
+    "        # The masks are 0/255 images stored as lossy JPEG, so a tumor is any non-zero pixel;\n",
+    "        # testing `1 in tumor_mask` would only match when the exact value 1 happens to occur.\n",
+    "        if tumor_mask is None or not np.any(tumor_mask):\n",
+    "            continue\n",
+    "\n",
+    "        patient_id = scan.split('_')[0]\n",
+    "        # The current number of this patient's files is used as the running index in output names.\n",
+    "        patient_imgs_count = count_scans_startwith(train_path, patient_id)\n",
+    "        original_img = pydicom.dcmread(os.path.join(train_path, scan))\n",
+    "        liver_mask_path = os.path.join(masks_path, patient_id+'_liver')\n",
+    "        liver_mask = pydicom.dcmread(os.path.join(liver_mask_path, scan))\n",
+    "        reflect_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count)\n",
+    "\n",
+    "        for angle in rotation_angles:\n",
+    "            patient_imgs_count += 1\n",
+    "            rotate_dicom(original_img, tumor_mask, liver_mask, train_path, masks_path, liver_mask_path, patient_id, patient_imgs_count, angle)\n",
+    "        "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "done\n"
+     ]
+    }
+   ],
+   "source": [
+    "masks_path = os.path.join('train','masks')\n",
+    "training_path = os.path.join('train','patients')\n",
+    "\n",
+    "# augment_dicom reads its tumor masks from masks_path/merged_livertumors, which is what\n",
+    "# merge_livertumors writes, so the (commented-out) merge call below has to have been run\n",
+    "# once before augmenting.\n",
+    "augment_dicom(training_path, masks_path)\n",
+    "# merge_livertumors(training_path, masks_path)\n",
+    "print('done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "20974\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Count the merged masks that contain at least one tumor pixel.\n",
+    "# The masks are 0/255 images stored as lossy JPEG, so test for any non-zero pixel;\n",
+    "# testing `1 in tumor_mask` would only match when the exact value 1 happens to occur.\n",
+    "merged_masks_dir = os.path.join(masks_path, 'merged_livertumors')\n",
+    "count = 0\n",
+    "for mask in os.listdir(merged_masks_dir):\n",
+    "    tumor_mask = cv2.imread(os.path.join(merged_masks_dir, mask), cv2.IMREAD_GRAYSCALE)\n",
+    "    # cv2.imread returns None for files it cannot decode; skip those.\n",
+    "    if tumor_mask is not None and np.any(tumor_mask):\n",
+    "        count += 1\n",
+    "print(count)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "done\n"
+     ]
+    }
+   ],
+   "source": [
+    "# REMOVE AUGMENTED DATA\n",
+    "# 1) augmented scans in the training directory\n",
+    "for scan in os.listdir(training_path):\n",
+    "    if scan.endswith(('_augref', '_augrot')):\n",
+    "        os.remove(os.path.join(training_path,scan))\n",
+    "\n",
+    "# 2) augmented liver masks inside every mask directory whose name ends with 'liver'\n",
+    "for mask_dir in os.listdir(masks_path):\n",
+    "    if not mask_dir.endswith('liver'):\n",
+    "        continue\n",
+    "    mask_dir_path = os.path.join(masks_path, mask_dir)\n",
+    "    for liver_mask in os.listdir(mask_dir_path):\n",
+    "        if liver_mask.endswith(('augref', 'augrot', 'aug')):\n",
+    "            os.remove(os.path.join(mask_dir_path, liver_mask))\n",
+    "\n",
+    "# 3) augmented merged tumor masks\n",
+    "merged_masks_dir = os.path.join(masks_path, 'merged_livertumors')\n",
+    "for mask in os.listdir(merged_masks_dir):\n",
+    "    if mask.endswith(('_augref.jpg', 'augrot.jpg')):\n",
+    "        os.remove(os.path.join(merged_masks_dir, mask))\n",
+    "print('done')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}