Switch to side-by-side view

--- a
+++ b/Experimentations/Exp15-Human expert comparison with model.ipynb
@@ -0,0 +1,753 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 15. Experimentation: Comparison of Human-Expert-Level Performance with the Model's Performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "from torch.autograd import Variable\n",
+    "import torch.optim as optim\n",
+    "import torchvision\n",
+    "from torchvision import datasets, models\n",
+    "from torchvision import transforms as T\n",
+    "from torch.utils.data import DataLoader, Dataset\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import os\n",
+    "import time\n",
+    "import pandas as pd\n",
+    "from skimage import io, transform\n",
+    "import matplotlib.image as mpimg\n",
+    "from PIL import Image\n",
+    "from sklearn.metrics import roc_auc_score\n",
+    "import torch.nn.functional as F\n",
+    "import scipy\n",
+    "import random\n",
+    "import pickle\n",
+    "import scipy.io as sio\n",
+    "import itertools\n",
+    "from scipy.ndimage.interpolation import shift\n",
+    "import copy\n",
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "%matplotlib inline\n",
+    "plt.ion()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import Dataloader Class and other utilities"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from dataloader_2d import *\n",
+    "from dataloader_3d import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Build Data loader objects"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Dataset roots; each one holds a pickled list with the file names of its samples.\n",
+    "train_path = '/beegfs/ark576/new_knee_data/train'\n",
+    "val_path = '/beegfs/ark576/new_knee_data/val'\n",
+    "test_path = '/beegfs/ark576/new_knee_data/test'\n",
+    "\n",
+    "def load_sorted_file_names(pickle_path):\n",
+    "    \"\"\"Return the sorted list of file names stored in the pickle at `pickle_path`.\n",
+    "\n",
+    "    The file is opened in a `with` block so the handle is closed as soon as the list is loaded.\n",
+    "    NOTE: unpickling can execute arbitrary code; only point this at trusted files.\n",
+    "    \"\"\"\n",
+    "    with open(pickle_path,'rb') as names_file:\n",
+    "        return sorted(pickle.load(names_file))\n",
+    "\n",
+    "train_file_names = load_sorted_file_names(train_path + '/train_file_names.p')\n",
+    "val_file_names = load_sorted_file_names(val_path + '/val_file_names.p')\n",
+    "test_file_names = load_sorted_file_names(test_path + '/test_file_names.p')\n",
+    "\n",
+    "# Only the training split is built with train_data=True (flipping disabled); all splits are normalized.\n",
+    "transformed_dataset = {'train': KneeMRIDataset(train_path,train_file_names, train_data= True, flipping=False, normalize= True),\n",
+    "                       'validate': KneeMRIDataset(val_path,val_file_names, normalize= True),\n",
+    "                       'test': KneeMRIDataset(test_path,test_file_names, normalize= True)\n",
+    "                                          }\n",
+    "\n",
+    "# One loader per split (batches of 5, shuffled, loaded in the main process) and the sample count of each split.\n",
+    "dataloader = {x: DataLoader(transformed_dataset[x], batch_size=5,\n",
+    "                        shuffle=True, num_workers=0) for x in ['train', 'validate','test']}\n",
+    "data_sizes ={x: len(transformed_dataset[x]) for x in ['train', 'validate','test']}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "im, seg_F, seg_P, seg_T,_ = next(iter(dataloader['train']))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Find Max and min values of Images (all 7 contrasts), of Fractional Anisotropy maps and of Mean Diffusivity maps for image normalization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Scan the whole training set for the extreme values used for normalisation.\n",
+    "# Channel layout follows the indexing below: 0-6 -> image contrasts, 7 -> FA map, 8 -> MD map.\n",
+    "# Minima start at +inf and maxima at -inf so that the first batch always replaces the seed,\n",
+    "# whatever the sign of the data.\n",
+    "min_fa = min_md = min_image = np.inf\n",
+    "max_fa = max_md = max_image = -np.inf\n",
+    "for data in dataloader['train']:\n",
+    "    inputs = data[0]\n",
+    "    contrasts = inputs[:,:7,:,:]\n",
+    "    fa_map = inputs[:,7,:,:]\n",
+    "    # The MD minimum is taken from channel 8 only, the same slice that is used for the MD maximum.\n",
+    "    md_map = inputs[:,8,:,:]\n",
+    "    min_fa = min(min_fa, torch.min(fa_map))\n",
+    "    min_md = min(min_md, torch.min(md_map))\n",
+    "    min_image = min(min_image, torch.min(contrasts))\n",
+    "    max_fa = max(max_fa, torch.max(fa_map))\n",
+    "    max_md = max(max_md, torch.max(md_map))\n",
+    "    max_image = max(max_image, torch.max(contrasts))\n",
+    "# Order expected by consumers of this tuple: (max, min) for images, then FA, then MD.\n",
+    "norm_values = (max_image, min_image, max_fa, min_fa, max_md, min_md)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Import Models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from unet_3d import *\n",
+    "from unet_basic_dilated import *\n",
+    "from vnet import *\n",
+    "from ensemble_model import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Accumulate the summed segmentation labels of each class (index 0: F, 1: P, 2: T)\n",
+    "# over every training batch, then average per batch.\n",
+    "seg_sum = torch.zeros(3)\n",
+    "num_batches = 0\n",
+    "for _, segF, segP, segT, _ in dataloader['train']:\n",
+    "    seg_sum[0] += torch.sum(segF)\n",
+    "    seg_sum[1] += torch.sum(segP)\n",
+    "    seg_sum[2] += torch.sum(segT)\n",
+    "    num_batches += 1\n",
+    "# Divide by the number of batches that were actually iterated.\n",
+    "mean_s_sum = seg_sum/num_batches"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import Loss functions and all other utility functions like functions for saving models, for visualizing images, etc."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from utils import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Import all the Training and evaluate functions to evaluate the models"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "from train_2d import *\n",
+    "from train_3d import *\n",
+    "from train_ensemble import *\n",
+    "from evaluate_2d import *\n",
+    "from evaluate_3d import *\n",
+    "from evaluate_ensemble import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 15. Experimentation: Comparison of Human-Expert-Level Performance with the Model's Performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 182,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Split every test volume into 15 single-slice dictionaries and pickle each slice separately.\n",
+    "# The three key groups differ in where the slice axis sits in the loaded array:\n",
+    "#   keys2 -> third axis, keys1 -> second axis, keys0 -> first axis.\n",
+    "# NOTE(review): 'errorner' looks like it may be a misspelling of the .mat key - confirm against the data.\n",
+    "keys2 = ['MDnr','FAnr','NUFnr','SegmentationF', 'SegmentationT', 'SegmentationP','S0nr']\n",
+    "keys0 = ['DXnr','DYnr','phasenr','errorner']\n",
+    "keys1 = ['EigenValuenr','EigenVectornr']\n",
+    "new_path = '/beegfs/ark576/new_knee_data/train/'  # not used in this cell\n",
+    "# NOTE(review): `new_test_file` is not assigned in any cell of this notebook; it presumably holds the\n",
+    "# .mat file names found in the test-data directory - define it before running this cell.\n",
+    "for i in new_test_file:\n",
+    "    image_data = sio.loadmat('/beegfs/ark576/new-data-apr-24/test data' +'/'+ i)\n",
+    "    for j in range(15):\n",
+    "        new_image_data ={}\n",
+    "        for k,v in image_data.items():\n",
+    "            if k in keys2:\n",
+    "                if len(v.shape) == 4:\n",
+    "                    new_image_data[k] = v[:,:,j,:]\n",
+    "                elif len(v.shape) == 3:\n",
+    "                    new_image_data[k] = v[:,:,j]\n",
+    "            elif k in keys1:\n",
+    "                new_image_data[k] = v[:,j]\n",
+    "            elif k in keys0:\n",
+    "                new_image_data[k] = v[j,:]\n",
+    "        # The with-block closes (and flushes) each slice file before the next one is written.\n",
+    "        with open('/beegfs/ark576/new-data-apr-24/test data/mod_test/'+i+str(j)+'.p','wb') as slice_file:\n",
+    "            pickle.dump(new_image_data,slice_file)\n",
+    "# pickle.dump(new_test_file_names,open(test_path+'/test_file_names.p','wb'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 183,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "new_file = sorted(os.listdir('/beegfs/ark576/new-data-apr-24/test data/mod_test/'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 195,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "existing_test_path = '/beegfs/ark576/new_knee_data/test/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 186,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "existing_test_file_names = sorted(os.listdir(existing_test_path))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 189,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "new_test_file_names = sorted(existing_test_file_names + new_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 190,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "105"
+      ]
+     },
+     "execution_count": 190,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(new_test_file_names)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 196,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Persist the merged test file-name list; the with-block guarantees the file is flushed and closed.\n",
+    "with open(existing_test_path+'test_file_names.p','wb') as names_file:\n",
+    "    pickle.dump(new_test_file_names,names_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 84,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "example = sio.loadmat('/beegfs/ark576/Knee Cartilage Data/Train Data/FirstEvalautionJR_Pat54_LeftKL1.mat')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train_file_names = sorted(os.listdir('/beegfs/ark576/new_knee_data/train/'))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 98,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "final_train_files = train_file_names[:-5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Persist the final training file-name list; the with-block guarantees the file is flushed and closed.\n",
+    "with open('/beegfs/ark576/new_knee_data/train/train_file_names.p','wb') as names_file:\n",
+    "    pickle.dump(final_train_files,names_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(256, 256, 15)"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the pickled variables; the with-block closes the file handle once loading is done.\n",
+    "# NOTE(review): `variable_path_name` is not assigned in any cell of this notebook - set it before running.\n",
+    "# NOTE: unpickling can execute arbitrary code; only load trusted files.\n",
+    "with open(variable_path_name,'rb') as variables_file:\n",
+    "    variables = pickle.load(variables_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train_full_file_names = os.listdir('/beegfs/ark576/Knee Cartilage Data/Train Data/')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train_full_file_names.remove('train_file_names')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Write the training file-name list to the working directory; the with-block closes the file afterwards.\n",
+    "with open('train_file_names','wb') as names_file:\n",
+    "    pickle.dump(train_full_file_names, names_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# List the raw test volumes, leaving out any 'test_file_names' entry.\n",
+    "# Filtering (instead of list.remove) keeps the cell re-runnable when that entry is absent.\n",
+    "test_full_file_names = [name for name in os.listdir('/beegfs/ark576/Knee Cartilage Data/Test Data/')\n",
+    "                        if name != 'test_file_names']\n",
+    "# Write the list to the working directory; the with-block closes the file afterwards.\n",
+    "with open('test_file_names','wb') as names_file:\n",
+    "    pickle.dump(test_full_file_names, names_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "7"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "len(test_full_file_names)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "resegmentation_path = '/beegfs/ark576/new-data-apr-24/resegmentation/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "resegmentation_files = os.listdir(resegmentation_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['2017.09.20_MeasRO1_Pat47_Left_KL_SegmentedEvaluationAD.mat',\n",
+       " '2016.08.11_MeasRO1_Pat13_LeftKL1_SegmentedEvaluationAD.mat',\n",
+       " '2016.08.15_MeasRO1_Pat12_RightKL1_SegmentedEvaluationAD.mat',\n",
+       " '2017.09.29_MeasRO1_Pat4_RightKL1_FU_SegmentedEvaluationAD.mat',\n",
+       " '2017.10.13_MeasRO1_Pat9_RightKL1_FU_SegmentedEvaluationAD.mat',\n",
+       " '2016.08.08_MeasRO1_Pat11_LeftKL1_SegmentedEvaluationAD.mat',\n",
+       " '2017.10.14_MeasRO1_Pat51_RightKL1_SegmentedEvaluationAD.mat',\n",
+       " '2016.09.06_MeasRO1_Pat14_LeftKL1_Meas1_SegmentedEvaluationAD.mat',\n",
+       " '2016.12.19_RadialRO1_Pat15_LeftKL1_SegmentedEvaluationAD.mat',\n",
+       " '2017.09.20_MeasRO1_Pat29_LeftKL1_SegmentedEvaluationAD.mat']"
+      ]
+     },
+     "execution_count": 51,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "resegmentation_files"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train_file_path = '/beegfs/ark576/Knee Cartilage Data/Train Data/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "train_files = os.listdir(train_file_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "def slice_dice(seg_a, seg_b, eps=1e-4):\n",
+    "    \"\"\"Smoothed Dice coefficient of two masks: (2*sum(a*b) + eps) / (sum(a + b) + eps).\n",
+    "\n",
+    "    `eps` keeps the ratio defined (and equal to 1) when both masks are empty.\n",
+    "    \"\"\"\n",
+    "    return (2*(np.sum(seg_a*seg_b)) + eps)/(np.sum(seg_b + seg_a) + eps)\n",
+    "\n",
+    "def mean_score(scores):\n",
+    "    \"\"\"Arithmetic mean of `scores`; returns 0 for an empty list instead of dividing by zero.\"\"\"\n",
+    "    return sum(scores)/max(len(scores), 1)\n",
+    "\n",
+    "# Number of slices compared per volume.\n",
+    "n_slices = 15\n",
+    "# Index used in `reseg_dc_sr` for each segmentation mask stored in the .mat files.\n",
+    "mask_keys = [(0,'SegmentationF'),(1,'SegmentationP'),(2,'SegmentationT')]\n",
+    "\n",
+    "# Per-slice Dice scores between the expert's re-segmentation and the original segmentation\n",
+    "# of the same file (0: F, 1: P, 2: T).\n",
+    "reseg_dc_sr = {0:[],1:[],2:[]}\n",
+    "for i in resegmentation_files:\n",
+    "    resegmented_image = sio.loadmat(resegmentation_path+i)\n",
+    "    original_image = sio.loadmat(train_file_path+i)\n",
+    "    for class_idx, mask_key in mask_keys:\n",
+    "        reseg_mask = resegmented_image[mask_key]\n",
+    "        ori_mask = original_image[mask_key]\n",
+    "        for j in range(n_slices):\n",
+    "            reseg_dc_sr[class_idx].append(slice_dice(reseg_mask[:,:,j], ori_mask[:,:,j]))\n",
+    "\n",
+    "# Average over the scores that were actually collected, so the result stays correct\n",
+    "# when the number of re-segmented files changes.\n",
+    "dice_score_F = mean_score(reseg_dc_sr[0])\n",
+    "dice_score_P = mean_score(reseg_dc_sr[1])\n",
+    "dice_score_T = mean_score(reseg_dc_sr[2])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.70956598319055086"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dice_score_F"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.75458329820914749"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dice_score_P"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.60834781778619007"
+      ]
+     },
+     "execution_count": 46,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dice_score_T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 276,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x2b86b2548908>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x2b86b1915d68>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       "<matplotlib.figure.Figure at 0x2b86b11ddc18>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "def compare_plots(valid,reseg):\n",
+    "    \"\"\"Show, for class indices 0-2, the model's and the human expert's Dice-score histograms side by side.\n",
+    "\n",
+    "    valid: container indexed by 0, 1, 2 holding the Dice scores of the model's predictions.\n",
+    "    reseg: container with the same layout holding the Dice scores of the human re-segmentation.\n",
+    "    One figure with two panels sharing the y axis is drawn per class index.\n",
+    "    \"\"\"\n",
+    "    for i in range(3):\n",
+    "        print(i)\n",
+    "        f, axarr = plt.subplots(1,2,sharey=True)\n",
+    "        f.set_size_inches(15, 5, forward=True)\n",
+    "        # `density=True` normalises the histogram; it replaces the `normed` keyword that newer\n",
+    "        # matplotlib releases no longer accept.\n",
+    "        axarr[0].hist(valid[i],label = \"Model's prediction on the validation sample\",color = 'r', density = True)\n",
+    "        axarr[1].hist(reseg[i], label = 'Resegmentation by Human Expert',density=True)\n",
+    "        f.legend()\n",
+    "        # Label both panels explicitly instead of relying on pyplot's notion of the current axes.\n",
+    "        for ax in axarr:\n",
+    "            ax.set_xlabel('Dice Score')\n",
+    "            ax.set_ylabel('Fraction of number of images')\n",
+    "        plt.show()\n",
+    "# NOTE(review): `dice_hist_smooth` is not assigned in any cell of this notebook; it presumably holds the\n",
+    "# model's per-class validation Dice scores from another experiment - confirm and define it before this call.\n",
+    "compare_plots(dice_hist_smooth,reseg_dc_sr)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Discussion\n",
+    "As can be seen above, the distributions of Dice scores for the model's predictions and for the images resegmented by a human expert are very similar. This suggests that the model and the human expert make similar errors and focus on similar kinds of information in an image while segmenting it."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}