Diff of /4DetectNodules.ipynb [000000] .. [467f44]

Switch to side-by-side view

--- a
+++ b/4DetectNodules.ipynb
@@ -0,0 +1,889 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 4. Detect Nodules from Kaggle Dataset\n",
+    "\n",
+    "## Summary\n",
+    "* Load and process the Kaggle dataset\n",
+    "* Generate prediction masks with the trained U-Net\n",
+    "* Reduce false positives with the trained CNN"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:64: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:66: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:69: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:71: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:74: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:76: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:79: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:81: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:84: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(512, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:86: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(512, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:88: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\keras\\legacy\\layers.py:458: UserWarning: The `Merge` layer is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
+      "  name=name)\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:90: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:91: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(256, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:93: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:95: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:96: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(128, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:98: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:100: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:101: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(64, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:103: UserWarning: The `merge` function is deprecated and will be removed after 08/2017. Use instead layers from `keras.layers.merge`, e.g. `add`, `concatenate`, etc.\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:105: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:106: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(32, (3, 3), activation=\"relu\", padding=\"same\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:107: UserWarning: Update your `Conv2D` call to the Keras 2 API: `Conv2D(1, (1, 1), activation=\"sigmoid\")`\n",
+      "c:\\programdata\\anaconda3\\envs\\tensorflow\\lib\\site-packages\\ipykernel_launcher.py:109: UserWarning: Update your `Model` call to the Keras 2 API: `Model(outputs=Tensor(\"co..., inputs=Tensor(\"in...)`\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+    "### EDIT HERE: user-configurable paths ###\n",
+    "\n",
+    "# Weights file loaded into the U-Net segmentation model built later in this cell.\n",
+    "unetweightspath=\"modelweights/unet-weights-improvement.hdf5\"\n",
+    "# Weights file loaded into the nodule / non-nodule CNN classifier.\n",
+    "truenoduleweightspath=\"modelweights/truenodule-cnn-weights-improvement.hdf5\"\n",
+    "INPUT_FOLDER = 'stage1/' # path to the Kaggle stage1 dataset; its directory entries become `patients`\n",
+    "# NOTE(review): not referenced in the cells visible here - presumably the folder for processed output; confirm.\n",
+    "datafolder=\"processeddata/\"\n",
+    "\n",
+    "####################\n",
+    "import numpy as np # linear algebra\n",
+    "\n",
+    "import matplotlib.pyplot as plt\n",
+    "import dicom\n",
+    "import os\n",
+    "import scipy.ndimage\n",
+    "import time\n",
+    "from keras.callbacks import ModelCheckpoint\n",
+    "import h5py\n",
+    "from sklearn.cluster import KMeans\n",
+    "from skimage import measure, morphology\n",
+    "import cell_magic_wand as cmw\n",
+    "import numpy as np\n",
+    "import csv\n",
+    "import random\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.ensemble import RandomForestClassifier as RF\n",
+    "from sklearn.metrics import confusion_matrix, classification_report\n",
+    "from scipy.ndimage.measurements import center_of_mass, label\n",
+    "from skimage.measure import regionprops\n",
+    "\n",
+    "import keras\n",
+    "from keras.models import Sequential,load_model,Model\n",
+    "from keras.layers import Dense, Dropout, Activation, Flatten\n",
+    "from keras.layers import Conv2D, MaxPooling2D, SpatialDropout2D\n",
+    "from keras.layers import Input, merge, UpSampling2D, BatchNormalization\n",
+    "from keras.optimizers import Adam\n",
+    "from keras.callbacks import EarlyStopping, ModelCheckpoint\n",
+    "from keras import backend as K\n",
+    "from keras.utils import to_categorical\n",
+    "from keras.datasets import mnist\n",
+    "from keras.models import Sequential\n",
+    "from keras import backend as K\n",
+    "from keras.optimizers import Adam\n",
+    "# Some constants\n",
+    "\n",
+    "# Entries of the dataset folder, one per patient. os.listdir() returns them in\n",
+    "# arbitrary order, so sort them to keep the patient numbering stable between runs.\n",
+    "# sorted() is used because list.sort() works in place and returns None.\n",
+    "patients = sorted(os.listdir(INPUT_FOLDER))\n",
+    "# Use the 'th' (channels-first) layout: the models below take (1, 512, 512) inputs.\n",
+    "K.set_image_dim_ordering('th')\n",
+    "\n",
+    "#Code sourced from https://www.kaggle.com/c/data-science-bowl-2017#tutorial\n",
+    "smooth = 1.0  # added to numerator and denominator of the Dice coefficient\n",
+    "width = 32    # filter count of the first U-Net level; deeper levels use multiples of it\n",
+    "\n",
+    "def dice_coef(y_true, y_pred):\n",
+    "    \"\"\"Smoothed Dice coefficient of two tensors.\n",
+    "\n",
+    "    Both tensors are flattened and the result is\n",
+    "    (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth),\n",
+    "    where `smooth` is the module-level constant.\n",
+    "    \"\"\"\n",
+    "    truth = K.flatten(y_true)\n",
+    "    guess = K.flatten(y_pred)\n",
+    "    overlap = K.sum(truth * guess)\n",
+    "    numerator = 2. * overlap + smooth\n",
+    "    denominator = K.sum(truth) + K.sum(guess) + smooth\n",
+    "    return numerator / denominator\n",
+    "def dice_coef_loss(y_true, y_pred):\n",
+    "    \"\"\"Loss value: the negated Dice coefficient of the two tensors.\"\"\"\n",
+    "    score = dice_coef(y_true, y_pred)\n",
+    "    return -score\n",
+    "\n",
+    "def unet_model():\n",
+    "    \"\"\"Build and compile the U-Net that predicts nodule masks.\n",
+    "\n",
+    "    The network takes a channels-first (1, 512, 512) input, runs four\n",
+    "    convolution/max-pool levels, a bottleneck, and four upsampling levels that\n",
+    "    are concatenated with the matching encoder output, and ends in a\n",
+    "    single-filter 1x1 sigmoid convolution.\n",
+    "\n",
+    "    The layers use Keras 2 spellings (tuple kernel sizes, `padding=`,\n",
+    "    `concatenate`, `inputs=`/`outputs=`). The previous Keras 1 spellings only\n",
+    "    worked through deprecated compatibility shims that warned on every layer.\n",
+    "    Layers are created in the same order as before.\n",
+    "\n",
+    "    Returns:\n",
+    "        A Keras Model compiled with Adam(lr=1e-5), dice_coef_loss as the loss\n",
+    "        and dice_coef as a metric.\n",
+    "    \"\"\"\n",
+    "    # Local import: `concatenate` replaces the deprecated merge(..., mode='concat').\n",
+    "    from keras.layers import concatenate\n",
+    "\n",
+    "    def conv3x3(filters):\n",
+    "        # 3x3 ReLU convolution with 'same' padding.\n",
+    "        return Conv2D(filters, (3, 3), activation='relu', padding='same')\n",
+    "\n",
+    "    inputs = Input((1, 512, 512))\n",
+    "    conv1 = conv3x3(width)(inputs)\n",
+    "    conv1 = BatchNormalization(axis=1)(conv1)  # axis 1 is the channel axis in this layout\n",
+    "    conv1 = conv3x3(width)(conv1)\n",
+    "    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)\n",
+    "\n",
+    "    conv2 = conv3x3(width*2)(pool1)\n",
+    "    conv2 = BatchNormalization(axis=1)(conv2)\n",
+    "    conv2 = conv3x3(width*2)(conv2)\n",
+    "    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)\n",
+    "\n",
+    "    conv3 = conv3x3(width*4)(pool2)\n",
+    "    conv3 = BatchNormalization(axis=1)(conv3)\n",
+    "    conv3 = conv3x3(width*4)(conv3)\n",
+    "    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)\n",
+    "\n",
+    "    conv4 = conv3x3(width*8)(pool3)\n",
+    "    conv4 = BatchNormalization(axis=1)(conv4)\n",
+    "    conv4 = conv3x3(width*8)(conv4)\n",
+    "    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)\n",
+    "\n",
+    "    conv5 = conv3x3(width*16)(pool4)\n",
+    "    conv5 = BatchNormalization(axis=1)(conv5)\n",
+    "    conv5 = conv3x3(width*16)(conv5)\n",
+    "\n",
+    "    # Decoder: upsample, concatenate with the encoder output of the same level, then convolve.\n",
+    "    up6 = concatenate([UpSampling2D(size=(2, 2))(conv5), conv4], axis=1)\n",
+    "    conv6 = SpatialDropout2D(0.35)(up6)\n",
+    "    conv6 = conv3x3(width*8)(conv6)\n",
+    "    conv6 = conv3x3(width*8)(conv6)\n",
+    "\n",
+    "    up7 = concatenate([UpSampling2D(size=(2, 2))(conv6), conv3], axis=1)\n",
+    "    conv7 = SpatialDropout2D(0.35)(up7)\n",
+    "    conv7 = conv3x3(width*4)(conv7)\n",
+    "    conv7 = conv3x3(width*4)(conv7)\n",
+    "\n",
+    "    up8 = concatenate([UpSampling2D(size=(2, 2))(conv7), conv2], axis=1)\n",
+    "    conv8 = SpatialDropout2D(0.35)(up8)\n",
+    "    conv8 = conv3x3(width*2)(conv8)\n",
+    "    conv8 = conv3x3(width*2)(conv8)\n",
+    "\n",
+    "    up9 = concatenate([UpSampling2D(size=(2, 2))(conv8), conv1], axis=1)\n",
+    "    conv9 = SpatialDropout2D(0.35)(up9)\n",
+    "    conv9 = conv3x3(width)(conv9)\n",
+    "    conv9 = conv3x3(width)(conv9)\n",
+    "    conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9)\n",
+    "\n",
+    "    model = Model(inputs=inputs, outputs=conv10)\n",
+    "    model.compile(optimizer=Adam(lr=1e-5), loss=dice_coef_loss, metrics=[dice_coef])\n",
+    "    return model\n",
+    "\n",
+    "# NOTE: this rebinds the name `unet_model` from the builder function to the model it\n",
+    "# returns, so the builder is only callable again after its definition above is re-run.\n",
+    "unet_model=unet_model()\n",
+    "# Load the weights file configured at the top of this cell into the model.\n",
+    "unet_model.load_weights(unetweightspath)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "#classify as nodule or non-nodule\n",
+    "# Two-class CNN over a single-channel 512x512 input; the layers are listed in the\n",
+    "# order they are stacked.\n",
+    "input_shape=(1,512,512)\n",
+    "num_classes=2\n",
+    "model = Sequential([\n",
+    "    Conv2D(8, kernel_size=(3, 3), activation='relu', input_shape=input_shape),\n",
+    "    Conv2D(16, (3, 3), activation='relu'),\n",
+    "    MaxPooling2D(pool_size=(2, 2)),\n",
+    "    Dropout(0.25),\n",
+    "    Flatten(),\n",
+    "    Dense(32, activation='relu'),\n",
+    "    Dropout(0.5),\n",
+    "    Dense(num_classes, activation='softmax'),\n",
+    "])\n",
+    "\n",
+    "model.compile(\n",
+    "    loss=keras.losses.binary_crossentropy,\n",
+    "    optimizer=Adam(lr=1e-5),\n",
+    "    metrics=['accuracy'],\n",
+    ")\n",
+    "\n",
+    "# Load the classifier weights from the path configured in the first code cell.\n",
+    "model.load_weights(truenoduleweightspath)\n",
+    "# Optional architecture diagram (left disabled):\n",
+    "#os.environ[\"PATH\"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'\n",
+    "#plot_model(model, to_file=\"CNNdiagram.png\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "# Load the scans in given folder path\n",
+    "def load_scan(path):\n",
+    "    \"\"\"Read every .dcm file in `path` and return the slices ordered by z position.\n",
+    "\n",
+    "    Slices are sorted by descending ImagePositionPatient[2]. The distance between\n",
+    "    the first two slices is written to every slice as SliceThickness.\n",
+    "\n",
+    "    Args:\n",
+    "        path: folder holding the DICOM files of one scan.\n",
+    "\n",
+    "    Returns:\n",
+    "        The list of DICOM datasets, sorted and with SliceThickness set.\n",
+    "    \"\"\"\n",
+    "    # code sourced from https://www.kaggle.com/gzuidhof/full-preprocessing-tutorial\n",
+    "    slices = [dicom.read_file(os.path.join(path, s), force=True)\n",
+    "              for s in os.listdir(path) if s.endswith('.dcm')]\n",
+    "    slices.sort(key = lambda x: float(x.ImagePositionPatient[2]), reverse=True)\n",
+    "    try:\n",
+    "        slice_thickness = np.abs(slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])\n",
+    "    except Exception:\n",
+    "        # Fall back to SliceLocation when the position-based spacing cannot be computed.\n",
+    "        # `except Exception` instead of a bare `except` lets KeyboardInterrupt and\n",
+    "        # SystemExit propagate instead of being swallowed.\n",
+    "        slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)\n",
+    "\n",
+    "    for s in slices:\n",
+    "        s.SliceThickness = slice_thickness\n",
+    "\n",
+    "    return slices\n",
+    "\n",
+    "def get_pixels_hu(slices):\n",
+    "    \"\"\"Stack the slices' pixel arrays into one int16 volume in Hounsfield units.\n",
+    "\n",
+    "    Pixels equal to -2000 are set to 0, then each slice is multiplied by its\n",
+    "    RescaleSlope (only when the slope is not 1) and shifted by its RescaleIntercept.\n",
+    "\n",
+    "    Args:\n",
+    "        slices: sequence of objects exposing pixel_array, RescaleSlope and\n",
+    "            RescaleIntercept.\n",
+    "\n",
+    "    Returns:\n",
+    "        int16 numpy array with one entry per slice along the first axis.\n",
+    "    \"\"\"\n",
+    "    #code sourced from https://www.kaggle.com/gzuidhof/full-preprocessing-tutorial\n",
+    "    # Values should always be low enough (<32k) to fit into int16.\n",
+    "    volume = np.stack([scan.pixel_array for scan in slices]).astype(np.int16)\n",
+    "\n",
+    "    # Set outside-of-scan pixels to 0.\n",
+    "    # The intercept is usually -1024, so air is approximately 0.\n",
+    "    volume[volume == -2000] = 0\n",
+    "\n",
+    "    # Convert to Hounsfield units (HU), one slice at a time.\n",
+    "    for idx, scan in enumerate(slices):\n",
+    "        slope = scan.RescaleSlope\n",
+    "        if slope != 1:\n",
+    "            # Scale in float64, then cast back to int16.\n",
+    "            scaled = slope * volume[idx].astype(np.float64)\n",
+    "            volume[idx] = scaled.astype(np.int16)\n",
+    "        volume[idx] += np.int16(scan.RescaleIntercept)\n",
+    "\n",
+    "    return np.array(volume, dtype=np.int16)\n",
+    "\n",
+    "def processimage(img):\n",
+    "    \"\"\"Standardise a slice and multiply it by an estimated lung mask.\n",
+    "\n",
+    "    Steps: standardise the pixels; replace the extreme (max/min) pixels with the\n",
+    "    mean of the central window; split that window into two k-means clusters to\n",
+    "    get a threshold; erode and dilate the thresholded image; keep the labelled\n",
+    "    regions whose bounding box passes the size/position limits; dilate the\n",
+    "    resulting mask once more and multiply it with the standardised image.\n",
+    "\n",
+    "    Compared with the earlier version, the regions are labelled once instead of\n",
+    "    twice, the builtins `max`/`min` are no longer shadowed, and the mask takes\n",
+    "    its shape from the image instead of a hard-coded 512x512.\n",
+    "\n",
+    "    NOTE(review): the window [100:400] and the bounding-box limits (475, 40, 472)\n",
+    "    still look tuned for 512x512 slices - confirm before using other sizes.\n",
+    "    NOTE(review): KMeans is created without a random_state.\n",
+    "\n",
+    "    Args:\n",
+    "        img: 2-D numpy array holding one slice. It is not modified.\n",
+    "\n",
+    "    Returns:\n",
+    "        Array of the same shape: the standardised slice multiplied by the mask.\n",
+    "    \"\"\"\n",
+    "    #function sourced from https://www.kaggle.com/c/data-science-bowl-2017#tutorial\n",
+    "    #Standardize the pixel values\n",
+    "    img = img - np.mean(img)\n",
+    "    img = img / np.std(img)\n",
+    "    middle = img[100:400,100:400]  # a view: the replacements below also show up here\n",
+    "    middle_mean = np.mean(middle)\n",
+    "    img_max = np.max(img)\n",
+    "    img_min = np.min(img)\n",
+    "    #move the underflow bins\n",
+    "    img[img==img_max] = middle_mean\n",
+    "    img[img==img_min] = middle_mean\n",
+    "    kmeans = KMeans(n_clusters=2).fit(np.reshape(middle,[np.prod(middle.shape),1]))\n",
+    "    centers = sorted(kmeans.cluster_centers_.flatten())\n",
+    "    threshold = np.mean(centers)\n",
+    "    thresh_img = np.where(img<threshold,1.0,0.0)  # threshold the image\n",
+    "    eroded = morphology.erosion(thresh_img,np.ones([4,4]))\n",
+    "    dilation = morphology.dilation(eroded,np.ones([10,10]))\n",
+    "    labels = measure.label(dilation)\n",
+    "    good_labels = []\n",
+    "    for prop in measure.regionprops(labels):\n",
+    "        B = prop.bbox\n",
+    "        if B[2]-B[0]<475 and B[3]-B[1]<475 and B[0]>40 and B[2]<472:\n",
+    "            good_labels.append(prop.label)\n",
+    "    #\n",
+    "    #  The mask here is the mask for the lungs--not the nodes\n",
+    "    #  After just the lungs are left, we do another large dilation\n",
+    "    #  in order to fill in and out the lung mask\n",
+    "    #\n",
+    "    mask = np.zeros(labels.shape,dtype=np.int8)\n",
+    "    for N in good_labels:\n",
+    "        # Labels are distinct, so setting 1 equals the earlier per-label summation.\n",
+    "        mask[labels==N] = 1\n",
+    "    mask = morphology.dilation(mask,np.ones([10,10])) # one last dilation\n",
+    "    return mask*img\n",
+    "\n",
+    "def processimagenomask(img):\n",
+    "    \"\"\"Standardise a slice without applying a lung mask.\n",
+    "\n",
+    "    The pixels are shifted by the image mean and divided by the image standard\n",
+    "    deviation. Every pixel equal to the maximum or to the minimum of the result\n",
+    "    is then replaced by the mean of the window [100:400, 100:400].\n",
+    "\n",
+    "    Args:\n",
+    "        img: 2-D numpy array. It is not modified.\n",
+    "\n",
+    "    Returns:\n",
+    "        A new standardised array of the same shape.\n",
+    "    \"\"\"\n",
+    "    #Standardize the pixel values\n",
+    "    centred = img - np.mean(img)\n",
+    "    scaled = centred / np.std(img)\n",
+    "    window_mean = np.mean(scaled[100:400, 100:400])\n",
+    "    highest = np.max(scaled)\n",
+    "    lowest = np.min(scaled)\n",
+    "    #move the underflow bins\n",
+    "    scaled[scaled == highest] = window_mean\n",
+    "    scaled[scaled == lowest] = window_mean\n",
+    "    return scaled\n",
+    "\n",
+    "def processimagefromfile(ppix):\n",
+    "    \"\"\"Run processimage on every slice of `ppix`.\n",
+    "\n",
+    "    Returns an array of shape (ppix.shape[0], 1, 512, 512) whose entry [i, 0]\n",
+    "    holds the processed slice ppix[i].\n",
+    "    \"\"\"\n",
+    "    slice_count = ppix.shape[0]\n",
+    "    processpix = np.empty((slice_count, 1, 512, 512))\n",
+    "    for idx in range(slice_count):\n",
+    "        processpix[idx, 0] = processimage(ppix[idx])\n",
+    "    return processpix\n",
+    "\n",
+    "#predict mask from images\n",
+    "def predictmask(images):\n",
+    "    \"\"\"Predict one mask per image with the notebook-level `unet_model`.\n",
+    "\n",
+    "    Images are passed to the model one at a time; the results are collected in\n",
+    "    a float32 array of shape (images.shape[0], 1, 512, 512).\n",
+    "    \"\"\"\n",
+    "    count = images.shape[0]\n",
+    "    predicted = np.empty((count, 1, 512, 512), dtype=np.float32)\n",
+    "    for idx in range(count):\n",
+    "        single = images[idx:idx+1]  # slicing keeps the leading batch axis\n",
+    "        predicted[idx] = unet_model.predict([single], verbose=0)[0]\n",
+    "    return predicted\n",
+    "\n",
+    "#find the indices of the slices where a nodule is detected\n",
+    "def getnoduleindex(imgs_mask_test):\n",
+    "    \"\"\"Return the indices of the slices whose mask (channel 0) sums to more than 5.\"\"\"\n",
+    "    return [idx for idx, maskslice in enumerate(imgs_mask_test)\n",
+    "            if np.sum(maskslice[0]) > 5]\n",
+    "\n",
+    "def trueindicies(processed_pix, noduleindex):\n",
+    "    \"\"\"Keep the candidate slice indices that the classifier accepts.\n",
+    "\n",
+    "    The slices listed in `noduleindex` are taken from `processed_pix` and passed\n",
+    "    to the notebook-level `model`. An index is kept when column 1 of its\n",
+    "    prediction is greater than 0.5.\n",
+    "\n",
+    "    Args:\n",
+    "        processed_pix: indexable collection of processed slices.\n",
+    "        noduleindex: candidate slice indices.\n",
+    "\n",
+    "    Returns:\n",
+    "        List of the accepted indices, in their original order.\n",
+    "    \"\"\"\n",
+    "    if len(noduleindex) == 0:\n",
+    "        # No candidates: skip the model call, which would otherwise get an empty array.\n",
+    "        return []\n",
+    "    noduleimgs = np.array([processed_pix[ind] for ind in noduleindex])\n",
+    "    predictions = model.predict(noduleimgs)\n",
+    "    # Compare directly instead of writing True/False back into the float array.\n",
+    "    accepted = predictions[:, 1] > 0.5\n",
+    "    return [ind for ind, keep in zip(noduleindex, accepted) if keep]\n",
+    "\n",
+    "def thresholdnodules(noduleindices,mask):\n",
+    "    \"\"\"Return the summed mask value of each slice listed in `noduleindices`.\n",
+    "\n",
+    "    The earlier version looped over the function object `trueindicies` instead of\n",
+    "    the `noduleindices` argument, which raised a TypeError on every call.\n",
+    "\n",
+    "    Args:\n",
+    "        noduleindices: iterable of indices into `mask`.\n",
+    "        mask: array indexable by those indices.\n",
+    "\n",
+    "    Returns:\n",
+    "        List with np.sum(mask[ind]) for each index, in order.\n",
+    "    \"\"\"\n",
+    "    return [np.sum(mask[ind]) for ind in noduleindices]\n",
+    "\n",
+    "def largestnodulecoordinates(mask):\n",
+    "    \"\"\"Return [row, col] of the centre of mass of the largest connected region.\n",
+    "\n",
+    "    The mask is binarised in place (values > 0.5 become 1, values < 0.5 become 0)\n",
+    "    and then labelled. With more than one region the centre of the largest one\n",
+    "    is used; otherwise the centre of the whole mask. Both coordinates are\n",
+    "    truncated to int.\n",
+    "    \"\"\"\n",
+    "    #mask=nodulemasks[indx,0][0]\n",
+    "    mask[mask>0.5] = 1\n",
+    "    mask[mask<0.5] = 0\n",
+    "    labeled_array, nf = label(mask)\n",
+    "    if nf <= 1:\n",
+    "        com = center_of_mass(mask)\n",
+    "    else:\n",
+    "        # Pixel count per label; entry 0 (background) is dropped.\n",
+    "        sizes = np.bincount(labeled_array.ravel())[1:]\n",
+    "        biggest = int(np.argmax(sizes)) + 1  # argmax picks the first of equal maxima\n",
+    "        labeled_array[labeled_array != biggest] = 0\n",
+    "        com = center_of_mass(labeled_array)\n",
+    "    return [int(com[0]), int(com[1])]\n",
+    "\n",
+    "def largestnodulearea(mask,table,i):\n",
+    "    \"\"\"Return the pixel count of the largest connected region of `mask`.\n",
+    "\n",
+    "    The mask is binarised in place (values > 0.5 become 1, values < 0.5 become 0)\n",
+    "    and then labelled. When it holds at most one region, the value\n",
+    "    table[\"Area\"][i] is returned instead.\n",
+    "    \"\"\"\n",
+    "    #mask=nodulemasks[indx,0][0]\n",
+    "    mask[mask>0.5] = 1\n",
+    "    mask[mask<0.5] = 0\n",
+    "    labeled_array, nf = label(mask)\n",
+    "    if nf > 1:\n",
+    "        # Pixel count per label; entry 0 (background) is dropped.\n",
+    "        return np.bincount(labeled_array.ravel())[1:].max()\n",
+    "    return table[\"Area\"][i]\n",
+    "\n",
+    "def crop_nodule(coord,image,dim=32):\n",
+    "    \"\"\"Cut a (2*dim) x (2*dim) window around `coord` out of `image`.\n",
+    "\n",
+    "    With the default dim=32 the output is a 64x64 image of the nodule. The\n",
+    "    window is shifted to stay inside the image: before, a coordinate closer\n",
+    "    than `dim` to the top/left border gave a negative slice start, which wraps\n",
+    "    around and returned an empty or wrong crop. Coordinates far enough from\n",
+    "    every border give the same crop as before.\n",
+    "\n",
+    "    Args:\n",
+    "        coord: (row, col) centre of the crop.\n",
+    "        image: array with at least two axes.\n",
+    "        dim: half of the crop side length.\n",
+    "\n",
+    "    Returns:\n",
+    "        The cropped view; smaller than 2*dim only along an axis where the image\n",
+    "        itself is smaller.\n",
+    "    \"\"\"\n",
+    "    size = 2*dim\n",
+    "    starts = []\n",
+    "    for axis in (0, 1):\n",
+    "        last_start = max(image.shape[axis]-size, 0)\n",
+    "        starts.append(min(max(coord[axis]-dim, 0), last_start))\n",
+    "    row0, col0 = starts\n",
+    "    return image[row0:row0+size, col0:col0+size]\n",
+    "\n",
+    "def largestnoduleproperties(mask):\n",
+    "    \"\"\"Measure the largest connected region of `mask` with regionprops.\n",
+    "\n",
+    "    The mask is binarised in place (values > 0.5 become 1, values < 0.5 become 0),\n",
+    "    converted to an int8 copy and labelled. With more than one region only the\n",
+    "    largest is measured; otherwise the whole mask is.\n",
+    "\n",
+    "    NOTE(review): when there is no region at all, regionprops should give an\n",
+    "    empty list and the indexing below raises IndexError - confirm callers never\n",
+    "    pass an empty mask.\n",
+    "\n",
+    "    Returns:\n",
+    "        Tuple (area, eccentricity, equivalent_diameter, major_axis_length,\n",
+    "        solidity) of the measured region; the notebook stores the solidity\n",
+    "        under the name spiculation.\n",
+    "    \"\"\"\n",
+    "    mask[mask>0.5] = 1\n",
+    "    mask[mask<0.5] = 0\n",
+    "    mask = mask.astype(np.int8)\n",
+    "    labeled_array, nf = label(mask)\n",
+    "    if nf > 1:\n",
+    "        # Pixel count per label; entry 0 (background) is dropped.\n",
+    "        sizes = np.bincount(labeled_array.ravel())[1:]\n",
+    "        biggest = int(np.argmax(sizes)) + 1  # argmax picks the first of equal maxima\n",
+    "        labeled_array[labeled_array != biggest] = 0\n",
+    "        noduleprops = regionprops(labeled_array)\n",
+    "    else:\n",
+    "        noduleprops = regionprops(mask)\n",
+    "    largest = noduleprops[0]\n",
+    "    area = largest.area\n",
+    "    eccentricity = largest.eccentricity\n",
+    "    diam = largest.equivalent_diameter\n",
+    "    diammajor = largest.major_axis_length\n",
+    "    spiculation = largest.solidity\n",
+    "    return area, eccentricity, diam, diammajor, spiculation\n",
+    "\n",
+    "def generatefeaturetable(nodulemasks, images=None, patient=None, indices=None):\n",
+    "    \"\"\"Build a DataFrame with one row of shape features per nodule mask.\n",
+    "\n",
+    "    Each mask nodulemasks[i, 0] is binarised in place. Its total area, the mean\n",
+    "    image value under the mask and the properties of its largest region (from\n",
+    "    largestnoduleproperties) are collected into the columns below.\n",
+    "\n",
+    "    The \"Area\" column now holds the total mask area of each row. Before, it was\n",
+    "    filled with the largest-region area of the last mask only, and the function\n",
+    "    failed with a NameError when given no masks.\n",
+    "\n",
+    "    NOTE(review): the earlier version also counted the regions per mask but never\n",
+    "    put the count into the table; that unused computation was dropped.\n",
+    "\n",
+    "    Args:\n",
+    "        nodulemasks: array indexed as [i, 0] giving one 2-D mask per row.\n",
+    "        images: array indexed like nodulemasks with the matching images;\n",
+    "            defaults to the notebook-level `noduleimages`.\n",
+    "        patient: value for the \"Patient\" column; defaults to the notebook-level\n",
+    "            `sample`.\n",
+    "        indices: values for the \"NoduleIndex\" column; defaults to the\n",
+    "            notebook-level `noduleindicies`.\n",
+    "\n",
+    "    Returns:\n",
+    "        pandas DataFrame with columns Patient, NoduleIndex, Area, MeanHU,\n",
+    "        LargestNoduleArea, Eccentricity, Diameter, DiameterMajor, Spiculation.\n",
+    "    \"\"\"\n",
+    "    # Fall back to the notebook-level variables that were previously read implicitly.\n",
+    "    if images is None:\n",
+    "        images = noduleimages\n",
+    "    if patient is None:\n",
+    "        patient = sample\n",
+    "    if indices is None:\n",
+    "        indices = noduleindicies\n",
+    "\n",
+    "    arealist=[]\n",
+    "    meannoduleHU=[]\n",
+    "    largestarealist=[]\n",
+    "    eccentricitylist=[]\n",
+    "    diamlist=[]\n",
+    "    diammajorlist=[]\n",
+    "    spiculationlist=[]\n",
+    "\n",
+    "    for i in range(nodulemasks.shape[0]):\n",
+    "        mask=nodulemasks[i,0]\n",
+    "        mask[mask>0.5]=1\n",
+    "        mask[mask<0.5]=0\n",
+    "        totalarea=np.sum(mask)\n",
+    "        arealist.append(totalarea)\n",
+    "        meannoduleHU.append(np.sum(images[i,0]*mask)/totalarea)\n",
+    "        area, eccentricity, diam, diammajor, spiculation = largestnoduleproperties(mask)\n",
+    "        largestarealist.append(area)\n",
+    "        eccentricitylist.append(eccentricity)\n",
+    "        diamlist.append(diam)\n",
+    "        diammajorlist.append(diammajor)\n",
+    "        spiculationlist.append(spiculation)\n",
+    "    table=pd.DataFrame({\"Patient\":patient,\"NoduleIndex\":indices,\"Area\":arealist,\"MeanHU\":meannoduleHU, \"LargestNoduleArea\":largestarealist,\n",
+    "                    \"Eccentricity\":eccentricitylist, \"Diameter\":diamlist, \"DiameterMajor\":diammajorlist, \"Spiculation\":spiculationlist})\n",
+    "    return table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing patient# 1200 ETA: 13.88888888888889 hrs\n",
+      "Processing patient# 1201 ETA: 12.623715912898382 hrs\n",
+      "Processing patient# 1202 ETA: 10.018525106410186 hrs\n",
+      "Processing patient# 1203 ETA: 18.164322644339666 hrs\n",
+      "Processing patient# 1204 ETA: 15.95403834571441 hrs\n",
+      "Processing patient# 1205 ETA: 14.166520277659098 hrs\n",
+      "Processing patient# 1206 ETA: 13.91482162181978 hrs\n",
+      "Processing patient# 1208 ETA: 13.044705638507056 hrs\n",
+      "Processing patient# 1209 ETA: 11.268210861565155 hrs\n",
+      "Processing patient# 1210 ETA: 11.153029166426924 hrs\n",
+      "Processing patient# 1211 ETA: 10.874260760220615 hrs\n",
+      "Processing patient# 1212 ETA: 10.507523334208463 hrs\n",
+      "Processing patient# 1213 ETA: 10.314581797112767 hrs\n",
+      "Processing patient# 1214 ETA: 10.08706813660406 hrs\n",
+      "Processing patient# 1216 ETA: 9.996492877094832 hrs\n",
+      "Processing patient# 1217 ETA: 9.102958305653404 hrs\n",
+      "Processing patient# 1219 ETA: 8.925979718978024 hrs\n",
+      "Processing patient# 1220 ETA: 8.507819953064123 hrs\n",
+      "Processing patient# 1221 ETA: 8.645915125431838 hrs\n",
+      "Processing patient# 1222 ETA: 8.543059910430449 hrs\n",
+      "Processing patient# 1223 ETA: 8.726262301500293 hrs\n",
+      "Processing patient# 1225 ETA: 8.727355263285615 hrs\n",
+      "Processing patient# 1226 ETA: 8.455341982405919 hrs\n",
+      "Processing patient# 1227 ETA: 8.351120784155135 hrs\n",
+      "Processing patient# 1228 ETA: 8.316320655887562 hrs\n",
+      "Processing patient# 1229 ETA: 8.488186760167965 hrs\n",
+      "Processing patient# 1230 ETA: 8.393290068418892 hrs\n",
+      "Processing patient# 1231 ETA: 8.380940078535387 hrs\n",
+      "Processing patient# 1232 ETA: 8.325024080611765 hrs\n",
+      "Processing patient# 1233 ETA: 8.342425880022724 hrs\n",
+      "Processing patient# 1234 ETA: 8.32099210145232 hrs\n",
+      "Processing patient# 1235 ETA: 8.283116624014719 hrs\n",
+      "Processing patient# 1236 ETA: 8.308585265549246 hrs\n",
+      "Processing patient# 1237 ETA: 8.243522517545086 hrs\n",
+      "Processing patient# 1238 ETA: 8.686181035109781 hrs\n",
+      "Processing patient# 1239 ETA: 8.866050753437216 hrs\n",
+      "Processing patient# 1240 ETA: 8.860589005996784 hrs\n",
+      "Processing patient# 1241 ETA: 8.904041662632933 hrs\n",
+      "Processing patient# 1242 ETA: 8.81039298651395 hrs\n",
+      "Processing patient# 1244 ETA: 8.987819228849979 hrs\n",
+      "Processing patient# 1245 ETA: 8.728126824490818 hrs\n",
+      "Processing patient# 1247 ETA: 8.675649444095177 hrs\n",
+      "Processing patient# 1248 ETA: 8.429746265785287 hrs\n",
+      "Processing patient# 1249 ETA: 8.358244928213475 hrs\n",
+      "Processing patient# 1251 ETA: 8.380413904567561 hrs\n",
+      "Processing patient# 1252 ETA: 8.127052396800783 hrs\n",
+      "Processing patient# 1253 ETA: 8.06759037937758 hrs\n",
+      "Processing patient# 1255 ETA: 8.08903314394838 hrs\n",
+      "Processing patient# 1256 ETA: 7.984299782819692 hrs\n",
+      "Processing patient# 1257 ETA: 7.922037844325599 hrs\n",
+      "Processing patient# 1258 ETA: 7.861369710803259 hrs\n",
+      "Processing patient# 1259 ETA: 7.8176956071153185 hrs\n",
+      "Processing patient# 1260 ETA: 7.7597010760285245 hrs\n",
+      "Processing patient# 1261 ETA: 7.840702676859926 hrs\n",
+      "Processing patient# 1262 ETA: 7.801467700129554 hrs\n",
+      "Processing patient# 1263 ETA: 7.790705240697878 hrs\n",
+      "Processing patient# 1264 ETA: 7.734633472519409 hrs\n",
+      "Processing patient# 1265 ETA: 7.703382088404434 hrs\n",
+      "Processing patient# 1266 ETA: 7.695265196908001 hrs\n",
+      "Processing patient# 1267 ETA: 7.699010734739984 hrs\n",
+      "Processing patient# 1268 ETA: 7.6863632827208335 hrs\n",
+      "Processing patient# 1270 ETA: 7.630932073109392 hrs\n",
+      "Processing patient# 1272 ETA: 7.533154250803129 hrs\n",
+      "Processing patient# 1273 ETA: 7.3653022840142786 hrs\n",
+      "Processing patient# 1274 ETA: 7.319828108404134 hrs\n",
+      "Processing patient# 1276 ETA: 7.362176086990922 hrs\n",
+      "Processing patient# 1277 ETA: 7.265992607554316 hrs\n",
+      "Processing patient# 1278 ETA: 7.218131187677043 hrs\n",
+      "Processing patient# 1279 ETA: 7.247058704429203 hrs\n",
+      "Processing patient# 1280 ETA: 7.2205354031175375 hrs\n",
+      "Processing patient# 1284 ETA: 7.229128276611714 hrs\n",
+      "Processing patient# 1285 ETA: 6.863715362470914 hrs\n",
+      "Processing patient# 1286 ETA: 6.868215039321618 hrs\n",
+      "Processing patient# 1287 ETA: 6.872994949424861 hrs\n",
+      "Processing patient# 1288 ETA: 6.876982837402911 hrs\n",
+      "Processing patient# 1289 ETA: 6.8752770751245915 hrs\n",
+      "Processing patient# 1290 ETA: 6.840804919596807 hrs\n",
+      "Processing patient# 1291 ETA: 6.811198504381564 hrs\n",
+      "Processing patient# 1292 ETA: 6.77358079140385 hrs\n",
+      "Processing patient# 1293 ETA: 6.830506550602018 hrs\n",
+      "Processing patient# 1294 ETA: 6.802673126880441 hrs\n",
+      "Processing patient# 1297 ETA: 6.911592493140907 hrs\n",
+      "Processing patient# 1298 ETA: 6.715936135601024 hrs\n",
+      "Processing patient# 1299 ETA: 6.67262925623777 hrs\n",
+      "Processing patient# 1300 ETA: 6.629177303820848 hrs\n",
+      "Processing patient# 1301 ETA: 6.606589229476728 hrs\n",
+      "Processing patient# 1302 ETA: 6.584290691885554 hrs\n",
+      "Processing patient# 1304 ETA: 6.567115247555663 hrs\n",
+      "Processing patient# 1306 ETA: 6.466728328926854 hrs\n",
+      "Processing patient# 1307 ETA: 6.3809614388296545 hrs\n",
+      "Processing patient# 1308 ETA: 6.346115140000612 hrs\n",
+      "Processing patient# 1309 ETA: 6.356010770753983 hrs\n",
+      "Processing patient# 1310 ETA: 6.314931866174394 hrs\n",
+      "Processing patient# 1311 ETA: 6.307182563506328 hrs\n",
+      "Processing patient# 1313 ETA: 6.279791420257635 hrs\n",
+      "Processing patient# 1314 ETA: 6.292742651619061 hrs\n",
+      "Processing patient# 1315 ETA: 6.269811408600369 hrs\n",
+      "Processing patient# 1316 ETA: 6.250402978571325 hrs\n",
+      "Processing patient# 1318 ETA: 6.2339090946913895 hrs\n",
+      "Processing patient# 1319 ETA: 6.160530481745883 hrs\n",
+      "Processing patient# 1320 ETA: 6.130471623537166 hrs\n",
+      "Processing patient# 1321 ETA: 6.121893712554752 hrs\n",
+      "Processing patient# 1323 ETA: 6.089000249715125 hrs\n",
+      "Processing patient# 1324 ETA: 5.980023680922165 hrs\n",
+      "Processing patient# 1325 ETA: 5.956943183755874 hrs\n",
+      "Processing patient# 1327 ETA: 5.946134261675508 hrs\n",
+      "Processing patient# 1328 ETA: 5.850976662812754 hrs\n",
+      "Processing patient# 1329 ETA: 5.820975284885477 hrs\n",
+      "Processing patient# 1331 ETA: 5.8138491802821815 hrs\n",
+      "Processing patient# 1333 ETA: 5.725956927912203 hrs\n",
+      "Processing patient# 1334 ETA: 5.628774382734477 hrs\n",
+      "Processing patient# 1335 ETA: 5.606204076225375 hrs\n",
+      "Processing patient# 1336 ETA: 5.579955613982735 hrs\n",
+      "Processing patient# 1337 ETA: 5.590139669641091 hrs\n",
+      "Processing patient# 1338 ETA: 5.566476827514825 hrs\n",
+      "Processing patient# 1339 ETA: 5.543931497014302 hrs\n",
+      "Processing patient# 1341 ETA: 5.524890386738948 hrs\n",
+      "Processing patient# 1342 ETA: 5.448634859161664 hrs\n",
+      "Processing patient# 1343 ETA: 5.417805575740921 hrs\n",
+      "Processing patient# 1344 ETA: 5.400687092230369 hrs\n",
+      "Processing patient# 1345 ETA: 5.3883224553303695 hrs\n",
+      "Processing patient# 1346 ETA: 5.361380771469307 hrs\n",
+      "Processing patient# 1348 ETA: 5.347543323521171 hrs\n",
+      "Processing patient# 1349 ETA: 5.270577565466265 hrs\n",
+      "Processing patient# 1350 ETA: 5.250531795351592 hrs\n",
+      "Processing patient# 1351 ETA: 5.227737224219562 hrs\n",
+      "Processing patient# 1352 ETA: 5.204326420002862 hrs\n",
+      "Processing patient# 1353 ETA: 5.186513844776084 hrs\n",
+      "Processing patient# 1354 ETA: 5.1599767990571594 hrs\n",
+      "Processing patient# 1355 ETA: 5.1349731775509415 hrs\n",
+      "Processing patient# 1356 ETA: 5.108270640927247 hrs\n",
+      "Processing patient# 1357 ETA: 5.115111228089255 hrs\n",
+      "Processing patient# 1358 ETA: 5.096474325358868 hrs\n",
+      "Processing patient# 1359 ETA: 5.068261158612955 hrs\n",
+      "Processing patient# 1360 ETA: 5.042548690814938 hrs\n",
+      "Processing patient# 1361 ETA: 5.01235181745535 hrs\n",
+      "Processing patient# 1362 ETA: 4.998160559473705 hrs\n",
+      "Processing patient# 1363 ETA: 4.974829404645009 hrs\n",
+      "Processing patient# 1364 ETA: 4.952033427223199 hrs\n",
+      "Processing patient# 1365 ETA: 4.928263342958509 hrs\n",
+      "Processing patient# 1366 ETA: 4.906512433510309 hrs\n",
+      "Processing patient# 1368 ETA: 4.884942292143961 hrs\n",
+      "Processing patient# 1369 ETA: 4.82783218746433 hrs\n",
+      "Processing patient# 1370 ETA: 4.8466363209135395 hrs\n",
+      "Processing patient# 1371 ETA: 4.8318471228541915 hrs\n",
+      "Processing patient# 1372 ETA: 4.821795012823449 hrs\n",
+      "Processing patient# 1373 ETA: 4.803408985386005 hrs\n",
+      "Processing patient# 1374 ETA: 4.777427026608132 hrs\n",
+      "Processing patient# 1375 ETA: 4.763656514637054 hrs\n",
+      "Processing patient# 1376 ETA: 4.743936037966472 hrs\n",
+      "Processing patient# 1377 ETA: 4.719618169200248 hrs\n",
+      "Processing patient# 1378 ETA: 4.690835314464851 hrs\n",
+      "Processing patient# 1379 ETA: 4.666089715291667 hrs\n",
+      "Processing patient# 1380 ETA: 4.675125468286835 hrs\n",
+      "Processing patient# 1381 ETA: 4.658687751179754 hrs\n",
+      "Processing patient# 1382 ETA: 4.635416509547295 hrs\n",
+      "Processing patient# 1383 ETA: 4.618964403185181 hrs\n",
+      "Processing patient# 1384 ETA: 4.5948020707891475 hrs\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing patient# 1385 ETA: 4.570799427429835 hrs\n",
+      "Processing patient# 1386 ETA: 4.54748295711004 hrs\n",
+      "Processing patient# 1387 ETA: 4.527965035849359 hrs\n",
+      "Processing patient# 1388 ETA: 4.499852827242081 hrs\n",
+      "Processing patient# 1389 ETA: 4.477454396751192 hrs\n",
+      "Processing patient# 1390 ETA: 4.456050586798038 hrs\n",
+      "Processing patient# 1391 ETA: 4.434381366932787 hrs\n",
+      "Processing patient# 1392 ETA: 4.407015767265111 hrs\n",
+      "Processing patient# 1394 ETA: 4.37906760837885 hrs\n",
+      "Processing patient# 1395 ETA: 4.30689841369618 hrs\n",
+      "Processing patient# 1396 ETA: 4.285369354544922 hrs\n",
+      "Processing patient# 1397 ETA: 4.268326153216628 hrs\n",
+      "Processing patient# 1398 ETA: 4.263744453107467 hrs\n",
+      "Processing patient# 1399 ETA: 4.243092987329717 hrs\n",
+      "Processing patient# 1400 ETA: 4.222136327758431 hrs\n",
+      "Processing patient# 1401 ETA: 4.198838181790889 hrs\n",
+      "Processing patient# 1402 ETA: 4.1793994686896 hrs\n",
+      "Processing patient# 1403 ETA: 4.165873683685152 hrs\n",
+      "Processing patient# 1404 ETA: 4.145204392498549 hrs\n",
+      "Processing patient# 1405 ETA: 4.127004841931144 hrs\n",
+      "Processing patient# 1406 ETA: 4.1010385624992045 hrs\n",
+      "Processing patient# 1407 ETA: 4.089603952259641 hrs\n",
+      "Processing patient# 1410 ETA: 4.0657218732484255 hrs\n",
+      "Processing patient# 1411 ETA: 3.9614282328307855 hrs\n",
+      "Processing patient# 1412 ETA: 3.935801912909784 hrs\n",
+      "Processing patient# 1414 ETA: 3.9153825925978505 hrs\n",
+      "Processing patient# 1415 ETA: 3.8567507741617604 hrs\n",
+      "Processing patient# 1417 ETA: 3.831965497984502 hrs\n",
+      "Processing patient# 1418 ETA: 3.776563055347595 hrs\n",
+      "Processing patient# 1419 ETA: 3.7698241562507033 hrs\n",
+      "Processing patient# 1420 ETA: 3.7446089217382843 hrs\n",
+      "Processing patient# 1421 ETA: 3.7240907096557123 hrs\n",
+      "Processing patient# 1422 ETA: 3.7038961130109156 hrs\n",
+      "Processing patient# 1423 ETA: 3.682920000244269 hrs\n",
+      "Processing patient# 1424 ETA: 3.6590023383365153 hrs\n",
+      "Processing patient# 1425 ETA: 3.6355938848919345 hrs\n",
+      "Processing patient# 1426 ETA: 3.6216272131716254 hrs\n",
+      "Processing patient# 1427 ETA: 3.6168983707203903 hrs\n",
+      "Processing patient# 1428 ETA: 3.5952787640760513 hrs\n",
+      "Processing patient# 1429 ETA: 3.5846346884957216 hrs\n",
+      "Processing patient# 1430 ETA: 3.564002736003503 hrs\n",
+      "Processing patient# 1431 ETA: 3.5397180199405214 hrs\n",
+      "Processing patient# 1432 ETA: 3.5171189450058673 hrs\n",
+      "Processing patient# 1433 ETA: 3.495935878656453 hrs\n",
+      "Processing patient# 1436 ETA: 3.490441271786393 hrs\n",
+      "Processing patient# 1437 ETA: 3.3961545779969957 hrs\n",
+      "Processing patient# 1438 ETA: 3.3754478486011674 hrs\n",
+      "Processing patient# 1439 ETA: 3.3569018378051583 hrs\n",
+      "Processing patient# 1440 ETA: 3.3322680721583744 hrs\n",
+      "Processing patient# 1441 ETA: 3.3100857617839687 hrs\n",
+      "Processing patient# 1443 ETA: 3.291559807825679 hrs\n",
+      "Processing patient# 1444 ETA: 3.2368162911495224 hrs\n",
+      "Processing patient# 1445 ETA: 3.219285455204192 hrs\n",
+      "Processing patient# 1446 ETA: 3.1963537533862594 hrs\n",
+      "Processing patient# 1447 ETA: 3.1745961232535005 hrs\n",
+      "Processing patient# 1448 ETA: 3.161650590983931 hrs\n",
+      "Processing patient# 1449 ETA: 3.1433429676488482 hrs\n",
+      "Processing patient# 1450 ETA: 3.1198210242682034 hrs\n",
+      "Processing patient# 1451 ETA: 3.0976603277366 hrs\n",
+      "Processing patient# 1452 ETA: 3.0779698065120384 hrs\n",
+      "Processing patient# 1453 ETA: 3.0560144474137805 hrs\n",
+      "Processing patient# 1454 ETA: 3.035682783060343 hrs\n",
+      "Processing patient# 1455 ETA: 3.01512198534926 hrs\n",
+      "Processing patient# 1457 ETA: 3.001986470494109 hrs\n",
+      "Processing patient# 1460 ETA: 2.952160390772817 hrs\n",
+      "Processing patient# 1461 ETA: 2.8634344177440965 hrs\n",
+      "Processing patient# 1462 ETA: 2.8440811097260443 hrs\n",
+      "Processing patient# 1464 ETA: 2.8232880296876375 hrs\n",
+      "Processing patient# 1465 ETA: 2.7719246257226184 hrs\n",
+      "Processing patient# 1466 ETA: 2.7521925461187706 hrs\n",
+      "Processing patient# 1468 ETA: 2.7326375998926817 hrs\n",
+      "Processing patient# 1469 ETA: 2.6889556477504155 hrs\n",
+      "Processing patient# 1471 ETA: 2.6669628266321768 hrs\n",
+      "Processing patient# 1472 ETA: 2.61743253900286 hrs\n",
+      "Processing patient# 1473 ETA: 2.59755473644354 hrs\n",
+      "Processing patient# 1474 ETA: 2.580403991718651 hrs\n",
+      "Processing patient# 1475 ETA: 2.5592905984358345 hrs\n",
+      "Processing patient# 1476 ETA: 2.537063879300142 hrs\n",
+      "Processing patient# 1477 ETA: 2.5267651822019 hrs\n",
+      "Processing patient# 1478 ETA: 2.5045537118088426 hrs\n",
+      "Processing patient# 1479 ETA: 2.481165919057026 hrs\n",
+      "Processing patient# 1480 ETA: 2.4596286582390934 hrs\n",
+      "Processing patient# 1481 ETA: 2.4357531235283094 hrs\n",
+      "Processing patient# 1482 ETA: 2.414465554013931 hrs\n",
+      "Processing patient# 1483 ETA: 2.397971823392403 hrs\n",
+      "Processing patient# 1484 ETA: 2.3773148383053253 hrs\n",
+      "Processing patient# 1485 ETA: 2.359427685907246 hrs\n",
+      "Processing patient# 1486 ETA: 2.3374206903567583 hrs\n",
+      "Processing patient# 1487 ETA: 2.316515679359436 hrs\n",
+      "Processing patient# 1488 ETA: 2.294826008774524 hrs\n",
+      "Processing patient# 1489 ETA: 2.2790878480482633 hrs\n",
+      "Processing patient# 1490 ETA: 2.2603696225018344 hrs\n",
+      "Processing patient# 1491 ETA: 2.239299383374652 hrs\n",
+      "Processing patient# 1492 ETA: 2.217807365984541 hrs\n",
+      "Processing patient# 1494 ETA: 2.1953265507487028 hrs\n",
+      "Processing patient# 1495 ETA: 2.154147859638919 hrs\n",
+      "Processing patient# 1496 ETA: 2.1373596109892876 hrs\n",
+      "Processing patient# 1498 ETA: 2.1154642674823054 hrs\n",
+      "Processing patient# 1499 ETA: 2.0658858120215506 hrs\n",
+      "Processing patient# 1500 ETA: 2.0452432717780265 hrs\n",
+      "Processing patient# 1501 ETA: 2.025089469167218 hrs\n",
+      "Processing patient# 1502 ETA: 2.006434742403083 hrs\n",
+      "Processing patient# 1503 ETA: 1.9848314131735696 hrs\n",
+      "Processing patient# 1504 ETA: 1.9642185280659386 hrs\n",
+      "Processing patient# 1505 ETA: 1.943846245320117 hrs\n",
+      "Processing patient# 1506 ETA: 1.922791921977514 hrs\n",
+      "Processing patient# 1507 ETA: 1.900297880433324 hrs\n",
+      "Processing patient# 1508 ETA: 1.8807857255689255 hrs\n",
+      "Processing patient# 1509 ETA: 1.8639425218906893 hrs\n",
+      "Processing patient# 1510 ETA: 1.8451056155743986 hrs\n",
+      "Processing patient# 1511 ETA: 1.8219857168580944 hrs\n",
+      "Processing patient# 1512 ETA: 1.7994089303411904 hrs\n",
+      "Processing patient# 1513 ETA: 1.7763814680467143 hrs\n",
+      "Processing patient# 1514 ETA: 1.7547966026234776 hrs\n",
+      "Processing patient# 1515 ETA: 1.7381065940352345 hrs\n",
+      "Processing patient# 1516 ETA: 1.7198453679184118 hrs\n",
+      "Processing patient# 1517 ETA: 1.6968657202587767 hrs\n",
+      "Processing patient# 1518 ETA: 1.6766352182999515 hrs\n",
+      "Processing patient# 1519 ETA: 1.6640672508136443 hrs\n",
+      "Processing patient# 1520 ETA: 1.6443906538188457 hrs\n",
+      "Processing patient# 1521 ETA: 1.6265579485526016 hrs\n",
+      "Processing patient# 1522 ETA: 1.6064969870503643 hrs\n",
+      "Processing patient# 1523 ETA: 1.5878849256592276 hrs\n",
+      "Processing patient# 1524 ETA: 1.5670396502469308 hrs\n",
+      "Processing patient# 1526 ETA: 1.5448135244235013 hrs\n",
+      "Processing patient# 1527 ETA: 1.5007179911218453 hrs\n",
+      "Processing patient# 1528 ETA: 1.4779989854163593 hrs\n",
+      "Processing patient# 1529 ETA: 1.455040394503777 hrs\n",
+      "Processing patient# 1530 ETA: 1.4348017759756608 hrs\n",
+      "Processing patient# 1531 ETA: 1.412959918495631 hrs\n",
+      "Processing patient# 1532 ETA: 1.3895584875255462 hrs\n",
+      "Processing patient# 1534 ETA: 1.3681563406241992 hrs\n",
+      "Processing patient# 1535 ETA: 1.32147174191119 hrs\n",
+      "Processing patient# 1536 ETA: 1.2993491290633876 hrs\n",
+      "Processing patient# 1537 ETA: 1.2765257389907825 hrs\n",
+      "Processing patient# 1538 ETA: 1.2545430914596223 hrs\n",
+      "Processing patient# 1539 ETA: 1.2330312886358439 hrs\n",
+      "Processing patient# 1540 ETA: 1.2134439415684322 hrs\n",
+      "Processing patient# 1541 ETA: 1.1903008280926082 hrs\n",
+      "Processing patient# 1542 ETA: 1.167653547794189 hrs\n",
+      "Processing patient# 1543 ETA: 1.1457156837554199 hrs\n",
+      "Processing patient# 1544 ETA: 1.1241135936051372 hrs\n",
+      "Processing patient# 1545 ETA: 1.102846443816275 hrs\n",
+      "Processing patient# 1546 ETA: 1.080637407885313 hrs\n",
+      "Processing patient# 1547 ETA: 1.0583609296631968 hrs\n",
+      "Processing patient# 1549 ETA: 1.0358784646273498 hrs\n",
+      "Processing patient# 1550 ETA: 0.9894958379524097 hrs\n",
+      "Processing patient# 1551 ETA: 0.9702932011284904 hrs\n",
+      "Processing patient# 1552 ETA: 0.9487030035562142 hrs\n",
+      "Processing patient# 1553 ETA: 0.927032014874493 hrs\n",
+      "Processing patient# 1555 ETA: 0.9052574532595503 hrs\n",
+      "Processing patient# 1556 ETA: 0.8583338576951499 hrs\n",
+      "Processing patient# 1557 ETA: 0.8364724836388824 hrs\n",
+      "Processing patient# 1558 ETA: 0.8143605039141544 hrs\n",
+      "Processing patient# 1559 ETA: 0.7939183002461303 hrs\n",
+      "Processing patient# 1560 ETA: 0.7714008516864275 hrs\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing patient# 1561 ETA: 0.7496390868227617 hrs\n",
+      "Processing patient# 1562 ETA: 0.7277910622262829 hrs\n",
+      "Processing patient# 1563 ETA: 0.70534063768343 hrs\n",
+      "Processing patient# 1564 ETA: 0.6836017272867927 hrs\n",
+      "Processing patient# 1566 ETA: 0.6618160449642033 hrs\n",
+      "Processing patient# 1568 ETA: 0.6160384095808674 hrs\n",
+      "Processing patient# 1570 ETA: 0.5702290712717382 hrs\n",
+      "Processing patient# 1571 ETA: 0.5249562931103638 hrs\n",
+      "Processing patient# 1572 ETA: 0.5033148116076286 hrs\n",
+      "Processing patient# 1573 ETA: 0.48112341017214527 hrs\n",
+      "Processing patient# 1574 ETA: 0.4594317513380257 hrs\n",
+      "Processing patient# 1575 ETA: 0.43778636997364123 hrs\n",
+      "Processing patient# 1576 ETA: 0.415779529424036 hrs\n",
+      "Processing patient# 1577 ETA: 0.3949588738670405 hrs\n",
+      "Processing patient# 1578 ETA: 0.37282882327230255 hrs\n",
+      "Processing patient# 1580 ETA: 0.3515907668564129 hrs\n",
+      "Processing patient# 1581 ETA: 0.30684781963013846 hrs\n",
+      "Processing patient# 1582 ETA: 0.28561606805205886 hrs\n",
+      "Processing patient# 1583 ETA: 0.26367890904528596 hrs\n",
+      "Processing patient# 1584 ETA: 0.241728710744323 hrs\n",
+      "Processing patient# 1585 ETA: 0.2196190606033997 hrs\n",
+      "Processing patient# 1587 ETA: 0.19766932272849366 hrs\n",
+      "Processing patient# 1588 ETA: 0.1533620524758064 hrs\n",
+      "Processing patient# 1589 ETA: 0.1314311540085944 hrs\n",
+      "Processing patient# 1590 ETA: 0.10954693088589212 hrs\n",
+      "Processing patient# 1591 ETA: 0.08776483735769842 hrs\n",
+      "Processing patient# 1592 ETA: 0.06588565785057654 hrs\n",
+      "Processing patient# 1594 ETA: 0.04392167075191941 hrs\n",
+      "31161.49111223221\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Run the detection pipeline over every patient and save the accepted nodule\n",
+    "# slices, their predicted masks and a feature table in batches of patients.\n",
+    "BATCH_SIZE=400 #patients per saved batch\n",
+    "MAX_SLICES=5000 #slice capacity of the per-batch buffers\n",
+    "\n",
+    "start_time=time.time()\n",
+    "\n",
+    "elapsed_time=0\n",
+    "totaltime=94000 #initial guess in seconds, used for the ETA until the first patient is timed\n",
+    "thresh=-500 #lower HU threshold for nodule segmentation\n",
+    "noduleimages=np.ndarray([MAX_SLICES,1,512,512],dtype=np.float32)\n",
+    "nodulemasks=np.ndarray([MAX_SLICES,1,512,512],dtype=np.float32)\n",
+    "sample=[]\n",
+    "area=[]\n",
+    "noduleindicies=[]\n",
+    "index=0\n",
+    "start=1 #1-based number of the first patient in the current batch\n",
+    "end=BATCH_SIZE\n",
+    "for i in range(len(patients)):\n",
+    "    print(\"Processing patient#\",i,\"ETA:\",(totaltime-elapsed_time)/3600,\"hrs\")\n",
+    "    patient_scan=load_scan(INPUT_FOLDER+patients[i])\n",
+    "    patient_pix=get_pixels_hu(patient_scan)\n",
+    "    processed_pix = processimagefromfile(patient_pix)\n",
+    "    mask = predictmask(processed_pix)\n",
+    "    noduleindex = getnoduleindex(mask)\n",
+    "    trueinds=trueindicies(processed_pix,noduleindex)\n",
+    "\n",
+    "    # NOTE(review): the buffers hold MAX_SLICES slices per batch; a batch with\n",
+    "    # more accepted slices than that raises IndexError here.\n",
+    "    for ind in trueinds:\n",
+    "        noduleimages[index,0]=patient_pix[ind]\n",
+    "        nodulemasks[index,0]=mask[ind]\n",
+    "        sample.append(patients[i])\n",
+    "        area.append(np.sum(mask[ind]))\n",
+    "        noduleindicies.append(ind)\n",
+    "        index+=1\n",
+    "\n",
+    "    # Flush when the batch is full, and also after the last patient so the\n",
+    "    # final partial batch is written instead of being dropped.\n",
+    "    lastpatient=(i==len(patients)-1)\n",
+    "    if (i+1)%BATCH_SIZE==0 or lastpatient:\n",
+    "        end=i+1\n",
+    "        noduleimages=noduleimages[:index]\n",
+    "        nodulemasks=nodulemasks[:index]\n",
+    "        table=generatefeaturetable(nodulemasks)\n",
+    "        print(\"Saving data for patients\"+str(start)+\"-\"+str(end))\n",
+    "        np.save(datafolder+\"DSBNoduleImages\"+str(start)+\"-\"+str(end)+\".npy\",noduleimages)\n",
+    "        np.save(datafolder+\"DSBNoduleMasks\"+str(start)+\"-\"+str(end)+\".npy\",nodulemasks)\n",
+    "        table.to_csv(datafolder+\"DSBNoduleFeatures\"+str(start)+\"-\"+str(end)+\".csv\")\n",
+    "        if not lastpatient:\n",
+    "            # Begin a fresh batch; start/end advance so that every batch is\n",
+    "            # written to its own files rather than overwriting the previous one.\n",
+    "            del noduleimages, nodulemasks\n",
+    "            noduleimages=np.ndarray([MAX_SLICES,1,512,512],dtype=np.float32)\n",
+    "            nodulemasks=np.ndarray([MAX_SLICES,1,512,512],dtype=np.float32)\n",
+    "            sample=[]\n",
+    "            area=[]\n",
+    "            noduleindicies=[]\n",
+    "            index=0\n",
+    "            start=end+1\n",
+    "            end=start+BATCH_SIZE-1\n",
+    "\n",
+    "    elapsed_time=time.time()-start_time\n",
+    "    # Extrapolate the total run time from the mean time per processed patient;\n",
+    "    # i+1 patients are done, so the divisor is never zero.\n",
+    "    totaltime=elapsed_time/(i+1)*len(patients)\n",
+    "\n",
+    "print(elapsed_time)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}