--- a
+++ b/DATA_PROCESS/LUNA_mask_extraction.py
@@ -0,0 +1,123 @@
+from __future__ import print_function, division
+import SimpleITK as sitk
+import numpy as np
+import csv
+from glob import glob
+import pandas as pd
+import os
+try:
+    from tqdm import tqdm # long waits are not fun
+except:
+    print('TQDM does make much nicer wait bars...')
+    tqdm = lambda x: x
+
+#Some helper functions
+
+def make_mask(center,diam,z,width,height,spacing,origin):
+    '''
+Center : centers of circles px -- list of coordinates x,y,z
+diam : diameters of circles px -- diameter
+widthXheight : pixel dim of image
+spacing = mm/px conversion rate np array x,y,z
+origin = x,y,z mm np.array
+z = z position of slice in world coordinates mm
+    '''
+    mask = np.zeros([height,width]) # 0's everywhere except nodule swapping x,y to match img
+    #convert to nodule space from world coordinates
+
+    # Defining the voxel range in which the nodule falls
+    v_center = (center-origin)/spacing
+    v_diam = int(diam/spacing[0]+5)
+    v_xmin = np.max([0,int(v_center[0]-v_diam)-5])
+    v_xmax = np.min([width-1,int(v_center[0]+v_diam)+5])
+    v_ymin = np.max([0,int(v_center[1]-v_diam)-5]) 
+    v_ymax = np.min([height-1,int(v_center[1]+v_diam)+5])
+
+    v_xrange = range(v_xmin,v_xmax+1)
+    v_yrange = range(v_ymin,v_ymax+1)
+
+    # Convert back to world coordinates for distance calculation
+    x_data = [x*spacing[0]+origin[0] for x in range(width)]
+    y_data = [x*spacing[1]+origin[1] for x in range(height)]
+
+    # Fill in 1 within sphere around nodule
+    for v_x in v_xrange:
+        for v_y in v_yrange:
+            p_x = spacing[0]*v_x + origin[0]
+            p_y = spacing[1]*v_y + origin[1]
+            if np.linalg.norm(center-np.array([p_x,p_y,z]))<=diam:
+                mask[int((p_y-origin[1])/spacing[1]),int((p_x-origin[0])/spacing[0])] = 1.0
+    return(mask)
+
+def matrix2int16(matrix):
+    ''' 
+matrix must be a numpy array NXN
+Returns uint16 version
+    '''
+    m_min= np.min(matrix)
+    m_max= np.max(matrix)
+    matrix = matrix-m_min
+    return(np.array(np.rint( (matrix-m_min)/float(m_max-m_min) * 65535.0),dtype=np.uint16))
+
+############
+#
+
+
+
+#####################
+#
+# Helper function to get rows in data frame associated 
+# with each file
+def get_filename(file_list, case):
+    for f in file_list:
+        if case in f:
+            return(f)
+#
+
+
+
+if __name__ == '__main__':
+    # Getting list of image files
+    
+    luna_path = "/home/cse/dual/cs5130287/Luna2016/"
+    luna_scratch_path = "/scratch/cse/dual/cs5130287/Luna2016/"
+    luna_subset_path = luna_scratch_path+"subset0/"
+    output_path = "/scratch/cse/dual/cs5130287/Luna2016/output_final/"
+    file_list=glob(luna_subset_path+"*.mhd")
+    # The locations of the nodes
+    df_node = pd.read_csv(luna_path+"annotations.csv")
+    df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(file_list, file_name))
+    df_node = df_node.dropna()
+
+    #####
+    #
+    # Looping over the image files
+    #
+    for fcount, img_file in enumerate(tqdm(file_list)):
+        mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
+        if mini_df.shape[0]>0: # some files may not have a nodule--skipping those 
+            # load the data once
+            itk_img = sitk.ReadImage(img_file) 
+            img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
+            num_z, height, width = img_array.shape        #heightXwidth constitute the transverse plane
+            origin = np.array(itk_img.GetOrigin())      # x,y,z  Origin in world coordinates (mm)
+            spacing = np.array(itk_img.GetSpacing())    # spacing of voxels in world coor. (mm)
+            # go through all nodes (why just the biggest?)
+            for node_idx, cur_row in mini_df.iterrows():       
+                node_x = cur_row["coordX"]
+                node_y = cur_row["coordY"]
+                node_z = cur_row["coordZ"]
+                diam = cur_row["diameter_mm"]
+                # just keep 3 slices
+                imgs = np.ndarray([3,height,width],dtype=np.float32)
+                masks = np.ndarray([3,height,width],dtype=np.uint8)
+                center = np.array([node_x, node_y, node_z])   # nodule center
+                v_center = np.rint((center-origin)/spacing)  # nodule center in voxel space (still x,y,z ordering)
+                for i, i_z in enumerate(np.arange(int(v_center[2])-1,
+                                 int(v_center[2])+2).clip(0, num_z-1)): # clip prevents going out of bounds in Z
+                    mask = make_mask(center, diam, i_z*spacing[2]+origin[2],
+                                     width, height, spacing, origin)
+                    masks[i] = mask
+                    imgs[i] = img_array[i_z]
+                np.save(os.path.join(output_path,"images_%04d_%04d.npy" % (fcount, node_idx)),imgs)
+                np.save(os.path.join(output_path,"masks_%04d_%04d.npy" % (fcount, node_idx)),masks)