Luna2016-Lung-Nodule-Det / Git / [8a73bc] /DATA_PROCESS/LUNA_segment_lung

Models:
SCallahan/
Luna2016-Lung-Nodule-Det
Downloads: 1
[8a73bc]: / DATA_PROCESS / LUNA_segment_lung_ROI.py
History
Download this file
181 lines (169 with data), 7.7 kB

import numpy as np
from skimage import morphology
from skimage import measure
from sklearn.cluster import KMeans
from skimage.transform import resize
from glob import glob

if __name__ == '__main__':
    working_path = "/scratch/cse/dual/cs5130287/Luna2016/output_final/"
    file_list=glob(working_path+"images_*.npy")

    for img_file in file_list:
        # I ran into an error when using Kmean on np.float16, so I'm using np.float64 here
        imgs_to_process = np.load(img_file).astype(np.float64) 
        print "on image", img_file
        for i in range(len(imgs_to_process)):
            img = imgs_to_process[i]
            #Standardize the pixel values
            mean = np.mean(img)
            std = np.std(img)
            img = img-mean
            img = img/std
            # Find the average pixel value near the lungs
            # to renormalize washed out images
            middle = img[100:400,100:400] 
            mean = np.mean(middle)  
            max_im = np.max(img)
            min_im = np.min(img)
            # To improve threshold finding, I'm moving the 
            # underflow and overflow on the pixel spectrum
            img[img==max_im]=mean
            img[img==min_im]=mean
            #
            # Using Kmeans to separate foreground (radio-opaque tissue)
            # and background (radio transparent tissue ie lungs)
            # Doing this only on the center of the image to avoid 
            # the non-tissue parts of the image as much as possible
            #
            kmeans = KMeans(n_clusters=2).fit(np.reshape(middle,[np.prod(middle.shape),1]))
            centers = sorted(kmeans.cluster_centers_.flatten())
            threshold = np.mean(centers)
            thresh_img = np.where(img<threshold,1.0,0.0)  # threshold the image
            #
            # I found an initial erosion helful for removing graininess from some of the regions
            # and then large dialation is used to make the lung region 
            # engulf the vessels and incursions into the lung cavity by 
            # radio opaque tissue
            #
            eroded = morphology.erosion(thresh_img,np.ones([4,4]))
            dilation = morphology.dilation(eroded,np.ones([10,10]))
            #
            #  Label each region and obtain the region properties
            #  The background region is removed by removing regions 
            #  with a bbox that is to large in either dimnsion
            #  Also, the lungs are generally far away from the top 
            #  and bottom of the image, so any regions that are too
            #  close to the top and bottom are removed
            #  This does not produce a perfect segmentation of the lungs
            #  from the image, but it is surprisingly good considering its
            #  simplicity. 
            #
            labels = measure.label(dilation)
            label_vals = np.unique(labels)
            regions = measure.regionprops(labels)
            good_labels = []
            for prop in regions:
                B = prop.bbox
                if B[2]-B[0]<475 and B[3]-B[1]<475 and B[0]>40 and B[2]<472:
                    good_labels.append(prop.label)
            mask = np.ndarray([512,512],dtype=np.int8)
            mask[:] = 0
            #
            #  The mask here is the mask for the lungs--not the nodes
            #  After just the lungs are left, we do another large dilation
            #  in order to fill in and out the lung mask 
            #
            for N in good_labels:
                mask = mask + np.where(labels==N,1,0)
            mask = morphology.dilation(mask,np.ones([10,10])) # one last dilation
            imgs_to_process[i] = mask
        np.save(img_file.replace("images","lungmask"),imgs_to_process)
        
    
    #
    #    Here we're applying the masks and cropping and resizing the image
    #


    file_list=glob(working_path+"lungmask_*.npy")
    out_images = []      #final set of images
    out_nodemasks = []   #final set of nodemasks
    for fname in file_list:
        print "working on file ", fname
        imgs_to_process = np.load(fname.replace("lungmask","images"))
        masks = np.load(fname)
        node_masks = np.load(fname.replace("lungmask","masks"))
        for i in range(len(imgs_to_process)):
            mask = masks[i]
            node_mask = node_masks[i]
            img = imgs_to_process[i]
            new_size = [512,512]   # we're scaling back up to the original size of the image
            img= mask*img          # apply lung mask
            #
            # renormalizing the masked image (in the mask region)
            #
            new_mean = np.mean(img[mask>0])  
            new_std = np.std(img[mask>0])
            #
            #  Pulling the background color up to the lower end
            #  of the pixel range for the lungs
            #
            # old_min = np.min(img)       # background color
            # img[img==old_min] = new_mean-1.2*new_std   # resetting backgound color
            # img = img-new_mean
            # img = img/new_std
            #make image bounding box  (min row, min col, max row, max col)
            labels = measure.label(mask)
            regions = measure.regionprops(labels)
            #
            # Finding the global min and max row over all regions
            #
            min_row = 512
            max_row = 0
            min_col = 512
            max_col = 0
            for prop in regions:
                B = prop.bbox
                if min_row > B[0]:
                    min_row = B[0]
                if min_col > B[1]:
                    min_col = B[1]
                if max_row < B[2]:
                    max_row = B[2]
                if max_col < B[3]:
                    max_col = B[3]
            width = max_col-min_col
            height = max_row - min_row
            if width > height:
                max_row=min_row+width
            else:
                max_col = min_col+height
            # 
            # cropping the image down to the bounding box for all regions
            # (there's probably an skimage command that can do this in one line)
            # 
            img = img[min_row:max_row,min_col:max_col]
            mask =  mask[min_row:max_row,min_col:max_col]
            if max_row-min_row <5 or max_col-min_col<5:  # skipping all images with no god regions
                pass
            else:
                # moving range to -1 to 1 to accomodate the resize function
                mean = np.mean(img)
                img = img - mean
                min = np.min(img)
                max = np.max(img)
                img = img/(max-min)
                new_img = resize(img,[512,512])
                new_node_mask = resize(node_mask[min_row:max_row,min_col:max_col],[512,512])
                out_images.append(new_img)
                out_nodemasks.append(new_node_mask)


    num_images = len(out_images)
    #
    #  Writing out images and masks as 1 channel arrays for input into network
    #
    final_images = np.ndarray([num_images,1,512,512],dtype=np.float32)
    final_masks = np.ndarray([num_images,1,512,512],dtype=np.float32)
    for i in range(num_images):
        final_images[i,0] = out_images[i]
        final_masks[i,0] = out_nodemasks[i]

    rand_i = np.random.choice(range(num_images),size=num_images,replace=False)
    test_i = int(0.2*num_images)
    np.save(working_path+"trainImages.npy",final_images[rand_i[test_i:]])
    np.save(working_path+"trainMasks.npy",final_masks[rand_i[test_i:]])
    np.save(working_path+"testImages.npy",final_images[rand_i[:test_i]])
    np.save(working_path+"testMasks.npy",final_masks[rand_i[:test_i]])