Diff of /code/preprocessing.py [000000] .. [ebf7be]

Switch to unified view

a b/code/preprocessing.py
1
"""
2
    Preprocessing for U-net
3
    Thresholding and mask the lung part
4
    Use annotation to mask the nodules 
5
"""
6
7
import numpy as np
8
import pandas as pd
9
import os
10
11
from skimage.segmentation import clear_border
12
from skimage.measure import label,regionprops, perimeter
13
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing
14
from skimage.filters import roberts, sobel
15
from scipy import ndimage as ndi
16
17
from glob import glob
18
from tqdm import tqdm
19
20
import SimpleITK as sitk
21
import scipy.misc
22
import matplotlib.pyplot as plt
23
24
25
def get_segmented_lungs(im, plot=False):
26
    binary = im < 604
27
    cleared = clear_border(binary)
28
    label_image = label(cleared)
29
    areas = [r.area for r in regionprops(label_image)]
30
    areas.sort()
31
    if len(areas) > 2:
32
        for region in regionprops(label_image):
33
            if region.area < areas[-2]:
34
                for coordinates in region.coords:                
35
                       label_image[coordinates[0], coordinates[1]] = 0
36
    binary = label_image > 0
37
    selem = disk(2)
38
    binary = binary_erosion(binary, selem)
39
    selem = disk(10)
40
    binary = binary_closing(binary, selem)
41
    edges = roberts(binary)
42
    binary = ndi.binary_fill_holes(edges)
43
    get_high_vals = binary == 0
44
    im[get_high_vals] = 0
45
    return im
46
47
def segment_lung_from_ct_scan(ct_scan):
48
    return np.asarray([get_segmented_lungs(slice) for slice in ct_scan])
49
50
def load_itk(filename):
51
    itkimage = sitk.ReadImage(filename)
52
    ct_scan = sitk.GetArrayFromImage(itkimage)
53
    origin = np.array(list(reversed(itkimage.GetOrigin())))
54
    spacing = np.array(list(reversed(itkimage.GetSpacing())))
55
    return ct_scan, origin, spacing
56
57
def world_2_voxel(world_coordinates, origin, spacing):
58
    stretched_voxel_coordinates = np.absolute(world_coordinates - origin)
59
    voxel_coordinates = stretched_voxel_coordinates / spacing
60
    return voxel_coordinates
61
62
def voxel_2_world(voxel_coordinates, origin, spacing):
63
    stretched_voxel_coordinates = voxel_coordinates * spacing
64
    world_coordinates = stretched_voxel_coordinates + origin
65
    return world_coordinates
66
67
def seq(start, stop, step=1):
68
    n = int(round((stop - start)/float(step)))
69
    if n > 1:
70
        return([start + step*i for i in range(n+1)])
71
    else:
72
        return([])
73
74
def draw_circles(image,cands,origin,spacing):
75
    #make empty matrix, which will be filled with the mask
76
    RESIZE_SPACING = [1, 1, 1]
77
    image_mask = np.zeros(image.shape)
78
79
    #run over all the nodules in the lungs
80
    for ca in cands.values:
81
        #get middel x-,y-, and z-worldcoordinate of the nodule
82
        radius = np.ceil(ca[4])/2
83
        coord_x = ca[1]
84
        coord_y = ca[2]
85
        coord_z = ca[3]
86
        image_coord = np.array((coord_z,coord_y,coord_x))
87
88
        #determine voxel coordinate given the worldcoordinate
89
        image_coord = world_2_voxel(image_coord,origin,spacing)
90
91
        #determine the range of the nodule
92
        noduleRange = seq(-radius, radius, RESIZE_SPACING[0])
93
94
        #create the mask
95
        for x in noduleRange:
96
            for y in noduleRange:
97
                for z in noduleRange:
98
                    coords = world_2_voxel(np.array((coord_z+z,coord_y+y,coord_x+x)),origin,spacing)
99
                    if (np.linalg.norm(image_coord-coords) * RESIZE_SPACING[0]) < radius:
100
                        image_mask[int(np.round(coords[0])),int(np.round(coords[1])),int(np.round(coords[2]))] = int(1)
101
    
102
    return image_mask
103
104
def create_nodule_mask(imagePath, cands, fcount, subsetnum,final_lung_mask,final_nodule_mask):
105
    #if os.path.isfile(imagePath.replace('original',SAVE_FOLDER_image)) == False:
106
    img, origin, spacing = load_itk(imagePath)
107
    #calculate resize factor
108
    RESIZE_SPACING = [1, 1, 1]
109
    resize_factor = spacing / RESIZE_SPACING
110
    new_real_shape = img.shape * resize_factor
111
    new_shape = np.round(new_real_shape)
112
    real_resize = new_shape / img.shape
113
    new_spacing = spacing / real_resize
114
    
115
    #resize image
116
    lung_img = scipy.ndimage.interpolation.zoom(img, real_resize)
117
    
118
    # Segment the lung structure
119
    lung_img = lung_img + 1024
120
    lung_mask = segment_lung_from_ct_scan(lung_img)
121
    lung_img = lung_img - 1024
122
123
    #create nodule mask
124
    nodule_mask = draw_circles(lung_img,cands,origin,new_spacing)
125
126
    lung_img_512, lung_mask_512, nodule_mask_512 = np.zeros((lung_img.shape[0], 512, 512)), np.zeros((lung_mask.shape[0], 512, 512)), np.zeros((nodule_mask.shape[0], 512, 512))
127
128
    original_shape = lung_img.shape 
129
    i_start = 0
130
    i_end = 0
131
    flag = 0
132
    for z in range(lung_img.shape[0]):
133
        offset = (512 - original_shape[1])
134
        upper_offset = int(np.round(offset/2))
135
        lower_offset = int(offset - upper_offset)
136
137
        new_origin = voxel_2_world([-upper_offset,-lower_offset,0],origin,new_spacing)
138
139
        lung_mask_512[z, upper_offset:-lower_offset,upper_offset:-lower_offset] = lung_mask[z,:,:]
140
        nodule_mask_512[z, upper_offset:-lower_offset,upper_offset:-lower_offset] = nodule_mask[z,:,:]
141
142
    # return final_lung_mask,final_nodule_mask
143
    # save images.
144
    np.save(os.path.join(OUTPUT_PATH,"lung_mask_%04d_%04d.npy" % (subsetnum, fcount)),lung_mask_512)
145
    np.save(os.path.join(OUTPUT_PATH,"nodule_mask_%04d_%04d.npy" % (subsetnum, fcount)),nodule_mask_512)
146
147
148
149
# Helper function to get rows in data frame associated with each file
150
def get_filename(file_list, case):
151
    for f in file_list:
152
        if case in f:
153
            return(f)
154
155
# Getting list of image files
156
LUNA_DATA_PATH = '/home/marshallee/Documents/lung/'
157
OUTPUT_PATH = '/home/marshallee/Documents/lung/output'
158
159
final_lung_mask = np.zeros((1,512,512))
160
final_nodule_mask = np.zeros((1,512,512))
161
162
# create a list of subsets, which are lists of file paths
163
FILE_LIST = []
164
for i in range(0, 9):
165
    LUNA_SUBSET_PATH = LUNA_DATA_PATH + 'subset'+str(i)+'/'
166
    FILE_LIST.append(glob(LUNA_SUBSET_PATH + '*.mhd'))
167
168
169
for subsetnum, subsetlist in enumerate(FILE_LIST):
170
    # The locations of the nodes
171
    df_node = pd.read_csv(LUNA_DATA_PATH + "mask-generate/CSVFILES/annotations.csv")
172
    #df_node = pd.read_csv(LUNA_DATA_PATH + "CSVFILES/annotations.csv")
173
    df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(subsetlist, file_name))
174
    df_node = df_node.dropna()
175
176
    # Looping over the image files
177
    for fcount, img_file in enumerate(tqdm(subsetlist)):
178
        mini_df = df_node[df_node["file"]==img_file] # get all nodules associate with file
179
        if mini_df.shape[0]>0: # some files may not have a nodule--skipping those
180
            # feeding mini_df to the function will work for "cands"
181
            final_lung_mask, final_nodule_mask =create_nodule_mask(img_file, mini_df, fcount, subsetnum,final_lung_mask,final_nodule_mask)
182
183
final_lung_mask = final_lung_mask[1:]
184
final_nodule_mask = final_nodule_mask[1:]
185
print(final_lung_mask.shape)
186
print(final_nodule_mask.shape)
187
np.save(os.path.join(OUTPUT_PATH,'final_lung_mask.npy'),final_lung_mask)
188
np.save(os.path.join(OUTPUT_PATH,'final_nodule_mask.npy'),final_nodule_mask)
189