|
a |
|
b/code/preprocessing.py |
|
|
1 |
""" |
|
|
2 |
Preprocessing for U-net |
|
|
3 |
Thresholding and mask the lung part |
|
|
4 |
Use annotation to mask the nodules |
|
|
5 |
""" |
|
|
6 |
|
|
|
7 |
import numpy as np |
|
|
8 |
import pandas as pd |
|
|
9 |
import os |
|
|
10 |
|
|
|
11 |
from skimage.segmentation import clear_border |
|
|
12 |
from skimage.measure import label,regionprops, perimeter |
|
|
13 |
from skimage.morphology import ball, disk, dilation, binary_erosion, remove_small_objects, erosion, closing, reconstruction, binary_closing |
|
|
14 |
from skimage.filters import roberts, sobel |
|
|
15 |
from scipy import ndimage as ndi |
|
|
16 |
|
|
|
17 |
from glob import glob |
|
|
18 |
from tqdm import tqdm |
|
|
19 |
|
|
|
20 |
import SimpleITK as sitk |
|
|
21 |
import scipy.misc |
|
|
22 |
import matplotlib.pyplot as plt |
|
|
23 |
|
|
|
24 |
|
|
|
25 |
def get_segmented_lungs(im, plot=False): |
|
|
26 |
binary = im < 604 |
|
|
27 |
cleared = clear_border(binary) |
|
|
28 |
label_image = label(cleared) |
|
|
29 |
areas = [r.area for r in regionprops(label_image)] |
|
|
30 |
areas.sort() |
|
|
31 |
if len(areas) > 2: |
|
|
32 |
for region in regionprops(label_image): |
|
|
33 |
if region.area < areas[-2]: |
|
|
34 |
for coordinates in region.coords: |
|
|
35 |
label_image[coordinates[0], coordinates[1]] = 0 |
|
|
36 |
binary = label_image > 0 |
|
|
37 |
selem = disk(2) |
|
|
38 |
binary = binary_erosion(binary, selem) |
|
|
39 |
selem = disk(10) |
|
|
40 |
binary = binary_closing(binary, selem) |
|
|
41 |
edges = roberts(binary) |
|
|
42 |
binary = ndi.binary_fill_holes(edges) |
|
|
43 |
get_high_vals = binary == 0 |
|
|
44 |
im[get_high_vals] = 0 |
|
|
45 |
return im |
|
|
46 |
|
|
|
47 |
def segment_lung_from_ct_scan(ct_scan): |
|
|
48 |
return np.asarray([get_segmented_lungs(slice) for slice in ct_scan]) |
|
|
49 |
|
|
|
50 |
def load_itk(filename): |
|
|
51 |
itkimage = sitk.ReadImage(filename) |
|
|
52 |
ct_scan = sitk.GetArrayFromImage(itkimage) |
|
|
53 |
origin = np.array(list(reversed(itkimage.GetOrigin()))) |
|
|
54 |
spacing = np.array(list(reversed(itkimage.GetSpacing()))) |
|
|
55 |
return ct_scan, origin, spacing |
|
|
56 |
|
|
|
57 |
def world_2_voxel(world_coordinates, origin, spacing): |
|
|
58 |
stretched_voxel_coordinates = np.absolute(world_coordinates - origin) |
|
|
59 |
voxel_coordinates = stretched_voxel_coordinates / spacing |
|
|
60 |
return voxel_coordinates |
|
|
61 |
|
|
|
62 |
def voxel_2_world(voxel_coordinates, origin, spacing): |
|
|
63 |
stretched_voxel_coordinates = voxel_coordinates * spacing |
|
|
64 |
world_coordinates = stretched_voxel_coordinates + origin |
|
|
65 |
return world_coordinates |
|
|
66 |
|
|
|
67 |
def seq(start, stop, step=1): |
|
|
68 |
n = int(round((stop - start)/float(step))) |
|
|
69 |
if n > 1: |
|
|
70 |
return([start + step*i for i in range(n+1)]) |
|
|
71 |
else: |
|
|
72 |
return([]) |
|
|
73 |
|
|
|
74 |
def draw_circles(image,cands,origin,spacing): |
|
|
75 |
#make empty matrix, which will be filled with the mask |
|
|
76 |
RESIZE_SPACING = [1, 1, 1] |
|
|
77 |
image_mask = np.zeros(image.shape) |
|
|
78 |
|
|
|
79 |
#run over all the nodules in the lungs |
|
|
80 |
for ca in cands.values: |
|
|
81 |
#get middel x-,y-, and z-worldcoordinate of the nodule |
|
|
82 |
radius = np.ceil(ca[4])/2 |
|
|
83 |
coord_x = ca[1] |
|
|
84 |
coord_y = ca[2] |
|
|
85 |
coord_z = ca[3] |
|
|
86 |
image_coord = np.array((coord_z,coord_y,coord_x)) |
|
|
87 |
|
|
|
88 |
#determine voxel coordinate given the worldcoordinate |
|
|
89 |
image_coord = world_2_voxel(image_coord,origin,spacing) |
|
|
90 |
|
|
|
91 |
#determine the range of the nodule |
|
|
92 |
noduleRange = seq(-radius, radius, RESIZE_SPACING[0]) |
|
|
93 |
|
|
|
94 |
#create the mask |
|
|
95 |
for x in noduleRange: |
|
|
96 |
for y in noduleRange: |
|
|
97 |
for z in noduleRange: |
|
|
98 |
coords = world_2_voxel(np.array((coord_z+z,coord_y+y,coord_x+x)),origin,spacing) |
|
|
99 |
if (np.linalg.norm(image_coord-coords) * RESIZE_SPACING[0]) < radius: |
|
|
100 |
image_mask[int(np.round(coords[0])),int(np.round(coords[1])),int(np.round(coords[2]))] = int(1) |
|
|
101 |
|
|
|
102 |
return image_mask |
|
|
103 |
|
|
|
104 |
def create_nodule_mask(imagePath, cands, fcount, subsetnum,final_lung_mask,final_nodule_mask): |
|
|
105 |
#if os.path.isfile(imagePath.replace('original',SAVE_FOLDER_image)) == False: |
|
|
106 |
img, origin, spacing = load_itk(imagePath) |
|
|
107 |
#calculate resize factor |
|
|
108 |
RESIZE_SPACING = [1, 1, 1] |
|
|
109 |
resize_factor = spacing / RESIZE_SPACING |
|
|
110 |
new_real_shape = img.shape * resize_factor |
|
|
111 |
new_shape = np.round(new_real_shape) |
|
|
112 |
real_resize = new_shape / img.shape |
|
|
113 |
new_spacing = spacing / real_resize |
|
|
114 |
|
|
|
115 |
#resize image |
|
|
116 |
lung_img = scipy.ndimage.interpolation.zoom(img, real_resize) |
|
|
117 |
|
|
|
118 |
# Segment the lung structure |
|
|
119 |
lung_img = lung_img + 1024 |
|
|
120 |
lung_mask = segment_lung_from_ct_scan(lung_img) |
|
|
121 |
lung_img = lung_img - 1024 |
|
|
122 |
|
|
|
123 |
#create nodule mask |
|
|
124 |
nodule_mask = draw_circles(lung_img,cands,origin,new_spacing) |
|
|
125 |
|
|
|
126 |
lung_img_512, lung_mask_512, nodule_mask_512 = np.zeros((lung_img.shape[0], 512, 512)), np.zeros((lung_mask.shape[0], 512, 512)), np.zeros((nodule_mask.shape[0], 512, 512)) |
|
|
127 |
|
|
|
128 |
original_shape = lung_img.shape |
|
|
129 |
i_start = 0 |
|
|
130 |
i_end = 0 |
|
|
131 |
flag = 0 |
|
|
132 |
for z in range(lung_img.shape[0]): |
|
|
133 |
offset = (512 - original_shape[1]) |
|
|
134 |
upper_offset = int(np.round(offset/2)) |
|
|
135 |
lower_offset = int(offset - upper_offset) |
|
|
136 |
|
|
|
137 |
new_origin = voxel_2_world([-upper_offset,-lower_offset,0],origin,new_spacing) |
|
|
138 |
|
|
|
139 |
lung_mask_512[z, upper_offset:-lower_offset,upper_offset:-lower_offset] = lung_mask[z,:,:] |
|
|
140 |
nodule_mask_512[z, upper_offset:-lower_offset,upper_offset:-lower_offset] = nodule_mask[z,:,:] |
|
|
141 |
|
|
|
142 |
# return final_lung_mask,final_nodule_mask |
|
|
143 |
# save images. |
|
|
144 |
np.save(os.path.join(OUTPUT_PATH,"lung_mask_%04d_%04d.npy" % (subsetnum, fcount)),lung_mask_512) |
|
|
145 |
np.save(os.path.join(OUTPUT_PATH,"nodule_mask_%04d_%04d.npy" % (subsetnum, fcount)),nodule_mask_512) |
|
|
146 |
|
|
|
147 |
|
|
|
148 |
|
|
|
149 |
# Helper function to get rows in data frame associated with each file |
|
|
150 |
def get_filename(file_list, case): |
|
|
151 |
for f in file_list: |
|
|
152 |
if case in f: |
|
|
153 |
return(f) |
|
|
154 |
|
|
|
155 |
# Getting list of image files |
|
|
156 |
LUNA_DATA_PATH = '/home/marshallee/Documents/lung/' |
|
|
157 |
OUTPUT_PATH = '/home/marshallee/Documents/lung/output' |
|
|
158 |
|
|
|
159 |
final_lung_mask = np.zeros((1,512,512)) |
|
|
160 |
final_nodule_mask = np.zeros((1,512,512)) |
|
|
161 |
|
|
|
162 |
# create a list of subsets, which are lists of file paths |
|
|
163 |
FILE_LIST = [] |
|
|
164 |
for i in range(0, 9): |
|
|
165 |
LUNA_SUBSET_PATH = LUNA_DATA_PATH + 'subset'+str(i)+'/' |
|
|
166 |
FILE_LIST.append(glob(LUNA_SUBSET_PATH + '*.mhd')) |
|
|
167 |
|
|
|
168 |
|
|
|
169 |
for subsetnum, subsetlist in enumerate(FILE_LIST): |
|
|
170 |
# The locations of the nodes |
|
|
171 |
df_node = pd.read_csv(LUNA_DATA_PATH + "mask-generate/CSVFILES/annotations.csv") |
|
|
172 |
#df_node = pd.read_csv(LUNA_DATA_PATH + "CSVFILES/annotations.csv") |
|
|
173 |
df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(subsetlist, file_name)) |
|
|
174 |
df_node = df_node.dropna() |
|
|
175 |
|
|
|
176 |
# Looping over the image files |
|
|
177 |
for fcount, img_file in enumerate(tqdm(subsetlist)): |
|
|
178 |
mini_df = df_node[df_node["file"]==img_file] # get all nodules associate with file |
|
|
179 |
if mini_df.shape[0]>0: # some files may not have a nodule--skipping those |
|
|
180 |
# feeding mini_df to the function will work for "cands" |
|
|
181 |
final_lung_mask, final_nodule_mask =create_nodule_mask(img_file, mini_df, fcount, subsetnum,final_lung_mask,final_nodule_mask) |
|
|
182 |
|
|
|
183 |
final_lung_mask = final_lung_mask[1:] |
|
|
184 |
final_nodule_mask = final_nodule_mask[1:] |
|
|
185 |
print(final_lung_mask.shape) |
|
|
186 |
print(final_nodule_mask.shape) |
|
|
187 |
np.save(os.path.join(OUTPUT_PATH,'final_lung_mask.npy'),final_lung_mask) |
|
|
188 |
np.save(os.path.join(OUTPUT_PATH,'final_nodule_mask.npy'),final_nodule_mask) |
|
|
189 |
|