|
a |
|
b/src/preprocessing2.py |
|
|
1 |
import numpy as np |
|
|
2 |
import pandas as pd |
|
|
3 |
from PIL import Image, ImageFile |
|
|
4 |
from scipy import ndimage |
|
|
5 |
import pydicom |
|
|
6 |
import os |
|
|
7 |
from tqdm import tqdm |
|
|
8 |
from time import time |
|
|
9 |
from joblib import Parallel, delayed |
|
|
10 |
# Allow PIL to load truncated image files instead of raising an error mid-run.
ImageFile.LOAD_TRUNCATED_IMAGES = True


# Root of the RSNA intracranial haemorrhage Kaggle data on this machine.
data_path = "/home/lab/bac/kaggle_data/rsna/"
|
|
14 |
|
|
|
15 |
|
|
|
16 |
class CropHead(object):
    def __init__(self, offset=5):
        """
        Crop transform that trims away as much black background as possible.

        Originally made as an image transform, but too slow to run on the fly :(

        :param offset: Pixel offset to apply to the crop so that it isn't too tight
        """
        self.offset = offset

    def __call__(self, img):
        """
        Crops a CT image so that as much black area as possible is removed.

        :param img: PIL image (anything np.asarray can convert also works)
        :return: Cropped PIL image; the input is returned unchanged when no
                 foreground blob is found (e.g. an all-black slice)
        """
        try:
            # BUG FIX: the original tested `type(img) != np.array` — np.array is
            # a factory function, not a type, so the comparison was meaningless
            # and `img_array` was left unbound whenever the branch was skipped.
            # np.asarray is a no-op for ndarrays and converts PIL images.
            img_array = np.asarray(img)

            # Label connected components; the largest non-background blob is
            # taken to be the head.
            labeled_blobs, number_of_blobs = ndimage.label(img_array)
            blob_sizes = np.bincount(labeled_blobs.flatten())
            head_blob = labeled_blobs == np.argmax(blob_sizes[1:]) + 1  # The number of the head blob
            # Collapse the last axis so the mask is 2D.
            # NOTE(review): assumes a 3-channel (H, W, C) image — confirm for greyscale input.
            head_blob = np.max(head_blob, axis=-1)

            mask = head_blob == 0
            rows = np.flatnonzero((~mask).sum(axis=1))
            cols = np.flatnonzero((~mask).sum(axis=0))

            # Expand the tight bounding box by `offset`, clamped to image bounds.
            x_min = max([rows.min() - self.offset, 0])
            x_max = min([rows.max() + self.offset + 1, img_array.shape[0]])
            y_min = max([cols.min() - self.offset, 0])
            y_max = min([cols.max() + self.offset + 1, img_array.shape[1]])

            return Image.fromarray(np.uint8(img_array[x_min:x_max, y_min:y_max]))
        except ValueError:
            # rows/cols (or blob_sizes[1:]) are empty on an all-black image and
            # .min()/.max()/argmax raise ValueError — fall back to the input.
            return img

    def __repr__(self):
        return self.__class__.__name__ + '(offset={})'.format(self.offset)
|
|
54 |
|
|
|
55 |
|
|
|
56 |
def window_img(dcm, width=None, level=None, norm=True):
    """
    Apply an intensity window to a DICOM slice and optionally normalise it.

    :param dcm: pydicom dataset exposing pixel_array, RescaleSlope and RescaleIntercept
    :param width: Window width; read from the DICOM header when None
    :param level: Window centre; read from the DICOM header when None
    :param norm: When True, rescale the clipped image to the [0, 1] range
    :return: Windowed 2D float array (padded square if needed); a 512x512
             array of zeros when the pixel data cannot be decoded
    """
    try:
        pixels = dcm.pixel_array.astype(np.float32) * dcm.RescaleSlope + dcm.RescaleIntercept
    except ValueError:
        # Corrupt/undecodable pixel data -> return a blank slice.
        return np.zeros((512, 512))

    # Pad the image if it isn't square
    if pixels.shape[0] != pixels.shape[1]:
        (a, b) = pixels.shape
        # BUG FIX: the original padded (d // 2, d // 2) on both sides, which
        # leaves the image one pixel off square whenever the difference is odd.
        if a > b:
            d = a - b
            padding = ((0, 0), (d // 2, d - d // 2))
        else:
            d = b - a
            padding = ((d // 2, d - d // 2), (0, 0))
        pixels = np.pad(pixels, padding, mode='constant', constant_values=0)

    # BUG FIX: `if not width` / `if not level` treated an explicit 0 the same
    # as "not supplied"; a window centre of 0 is legitimate, so compare to None.
    if width is None:
        width = dcm.WindowWidth
        if type(width) != pydicom.valuerep.DSfloat:
            # Multi-valued header entry -> use the first window.
            width = width[0]
    if level is None:
        level = dcm.WindowCenter
        if type(level) != pydicom.valuerep.DSfloat:
            level = level[0]
    lower = level - (width / 2)
    upper = level + (width / 2)
    img = np.clip(pixels, lower, upper)

    if norm:
        return (img - lower) / (upper - lower)
    else:
        return img
|
|
87 |
|
|
|
88 |
|
|
|
89 |
def dcm_to_png(row, image_dirs, dataset, width, level, crop, crop_head, output_path):
    """
    Convert a triplet of adjacent DICOM slices into one RGB PNG on disk.

    :param row: DataFrame row holding "red", "green" and "blue" DICOM ids
    :param image_dirs: Mapping from dataset name to its DICOM directory
    :param dataset: Key into image_dirs
    :param width: Window width forwarded to window_img
    :param level: Window centre forwarded to window_img
    :param crop: When True, crop the assembled image with crop_head
    :param crop_head: CropHead instance used for the optional crop
    :param output_path: Directory where the PNG (named after the "green" id) is saved
    """
    src_dir = image_dirs[dataset]

    # Window each of the three adjacent slices into one colour channel.
    channels = []
    for colour in ("red", "green", "blue"):
        ds = pydicom.dcmread(os.path.join(src_dir, row[colour] + ".dcm"))
        channels.append(window_img(ds, width, level))

    rgb = (np.stack(channels, -1) * 255).astype(np.uint8)
    im = Image.fromarray(rgb)

    if crop:
        im = crop_head(im)

    im.save(os.path.join(output_path, row["green"] + ".png"))
|
|
104 |
|
|
|
105 |
|
|
|
106 |
def prepare_png_images(dataset, folder_name, width=None, level=None, crop=True):
    """
    Convert every DICOM triplet of a dataset into windowed RGB PNGs on disk.

    :param dataset: "train" or "test_stage_1"
    :param folder_name: Output sub-folder name under png/<dataset>/
    :param width: Window width; None reads it from each DICOM header
    :param level: Window centre; None reads it from each DICOM header
    :param crop: When True, crop each image with CropHead before saving
    """
    start = time()

    triplet_dfs = {
        "train": os.path.join(data_path, "train_triplets.csv"),
        "test_stage_1": os.path.join(data_path, "stage_1_test_triplets.csv")
    }

    image_dirs = {
        "train": os.path.join(data_path, "stage_1_train_images"),
        "test_stage_1": os.path.join(data_path, "stage_1_test_images")
    }

    # Redundant f-string around folder_name removed; exist_ok avoids the
    # exists()/makedirs() race.
    output_path = os.path.join(data_path, "png", dataset, folder_name)
    os.makedirs(output_path, exist_ok=True)

    triplets = pd.read_csv(triplet_dfs[dataset])
    crop_head = CropHead()

    # BUG FIX: the iterator was wrapped in tqdm twice, drawing two progress
    # bars; a single wrapper suffices. Dead commented-out serial loop removed
    # (it duplicated dcm_to_png).
    Parallel(n_jobs=4)(
        delayed(dcm_to_png)(row, image_dirs, dataset, width, level, crop, crop_head, output_path)
        for _, row in tqdm(triplets.iterrows(), total=len(triplets), desc=dataset)
    )

    print("Done in", (time() - start) // 60, "minutes")
|
|
148 |
|
|
|
149 |
|
|
|
150 |
if __name__ == '__main__':
    # Build cropped training PNGs with a brain window (width 80, centre 40).
    prepare_png_images("train", "adjacent-brain-cropped", width=80, level=40, crop=True)
    # prepare_png_images("test_stage_1", "adjacent-brain-cropped", 80, 40, crop=True)