[95f789]: / src / preprocessing_3w.py

Download this file

186 lines (157 with data), 5.5 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
import numpy as np
import pandas as pd
import os
import click
import glob
import cv2
import pydicom
from tqdm import tqdm
from joblib import delayed, Parallel
import random
import pydicom
from scipy import ndimage
import pydicom
from skimage import exposure
def window_image(img, window_center, window_width, intercept, slope):
img = (img * slope + intercept)
img_min = window_center - window_width // 2
img_max = window_center + window_width // 2
img[img < img_min] = img_min
img[img > img_max] = img_max
return img
def get_first_of_dicom_field_as_int(x):
# get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x)
if type(x) == pydicom.multival.MultiValue:
return int(x[0])
else:
return int(x)
def get_windowing(data):
dicom_fields = [data[('0028', '1050')].value, # window center
data[('0028', '1051')].value, # window width
data[('0028', '1052')].value, # intercept
data[('0028', '1053')].value] # slope
return [get_first_of_dicom_field_as_int(x) for x in dicom_fields]
@click.group()
def cli():
print("CLI")
windows_range = {
'brain': [40, 80],
'bone': [600, 2800],
'subdual': [75, 215]
}
def refine_label(label_mask):
label_mask = label_mask.astype(np.bool)
# Fill hole
label_mask = ndimage.binary_fill_holes(label_mask)
# Get largest connected component
label_im, nb_labels = ndimage.label(label_mask)
sizes = ndimage.sum(label_mask, label_im, range(nb_labels + 1))
mask_size = sizes < max(sizes)
remove_pixel = mask_size[label_im]
label_im[remove_pixel] = 0
labels = np.unique(label_im)
label_mask = np.searchsorted(labels, label_im)
return label_mask
def cut_edge(image, keep_margin):
'''
function that cuts zero edge
'''
H, W = image.shape
H_s, H_e = 0, H - 1
W_s, W_e = 0, W - 1
while H_s < H:
if image[H_s, :].sum() != 0:
break
H_s += 1
while H_e > H_s:
if image[H_e, :].sum() != 0:
break
H_e -= 1
while W_s < W:
if image[:, W_s].sum() != 0:
break
W_s += 1
while W_e > W_s:
if image[:, W_e].sum() != 0:
break
W_e -= 1
if keep_margin != 0:
H_s = max(0, H_s - keep_margin)
H_e = min(H - 1, H_e + keep_margin)
W_s = max(0, W_s - keep_margin)
W_e = min(W - 1, W_e + keep_margin)
return int(H_s), int(H_e) + 1, int(W_s), int(W_e) + 1
def pre_preocessing(image, pad_size=(512, 512)):
# Convert to [0, 255]
# image = (image-image.min()) / (image.max() - image.min())
# image= image*255
image[image < 0] = 0
# Remove unwanted region
mask = image > 0
mask = refine_label(mask)
image = image * mask
# Center crop and pad to size
# mask = image>0
# min_H_s, max_H_e, min_W_s, max_W_e = cut_edge(mask, 32)
# image = image[min_H_s: max_H_e, min_W_s:max_W_e]
# Pad to size
H, W = image.shape
pad_H, pad_W = pad_size[0], pad_size[1]
pad_H0 = max((pad_H - H) // 2, 0)
pad_H1 = max(pad_H - H - pad_H0, 0)
pad_W0 = max((pad_W - W) // 2, 0)
pad_W1 = max(pad_W - W - pad_W0, 0)
image = np.pad(image, [(pad_H0, pad_H1), (pad_W0, pad_W1)], mode='constant', constant_values=0)
return image
def convert_dicom_to_jpg(dicomfile, outputdir):
try:
data = pydicom.read_file(dicomfile)
image = data.pixel_array
window_center, window_width, intercept, slope = get_windowing(data)
id = dicomfile.split("/")[-1].split(".")[0]
images = []
# count =0
for k, v in windows_range.items():
image_windowed = window_image(image, v[0], v[1], intercept, slope)
image_windowed = pre_preocessing(image_windowed, pad_size=(512, 512))
images.append(image_windowed)
# image_windowed = exposure.equalize_adapthist(image_windowed, clip_limit=0.01)
# min_value= image_windowed.min()
# max_value = image_windowed.max()
# print (image_windowed.min(),image_windowed.max())
# if count ==0:
# image_windowed=np.uint8(image_windowed)
# clahe = cv2.createCLAHE(clipLimit = 1.0, tileGridSize = (8,8))
# image_windowed = clahe.apply(image_windowed)
# images.append(image_windowed)
# print (image_windowed.min(),image_windowed.max())
# count +=1
images = np.asarray(images).transpose((1, 2, 0))
# print (images.shape)
output_image = os.path.join(outputdir, id + ".jpg")
cv2.imwrite(output_image, images)
except:
print(dicomfile)
@cli.command()
@click.option('--inputdir', type=str)
@click.option('--outputdir', type=str)
def extract_images(
inputdir,
outputdir,
):
os.makedirs(outputdir, exist_ok=True)
files = glob.glob(inputdir + "/*.dcm")
Parallel(n_jobs=8)(delayed(convert_dicom_to_jpg)(file, outputdir) for file in tqdm(files, total=len(files)))
def split_by_patient(
train_csv,
train_meta_csv,
n_folds,
outdir
):
os.makedirs(outdir, exist_ok=True)
train_df = pd.read_csv(train_csv)
train_meta_df = pd.read_csv(train_meta_csv)
train_meta_df['ID'] = train_meta_df['ID'].apply(lambda x: "_".join(x.split("_")[:2]))
train_meta_df = train_meta_df[['ID', 'PatientID']]
if __name__ == '__main__':
cli()