import numpy as np
import pandas as pd
import os
import click
import glob
import cv2
import pydicom
from tqdm import tqdm
from joblib import delayed, Parallel
import random
import pydicom
from scipy import ndimage
import pydicom
from skimage import exposure
def window_image(img, window_center, window_width, intercept, slope):
img = (img * slope + intercept)
img_min = window_center - window_width // 2
img_max = window_center + window_width // 2
img[img < img_min] = img_min
img[img > img_max] = img_max
return img
def get_first_of_dicom_field_as_int(x):
# get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x)
if type(x) == pydicom.multival.MultiValue:
return int(x[0])
else:
return int(x)
def get_windowing(data):
dicom_fields = [data[('0028', '1050')].value, # window center
data[('0028', '1051')].value, # window width
data[('0028', '1052')].value, # intercept
data[('0028', '1053')].value] # slope
return [get_first_of_dicom_field_as_int(x) for x in dicom_fields]
@click.group()
def cli():
print("CLI")
windows_range = {
'brain': [40, 80],
'bone': [600, 2800],
'subdual': [75, 215]
}
def refine_label(label_mask):
label_mask = label_mask.astype(np.bool)
# Fill hole
label_mask = ndimage.binary_fill_holes(label_mask)
# Get largest connected component
label_im, nb_labels = ndimage.label(label_mask)
sizes = ndimage.sum(label_mask, label_im, range(nb_labels + 1))
mask_size = sizes < max(sizes)
remove_pixel = mask_size[label_im]
label_im[remove_pixel] = 0
labels = np.unique(label_im)
label_mask = np.searchsorted(labels, label_im)
return label_mask
def cut_edge(image, keep_margin):
'''
function that cuts zero edge
'''
H, W = image.shape
H_s, H_e = 0, H - 1
W_s, W_e = 0, W - 1
while H_s < H:
if image[H_s, :].sum() != 0:
break
H_s += 1
while H_e > H_s:
if image[H_e, :].sum() != 0:
break
H_e -= 1
while W_s < W:
if image[:, W_s].sum() != 0:
break
W_s += 1
while W_e > W_s:
if image[:, W_e].sum() != 0:
break
W_e -= 1
if keep_margin != 0:
H_s = max(0, H_s - keep_margin)
H_e = min(H - 1, H_e + keep_margin)
W_s = max(0, W_s - keep_margin)
W_e = min(W - 1, W_e + keep_margin)
return int(H_s), int(H_e) + 1, int(W_s), int(W_e) + 1
def pre_preocessing(image, pad_size=(512, 512)):
# Convert to [0, 255]
# image = (image-image.min()) / (image.max() - image.min())
# image= image*255
image[image < 0] = 0
# Remove unwanted region
mask = image > 0
mask = refine_label(mask)
image = image * mask
# Center crop and pad to size
# mask = image>0
# min_H_s, max_H_e, min_W_s, max_W_e = cut_edge(mask, 32)
# image = image[min_H_s: max_H_e, min_W_s:max_W_e]
# Pad to size
H, W = image.shape
pad_H, pad_W = pad_size[0], pad_size[1]
pad_H0 = max((pad_H - H) // 2, 0)
pad_H1 = max(pad_H - H - pad_H0, 0)
pad_W0 = max((pad_W - W) // 2, 0)
pad_W1 = max(pad_W - W - pad_W0, 0)
image = np.pad(image, [(pad_H0, pad_H1), (pad_W0, pad_W1)], mode='constant', constant_values=0)
return image
def convert_dicom_to_jpg(dicomfile, outputdir):
try:
data = pydicom.read_file(dicomfile)
image = data.pixel_array
window_center, window_width, intercept, slope = get_windowing(data)
id = dicomfile.split("/")[-1].split(".")[0]
images = []
# count =0
for k, v in windows_range.items():
image_windowed = window_image(image, v[0], v[1], intercept, slope)
image_windowed = pre_preocessing(image_windowed, pad_size=(512, 512))
images.append(image_windowed)
# image_windowed = exposure.equalize_adapthist(image_windowed, clip_limit=0.01)
# min_value= image_windowed.min()
# max_value = image_windowed.max()
# print (image_windowed.min(),image_windowed.max())
# if count ==0:
# image_windowed=np.uint8(image_windowed)
# clahe = cv2.createCLAHE(clipLimit = 1.0, tileGridSize = (8,8))
# image_windowed = clahe.apply(image_windowed)
# images.append(image_windowed)
# print (image_windowed.min(),image_windowed.max())
# count +=1
images = np.asarray(images).transpose((1, 2, 0))
# print (images.shape)
output_image = os.path.join(outputdir, id + ".jpg")
cv2.imwrite(output_image, images)
except:
print(dicomfile)
@cli.command()
@click.option('--inputdir', type=str)
@click.option('--outputdir', type=str)
def extract_images(
inputdir,
outputdir,
):
os.makedirs(outputdir, exist_ok=True)
files = glob.glob(inputdir + "/*.dcm")
Parallel(n_jobs=8)(delayed(convert_dicom_to_jpg)(file, outputdir) for file in tqdm(files, total=len(files)))
def split_by_patient(
train_csv,
train_meta_csv,
n_folds,
outdir
):
os.makedirs(outdir, exist_ok=True)
train_df = pd.read_csv(train_csv)
train_meta_df = pd.read_csv(train_meta_csv)
train_meta_df['ID'] = train_meta_df['ID'].apply(lambda x: "_".join(x.split("_")[:2]))
train_meta_df = train_meta_df[['ID', 'PatientID']]
if __name__ == '__main__':
cli()