--- a +++ b/src/preprocessing_3w.py @@ -0,0 +1,186 @@ +import numpy as np +import pandas as pd +import os +import click +import glob +import cv2 +import pydicom +from tqdm import tqdm +from joblib import delayed, Parallel +import random +import pydicom +from scipy import ndimage +import pydicom +from skimage import exposure + + +def window_image(img, window_center, window_width, intercept, slope): + img = (img * slope + intercept) + img_min = window_center - window_width // 2 + img_max = window_center + window_width // 2 + img[img < img_min] = img_min + img[img > img_max] = img_max + return img + + +def get_first_of_dicom_field_as_int(x): + # get x[0] as in int is x is a 'pydicom.multival.MultiValue', otherwise get int(x) + if type(x) == pydicom.multival.MultiValue: + return int(x[0]) + else: + return int(x) + + +def get_windowing(data): + dicom_fields = [data[('0028', '1050')].value, # window center + data[('0028', '1051')].value, # window width + data[('0028', '1052')].value, # intercept + data[('0028', '1053')].value] # slope + return [get_first_of_dicom_field_as_int(x) for x in dicom_fields] + + +@click.group() +def cli(): + print("CLI") + + +windows_range = { + 'brain': [40, 80], + 'bone': [600, 2800], + 'subdual': [75, 215] +} + + +def refine_label(label_mask): + label_mask = label_mask.astype(np.bool) + # Fill hole + label_mask = ndimage.binary_fill_holes(label_mask) + # Get largest connected component + label_im, nb_labels = ndimage.label(label_mask) + sizes = ndimage.sum(label_mask, label_im, range(nb_labels + 1)) + mask_size = sizes < max(sizes) + remove_pixel = mask_size[label_im] + label_im[remove_pixel] = 0 + labels = np.unique(label_im) + label_mask = np.searchsorted(labels, label_im) + return label_mask + + +def cut_edge(image, keep_margin): + ''' + function that cuts zero edge + ''' + H, W = image.shape + H_s, H_e = 0, H - 1 + W_s, W_e = 0, W - 1 + + while H_s < H: + if image[H_s, :].sum() != 0: + break + H_s += 1 + while H_e > H_s: + if image[H_e, :].sum() != 0: + break + H_e -= 1 + while W_s < W: + if image[:, W_s].sum() != 0: + break + W_s += 1 + while W_e > W_s: + if image[:, W_e].sum() != 0: + break + W_e -= 1 + if keep_margin != 0: + H_s = max(0, H_s - keep_margin) + H_e = min(H - 1, H_e + keep_margin) + W_s = max(0, W_s - keep_margin) + W_e = min(W - 1, W_e + keep_margin) + return int(H_s), int(H_e) + 1, int(W_s), int(W_e) + 1 + + +def pre_preocessing(image, pad_size=(512, 512)): + # Convert to [0, 255] + # image = (image-image.min()) / (image.max() - image.min()) + # image= image*255 + image[image < 0] = 0 + # Remove unwanted region + mask = image > 0 + mask = refine_label(mask) + image = image * mask + # Center crop and pad to size + # mask = image>0 + # min_H_s, max_H_e, min_W_s, max_W_e = cut_edge(mask, 32) + # image = image[min_H_s: max_H_e, min_W_s:max_W_e] + # Pad to size + H, W = image.shape + pad_H, pad_W = pad_size[0], pad_size[1] + pad_H0 = max((pad_H - H) // 2, 0) + pad_H1 = max(pad_H - H - pad_H0, 0) + pad_W0 = max((pad_W - W) // 2, 0) + pad_W1 = max(pad_W - W - pad_W0, 0) + image = np.pad(image, [(pad_H0, pad_H1), (pad_W0, pad_W1)], mode='constant', constant_values=0) + return image + + +def convert_dicom_to_jpg(dicomfile, outputdir): + try: + data = pydicom.read_file(dicomfile) + image = data.pixel_array + window_center, window_width, intercept, slope = get_windowing(data) + id = dicomfile.split("/")[-1].split(".")[0] + + images = [] + # count =0 + + for k, v in windows_range.items(): + image_windowed = window_image(image, v[0], v[1], intercept, slope) + image_windowed = pre_preocessing(image_windowed, pad_size=(512, 512)) + images.append(image_windowed) + + # image_windowed = exposure.equalize_adapthist(image_windowed, clip_limit=0.01) + # min_value= image_windowed.min() + # max_value = image_windowed.max() + # print (image_windowed.min(),image_windowed.max()) + # if count ==0: + # image_windowed=np.uint8(image_windowed) + # clahe = cv2.createCLAHE(clipLimit = 1.0, tileGridSize = (8,8)) + # image_windowed = clahe.apply(image_windowed) + # images.append(image_windowed) + # print (image_windowed.min(),image_windowed.max()) + # count +=1 + images = np.asarray(images).transpose((1, 2, 0)) + # print (images.shape) + + output_image = os.path.join(outputdir, id + ".jpg") + cv2.imwrite(output_image, images) + except: + print(dicomfile) + + +@cli.command() +@click.option('--inputdir', type=str) +@click.option('--outputdir', type=str) +def extract_images( + inputdir, + outputdir, +): + os.makedirs(outputdir, exist_ok=True) + files = glob.glob(inputdir + "/*.dcm") + Parallel(n_jobs=8)(delayed(convert_dicom_to_jpg)(file, outputdir) for file in tqdm(files, total=len(files))) + + +def split_by_patient( + train_csv, + train_meta_csv, + n_folds, + outdir +): + os.makedirs(outdir, exist_ok=True) + train_df = pd.read_csv(train_csv) + train_meta_df = pd.read_csv(train_meta_csv) + train_meta_df['ID'] = train_meta_df['ID'].apply(lambda x: "_".join(x.split("_")[:2])) + train_meta_df = train_meta_df[['ID', 'PatientID']] + + +if __name__ == '__main__': + cli() \ No newline at end of file