# Kaggle-RSNA / Git / [95f789] /src/preprocessing

# Models:
# DavidFeaster/
# Kaggle-RSNA
# Downloads: 1
# [95f789]: / src / preprocessing_3w.py
# History
# Download this file
# 186 lines (157 with data), 5.5 kB

import numpy as np
import pandas as pd
import os
import click
import glob
import cv2
import pydicom
from tqdm import tqdm
from joblib import delayed, Parallel
import random
import pydicom
from scipy import ndimage
import pydicom
from skimage import exposure


def window_image(img, window_center, window_width, intercept, slope):
    """Rescale pixel values linearly and clamp them to an intensity window.

    The input is mapped through ``img * slope + intercept`` and every value
    outside ``[window_center - window_width // 2,
    window_center + window_width // 2]`` is replaced by the nearest bound.
    For NumPy array inputs the arithmetic yields a new array, so the
    caller's array is not modified.

    Returns the rescaled, clamped array.
    """
    half_width = window_width // 2
    lower = window_center - half_width
    upper = window_center + half_width

    rescaled = img * slope + intercept
    rescaled[rescaled < lower] = lower
    rescaled[rescaled > upper] = upper
    return rescaled


def get_first_of_dicom_field_as_int(x):
    """Return a DICOM field value as an ``int``.

    If ``x`` is a ``pydicom.multival.MultiValue`` (or a subclass of it), the
    first element is converted; otherwise ``x`` itself is converted with
    ``int(x)``. Non-integer values are truncated by ``int``.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # MultiValue subclasses.
    if isinstance(x, pydicom.multival.MultiValue):
        return int(x[0])
    return int(x)


def get_windowing(data):
    """Read the windowing and rescale fields from a DICOM dataset.

    Looks up four tags on ``data`` and converts each value with
    ``get_first_of_dicom_field_as_int``.

    Returns a list ``[window_center, window_width, intercept, slope]`` of
    ints. Note that intercept and slope are converted to ``int`` as well, so
    any fractional part is dropped.
    """
    tags = (
        ('0028', '1050'),  # window center
        ('0028', '1051'),  # window width
        ('0028', '1052'),  # intercept
        ('0028', '1053'),  # slope
    )
    # Read every field first, then convert, so a missing tag fails before
    # any conversion is attempted.
    raw_values = [data[tag].value for tag in tags]
    return [get_first_of_dicom_field_as_int(raw) for raw in raw_values]


# Root click command group; subcommands are registered on it with
# @cli.command() (see extract_images) and it is invoked from the
# __main__ guard at the bottom of the file.
# A '#' comment is used instead of a docstring because click would show a
# docstring as the group's --help text.
@click.group()
def cli():
    # Printed each time the group callback runs.
    print("CLI")


# Intensity windows applied to each image, stored as
# [window_center, window_width] pairs: convert_dicom_to_jpg passes v[0] and
# v[1] to window_image in that order. Iteration order of this dict determines
# the channel order of the written image.
# NOTE(review): 'subdual' looks like a typo for 'subdural'. In the code
# visible here only the values are used, but the key is left unchanged in
# case something else looks it up by name.
windows_range = {
    'brain': [40, 80],
    'bone': [600, 2800],
    'subdual': [75, 215]
}


def refine_label(label_mask):
    """Fill holes in a mask and keep only its largest connected component.

    ``label_mask`` is converted to boolean, interior holes are filled, the
    result is split into connected components (scipy's default
    connectivity), and every component smaller than the largest one is
    discarded. If several components tie for the largest size, all of them
    are kept.

    Returns an integer array of the same shape containing only 0 and 1, so
    it can be multiplied with an image without scaling pixel values.
    """
    # The builtin ``bool`` is used because the ``np.bool`` alias is missing
    # from NumPy 1.24-1.26.
    filled = ndimage.binary_fill_holes(np.asarray(label_mask).astype(bool))

    components, n_components = ndimage.label(filled)
    if n_components == 0:
        # Nothing is set in the mask: there is no component to keep.
        return np.zeros(filled.shape, dtype=np.intp)

    # Pixel count per label; index 0 is the background and must never win.
    sizes = np.bincount(components.ravel())
    sizes[0] = 0
    keep = sizes == sizes.max()

    # Map each pixel's label to keep/discard. Returning a strict 0/1 mask
    # (instead of re-numbered labels 1, 2, ...) means tied components do not
    # end up with a mask value greater than 1.
    return keep[components].astype(np.intp)


def cut_edge(image, keep_margin):
    """Find the bounding box of the non-zero region of a 2-D array.

    Rows and columns are scanned inwards from each edge until one whose sum
    is non-zero is found. When ``keep_margin`` is non-zero the box is grown
    by that many pixels on every side, limited to the array bounds.

    Returns ``(row_start, row_stop, col_start, col_stop)`` with exclusive
    stops, suitable for ``image[row_start:row_stop, col_start:col_stop]``.
    """
    n_rows, n_cols = image.shape

    # First and last rows whose sum is non-zero.
    top = 0
    while top < n_rows and image[top, :].sum() == 0:
        top += 1
    bottom = n_rows - 1
    while bottom > top and image[bottom, :].sum() == 0:
        bottom -= 1

    # First and last columns whose sum is non-zero.
    left = 0
    while left < n_cols and image[:, left].sum() == 0:
        left += 1
    right = n_cols - 1
    while right > left and image[:, right].sum() == 0:
        right -= 1

    if keep_margin != 0:
        top = max(0, top - keep_margin)
        bottom = min(n_rows - 1, bottom + keep_margin)
        left = max(0, left - keep_margin)
        right = min(n_cols - 1, right + keep_margin)

    # Convert the inclusive end indices to exclusive slice stops.
    return int(top), int(bottom) + 1, int(left), int(right) + 1


def pre_preocessing(image, pad_size=(512, 512)):
    """Clean a 2-D image and zero-pad it up to ``pad_size``.

    Steps:
      1. Negative values are set to 0 *in place* (the caller's array is
         modified).
      2. The positive region is passed through ``refine_label`` and the
         image is multiplied by the resulting mask.
      3. The result is zero-padded, roughly centred, to at least
         ``pad_size = (height, width)``. An image already larger than
         ``pad_size`` along an axis is left at its size on that axis; no
         cropping is done.

    Returns the padded array.
    """
    def _pad_amounts(target, current):
        # Split the missing size into (before, after); never negative.
        before = max((target - current) // 2, 0)
        after = max(target - current - before, 0)
        return before, after

    image[image < 0] = 0

    # Remove unwanted regions outside the refined foreground mask.
    image = image * refine_label(image > 0)

    height, width = image.shape
    target_h, target_w = pad_size[0], pad_size[1]
    padding = [_pad_amounts(target_h, height), _pad_amounts(target_w, width)]
    return np.pad(image, padding, mode='constant', constant_values=0)


def convert_dicom_to_jpg(dicomfile, outputdir):
    """Convert one DICOM file into a multi-window JPEG.

    The pixel data is windowed once per entry of ``windows_range``, each
    windowed image is cleaned and padded to 512x512 by ``pre_preocessing``,
    and the results are stacked along the last axis (in ``windows_range``
    iteration order) and written to ``<outputdir>/<name>.jpg``, where
    ``<name>`` is the file's base name up to its first dot.

    The conversion is best-effort: any ordinary error (unreadable file,
    missing tag, failed write) is reported on stdout together with the file
    path and the function returns normally, so a batch run continues.
    ``KeyboardInterrupt`` and ``SystemExit`` are not swallowed.

    Returns ``None``.
    """
    try:
        # dcmread is the current name of the reader; the read_file alias was
        # removed in pydicom 3.0.
        data = pydicom.dcmread(dicomfile)
        image = data.pixel_array
        # The window center/width stored in the file are read but not used;
        # the fixed windows in windows_range are applied instead.
        _, _, intercept, slope = get_windowing(data)
        image_id = os.path.basename(dicomfile).split(".")[0]

        channels = []
        for center, width in windows_range.values():
            windowed = window_image(image, center, width, intercept, slope)
            channels.append(pre_preocessing(windowed, pad_size=(512, 512)))
        # Shape (H, W, number_of_windows).
        # NOTE(review): values are handed to cv2.imwrite without rescaling
        # to 0-255 -- confirm this is intended for the wider windows.
        stacked = np.stack(channels, axis=-1)

        output_image = os.path.join(outputdir, image_id + ".jpg")
        # cv2.imwrite signals some failures by returning False rather than
        # raising, so check the result explicitly.
        if not cv2.imwrite(output_image, stacked):
            raise IOError("cv2.imwrite failed for {}".format(output_image))
    except Exception as exc:
        # Keep the batch going, but say which file failed and why.
        print("{}: {!r}".format(dicomfile, exc))


# CLI subcommand: converts every "*.dcm" file found directly inside
# --inputdir to a JPEG in --outputdir (created if missing), using 8 joblib
# workers and a tqdm progress bar.
# Comments are used instead of a docstring so the click --help text is
# unchanged.
@cli.command()
@click.option('--inputdir', type=str)
@click.option('--outputdir', type=str)
def extract_images(inputdir, outputdir):
    os.makedirs(outputdir, exist_ok=True)

    dicom_paths = glob.glob(inputdir + "/*.dcm")

    runner = Parallel(n_jobs=8)
    progress = tqdm(dicom_paths, total=len(dicom_paths))
    # One delayed conversion task per input file.
    tasks = (delayed(convert_dicom_to_jpg)(path, outputdir) for path in progress)
    runner(tasks)


def split_by_patient(train_csv, train_meta_csv, n_folds, outdir):
    """Load the training tables in preparation for a per-patient split.

    Creates ``outdir`` if needed, reads both CSV files, shortens each ``ID``
    in the metadata table to its first two underscore-separated parts, and
    narrows that table to the ``ID`` and ``PatientID`` columns.

    NOTE(review): the function appears unfinished -- ``n_folds`` and the
    loaded training table are never used, nothing is written to ``outdir``,
    and nothing is returned.
    """
    os.makedirs(outdir, exist_ok=True)

    # Both files are read (and must therefore exist and parse), although
    # only the metadata table is processed further.
    train_df = pd.read_csv(train_csv)
    meta_df = pd.read_csv(train_meta_csv)

    def _short_id(full_id):
        # Keep only the first two underscore-separated parts of the ID.
        return "_".join(full_id.split("_")[:2])

    meta_df['ID'] = meta_df['ID'].apply(_short_id)
    meta_df = meta_df[['ID', 'PatientID']]


# Script entry point: hand control to the click group, which dispatches to
# the subcommand named on the command line.
if __name__ == '__main__':
    cli()