[95f789]: / src / preprocessing.py

Download this file

72 lines (56 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
import pandas as pd
import os
import click
import glob
import cv2
import pydicom
from tqdm import tqdm
from utils import get_windowing, window_image
from joblib import delayed, Parallel
@click.group()
def cli():
    # Root click group: subcommands register on it through @cli.command().
    # The body only prints a fixed marker line.
    print("CLI")
# Named intensity windows applied to every scan. Each value is a two-element
# list whose items are passed to window_image as its 2nd and 3rd arguments
# (presumably window centre and window width -- confirm against utils).
# Insertion order fixes the channel order of the images that are written.
windows_range = dict(
    brain=[40, 80],
    bone=[600, 2800],
    subdual=[75, 215],
)
def convert_dicom_to_jpg(dicomfile, outputdir):
    """Convert one DICOM file into a JPEG with one channel per window.

    The pixel data is run through ``window_image`` once for every entry of
    the module-level ``windows_range`` mapping (in insertion order). The
    results are stacked channel-last and written to ``<outputdir>/<id>.jpg``,
    where ``<id>`` is the file's base name up to its first dot.

    This is a best-effort worker: if reading, windowing or writing fails, the
    path and the error are printed and the function returns normally instead
    of raising.

    Args:
        dicomfile: Path of the DICOM file to read.
        outputdir: Directory that receives the JPEG. It is not created here.

    Returns:
        None.
    """
    try:
        # dcmread is the current name; read_file is its deprecated alias and
        # was removed in pydicom 3.0.
        data = pydicom.dcmread(dicomfile)
        image = data.pixel_array
        # Only the rescale intercept and slope are used below; the window
        # stored in the file is ignored in favour of windows_range.
        _, _, intercept, slope = get_windowing(data)

        # basename handles whichever separator the platform uses.
        image_id = os.path.basename(dicomfile).split(".")[0]

        # NOTE(review): window[0] / window[1] are presumably the window
        # centre and width -- confirm against utils.window_image.
        channels = [
            window_image(image, window[0], window[1], intercept, slope)
            for window in windows_range.values()
        ]
        # (n_windows, rows, cols) -> (rows, cols, n_windows) for OpenCV.
        stacked = np.asarray(channels).transpose((1, 2, 0))

        output_image = os.path.join(outputdir, image_id + ".jpg")
        # imwrite signals most failures by returning False, not by raising.
        if not cv2.imwrite(output_image, stacked):
            raise OSError("cv2.imwrite could not write " + output_image)
    except Exception as exc:
        # Deliberately broad so one bad file is reported rather than fatal,
        # but no longer swallows KeyboardInterrupt / SystemExit.
        print(f"{dicomfile}: {exc!r}")
@cli.command()
@click.option('--inputdir', type=str, required=True,
              help='Directory whose *.dcm files are converted.')
@click.option('--outputdir', type=str, required=True,
              help='Directory that receives the JPEGs (created if missing).')
@click.option('--n_jobs', type=int, default=8, show_default=True,
              help='Number of parallel joblib workers.')
def extract_images(
    inputdir,
    outputdir,
    n_jobs=8,
):
    """Convert every *.dcm file directly under INPUTDIR to a JPEG in OUTPUTDIR.

    Files are matched non-recursively and converted in parallel by
    convert_dicom_to_jpg, with a progress bar over the dispatched files.
    """
    os.makedirs(outputdir, exist_ok=True)
    files = glob.glob(os.path.join(inputdir, "*.dcm"))
    # tqdm wraps the task generator, so the bar advances as joblib pulls
    # tasks for dispatch; the total is taken from len(files).
    Parallel(n_jobs=n_jobs)(
        delayed(convert_dicom_to_jpg)(file, outputdir) for file in tqdm(files)
    )
def split_by_patient(
    train_csv,
    train_meta_csv,
    n_folds,
    outdir
):
    """Load the training tables and reduce the metadata to ID / PatientID.

    Creates ``outdir`` if it is missing, reads both CSV files, shortens every
    metadata ``ID`` to its first two underscore-separated fields and keeps
    only the ``ID`` and ``PatientID`` columns.

    NOTE(review): this looks unfinished -- ``n_folds`` and the table read
    from ``train_csv`` are never used, nothing is written to ``outdir`` and
    nothing is returned. It is also not registered as a ``cli`` command.

    Args:
        train_csv: Path of the training CSV (read, then unused).
        train_meta_csv: Path of the metadata CSV; needs ``ID`` and
            ``PatientID`` columns.
        n_folds: Currently unused.
        outdir: Directory to create.
    """
    def _first_two_fields(raw_id):
        # "a_b_c" -> "a_b"; values with fewer than two fields pass through.
        return "_".join(raw_id.split("_")[:2])

    os.makedirs(outdir, exist_ok=True)

    train_df = pd.read_csv(train_csv)
    meta = pd.read_csv(train_meta_csv)

    meta['ID'] = meta['ID'].apply(_first_two_fields)
    kept_columns = ['ID', 'PatientID']
    meta = meta[kept_columns]
# Script entry point: hand control to the click group when run directly.
if __name__ == '__main__':
    cli()