|
a |
|
b/parse_raw.py |
|
|
1 |
import sys |
|
|
2 |
import argparse |
|
|
3 |
import os |
|
|
4 |
from glob import glob |
|
|
5 |
import json |
|
|
6 |
import SimpleITK as sitk |
|
|
7 |
|
|
|
8 |
# importing utils and enums
|
|
9 |
from utils.logger import logger |
|
|
10 |
from utils.dataset import read_raw |
|
|
11 |
from enums.dtype import DataTypes |
|
|
12 |
|
|
|
13 |
|
|
|
14 |
if __name__ == "__main__":
    # Script entry point: find every raw exhale/inhale volume pair below
    # --dataset_path, parse each pair with the geometry listed in
    # description.json, and write each volume next to its raw file as
    # '<name>.nii.gz'. Exits with status 1 on any validation failure.

    # optional arguments from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, default='dataset/train', help='root dir for raw training data')

    # parse the arguments
    args = parser.parse_args()

    # check if the dataset_path exists
    if not os.path.exists(args.dataset_path):
        logger.error(f"Path {args.dataset_path} does not exist")
        sys.exit(1)

    # The last path component (e.g. 'train') is the key into description.json.
    # Trailing separators are stripped first so 'dataset/train/' still yields
    # 'train' instead of an empty string.
    split_name = args.dataset_path.replace('\\', '/').rstrip('/').split('/')[-1]

    # description.json sits beside the split directory. Only a trailing
    # 'train'/'test' component is dropped; a 'train'/'test' substring elsewhere
    # in the path (e.g. 'trainer/') is left untouched.
    if split_name in ('train', 'test'):
        description_dir = os.path.dirname(os.path.normpath(args.dataset_path))
    else:
        description_dir = args.dataset_path
    description_path = os.path.join(description_dir, 'description.json')

    # get the list of exhale and inhale files from the dataset_path
    # Layout is <dataset_path>/<subject>/<file>: exactly one directory level.
    # (The previous '***' component was not glob's recursive '**' wildcard and
    # matched a single level as well, so the set of matches is unchanged.)
    logger.info(f"Reading raw data from '{args.dataset_path}'")
    exhale_volumes = [
        path.replace('\\', '/')
        for path in sorted(glob(os.path.join(args.dataset_path, "*", "*eBHCT.img")))
    ]
    inhale_volumes = [
        path.replace('\\', '/')
        for path in sorted(glob(os.path.join(args.dataset_path, "*", "*iBHCT.img")))
    ]

    # log the number of exhale and inhale files
    logger.info(f"Found {len(exhale_volumes)} exhale volumes: ({[subject.split('/')[-2] for subject in exhale_volumes]})")
    logger.info(f"Found {len(inhale_volumes)} inhale volumes: ({[subject.split('/')[-2] for subject in inhale_volumes]})\n")

    # zip() would silently drop the surplus files and could pair volumes of
    # different subjects, so refuse to continue on a count mismatch.
    if len(exhale_volumes) != len(inhale_volumes):
        logger.error(
            f"Found {len(exhale_volumes)} exhale volumes but {len(inhale_volumes)} inhale volumes; "
            "every subject needs exactly one of each"
        )
        sys.exit(1)

    # read the data dictionary
    with open(description_path, 'r', encoding='utf-8') as json_file:
        dictionary = json.load(json_file)

    # Access the sitkPixelType value for RAW_DATA (same for every subject)
    sitk_pixel_type = DataTypes.RAW_DATA.value

    # iterate over all of the raw inhale and exhale volumes and export them as nifti files
    for exhale_volume, inhale_volume in zip(exhale_volumes, inhale_volumes):
        # get the subject name and information
        subject_name = exhale_volume.split('/')[-2]

        # Both files are parsed with this subject's geometry, so they must
        # come from the same subject directory.
        inhale_subject = inhale_volume.split('/')[-2]
        if inhale_subject != subject_name:
            logger.error(
                f"Exhale volume of subject '{subject_name}' was paired with inhale volume of subject '{inhale_subject}'"
            )
            sys.exit(1)

        subject_information = dictionary[split_name][subject_name]

        # parse the data
        logger.info(f"Parsing exhale and inhale volume of subject: {subject_name}, dtype: {sitk_pixel_type}")
        exhale_raw = read_raw(
            binary_file_name=exhale_volume,
            image_size=subject_information['image_dim'],
            sitk_pixel_type=sitk_pixel_type,
            image_spacing=subject_information['voxel_dim'],
            image_origin=subject_information['origin'],
            big_endian=False,
        )
        inhale_raw = read_raw(
            binary_file_name=inhale_volume,
            image_size=subject_information['image_dim'],
            sitk_pixel_type=sitk_pixel_type,
            image_spacing=subject_information['voxel_dim'],
            image_origin=subject_information['origin'],
            big_endian=False,
        )

        # validate the image sizes (explicit checks: `assert` is removed under `python -O`)
        if exhale_raw.GetSize() != inhale_raw.GetSize():
            logger.error("Exhale and inhale image sizes do not match")
            sys.exit(1)
        if exhale_raw.GetSize() != tuple(subject_information['image_dim']):
            logger.error("Image size does not match the size in the data dictionary")
            sys.exit(1)

        # log the image sizes
        logger.info(f"Exhale image size: {exhale_raw.GetSize()}")
        logger.info(f"Inhale image size: {inhale_raw.GetSize()}")

        # saving the nifti files
        # Swap only the final extension: str.replace would also rewrite a
        # '.img' inside a directory name, and would leave a case-variant
        # extension untouched so the raw file itself got overwritten.
        logger.info(f"Saving the nifti files for subject {subject_name}. \n")
        sitk.WriteImage(exhale_raw, os.path.splitext(exhale_volume)[0] + '.nii.gz')
        sitk.WriteImage(inhale_raw, os.path.splitext(inhale_volume)[0] + '.nii.gz')