Diff of /parse_raw.py [000000] .. [6fe801]

Switch to unified view

a b/parse_raw.py
1
import sys
2
import argparse
3
import os
4
from glob import glob
5
import json
6
import SimpleITK as sitk
7
8
# importing project utilities (logger, raw-volume reader) and enums
9
from utils.logger import logger
10
from utils.dataset import read_raw
11
from enums.dtype import DataTypes
12
13
14
def _find_volumes(dataset_path, pattern):
    """Return the sorted, forward-slash paths below *dataset_path* matching *pattern*.

    NOTE: glob only treats a path component that is exactly "**" as the
    recursive wildcard. "***" behaves like a plain "*", so only files located
    one directory below *dataset_path* are matched. The pattern is kept as-is
    to preserve which files are picked up.
    """
    matches = glob(os.path.join(dataset_path, "***", pattern), recursive=True)
    return [path.replace('\\', '/') for path in sorted(matches)]


def _subject_of(volume_path):
    """Return the name of the directory that directly contains *volume_path*."""
    return volume_path.split('/')[-2]


def _read_volume(volume_path, subject_information, sitk_pixel_type):
    """Call ``read_raw`` for one volume with the subject's size, spacing and origin."""
    return read_raw(
        binary_file_name=volume_path,
        image_size=subject_information['image_dim'],
        sitk_pixel_type=sitk_pixel_type,
        image_spacing=subject_information['voxel_dim'],
        image_origin=subject_information['origin'],
        big_endian=False,
    )


def _fail(message):
    """Log *message* as an error and terminate the script with exit status 1."""
    logger.error(message)
    sys.exit(1)


def main():
    """Convert every raw exhale/inhale ``.img`` pair of a dataset split to NIfTI.

    The split directory is given by ``--dataset_path``. The per-subject image
    size, voxel spacing and origin are read from ``description.json`` in the
    parent directory of the split, keyed by split name and then subject name.
    Each converted volume is written next to its source file with the
    ``.nii.gz`` extension.
    """
    # optional arguments from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset_path', type=str, default='dataset/train', help='root dir for raw training data')

    # parse the arguments
    args = parser.parse_args()

    # check if the dataset_path exists
    if not os.path.exists(args.dataset_path):
        _fail(f"Path {args.dataset_path} does not exist")

    # get the list of exhale and inhale files from the dataset_path
    logger.info(f"Reading raw data from '{args.dataset_path}'")
    exhale_volumes = _find_volumes(args.dataset_path, "*eBHCT.img")
    inhale_volumes = _find_volumes(args.dataset_path, "*iBHCT.img")

    exhale_subjects = [_subject_of(path) for path in exhale_volumes]
    inhale_subjects = [_subject_of(path) for path in inhale_volumes]

    # log the number of exhale and inhale files
    logger.info(f"Found {len(exhale_volumes)} exhale volumes: ({exhale_subjects})")
    logger.info(f"Found {len(inhale_volumes)} inhale volumes: ({inhale_subjects})\n")

    # zip() would silently drop or mispair volumes if the two lists disagree,
    # so refuse to continue unless they line up subject by subject.
    if exhale_subjects != inhale_subjects:
        _fail(
            "Exhale and inhale volumes do not pair up by subject: "
            f"exhale={exhale_subjects}, inhale={inhale_subjects}"
        )

    # Derive the split name and the dataset root from the path components
    # instead of string replacement, which breaks on a trailing slash or when
    # "train"/"test" appears elsewhere in the path.
    normalized_path = os.path.normpath(args.dataset_path)
    split_name = os.path.basename(normalized_path)
    description_path = os.path.join(os.path.dirname(normalized_path), 'description.json')

    # read the data dictionary
    if not os.path.isfile(description_path):
        _fail(f"Data dictionary {description_path} does not exist")
    with open(description_path, 'r') as json_file:
        dictionary = json.load(json_file)

    # Access the sitkPixelType value for RAW_DATA (same for every subject)
    sitk_pixel_type = DataTypes.RAW_DATA.value

    # iterate over all of the raw inhale and exhale volumes and export them as nifti files
    for subject_name, exhale_volume, inhale_volume in zip(exhale_subjects, exhale_volumes, inhale_volumes):
        # get the subject information
        try:
            subject_information = dictionary[split_name][subject_name]
        except KeyError:
            _fail(f"No entry for split '{split_name}', subject '{subject_name}' in {description_path}")

        # parse the data
        logger.info(f"Parsing exhale and inhale volume of subject: {subject_name}, dtype: {sitk_pixel_type}")
        exhale_raw = _read_volume(exhale_volume, subject_information, sitk_pixel_type)
        inhale_raw = _read_volume(inhale_volume, subject_information, sitk_pixel_type)

        # Validate explicitly rather than with assert, which is removed under -O.
        if exhale_raw.GetSize() != inhale_raw.GetSize():
            _fail("Exhale and inhale image sizes do not match")
        if exhale_raw.GetSize() != tuple(subject_information['image_dim']):
            _fail("Image size does not match the size in the data dictionary")

        # log the image sizes
        logger.info(f"Exhale image size: {exhale_raw.GetSize()}")
        logger.info(f"Inhale image size: {inhale_raw.GetSize()}")

        # saving the nifti files; only the trailing extension is swapped so a
        # ".img" occurring elsewhere in the path is left untouched
        logger.info(f"Saving the nifti files for subject {subject_name}. \n")
        sitk.WriteImage(exhale_raw, os.path.splitext(exhale_volume)[0] + '.nii.gz')
        sitk.WriteImage(inhale_raw, os.path.splitext(inhale_volume)[0] + '.nii.gz')


if __name__ == "__main__":
    main()