Diff of /parse_raw.py [000000] .. [6fe801]

Switch to side-by-side view

--- a
+++ b/parse_raw.py
@@ -0,0 +1,79 @@
+import sys
+import argparse
+import os
+from glob import glob
+import json
+import SimpleITK as sitk
+
+# importing utils and 
+from utils.logger import logger
+from utils.dataset import read_raw
+from enums.dtype import DataTypes
+
+
+if __name__ == "__main__":
+    # optional arguments from the command line 
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('--dataset_path', type=str, default='dataset/train', help='root dir for raw training data')
+
+    # parse the arguments
+    args = parser.parse_args()
+
+    # check if the dataset_path exists
+    if not os.path.exists(args.dataset_path):
+        logger.error(f"Path {args.dataset_path} does not exist")
+        sys.exit(1)
+
+    # get the list of exhale and inhale files from the dataset_path
+    logger.info(f"Reading raw data from '{args.dataset_path}'")
+    exhale_volumes = [path.replace('\\', '/') for path in sorted(glob(os.path.join(args.dataset_path, "***" , "*eBHCT.img"), recursive=True))]
+    inhale_volumes = [path.replace('\\', '/') for path in sorted(glob(os.path.join(args.dataset_path, "***" , "*iBHCT.img"), recursive=True))]
+
+    # log the number of exhale and inhale files
+    logger.info(f"Found {len(exhale_volumes)} exhale volumes: ({[subject.split('/')[-2] for subject in exhale_volumes]})")
+    logger.info(f"Found {len(inhale_volumes)} inhale volumes: ({[subject.split('/')[-2] for subject in inhale_volumes]})\n")
+
+    # read the data dictionary
+    with open(os.path.join(args.dataset_path.replace("train", "", 1).replace("test", "", 1), 'description.json'), 'r') as json_file:
+        dictionary = json.loads(json_file.read())
+
+    # iterate over all of the raw inhale and exhale volumes and export them as nifti files
+    for exhale_volume, inhale_volume in zip(exhale_volumes, inhale_volumes):
+        # get the subject name and information
+        subject_name = exhale_volume.split('/')[-2]
+        subject_information = dictionary[args.dataset_path.replace('\\', '/').split("/")[-1]][subject_name]
+
+        # Access the sitkPixelType value for RAW_DATA
+        sitk_pixel_type = DataTypes.RAW_DATA.value
+
+        # parse the data
+        print(f"Parsing exhale and inhale volume of subject: {subject_name}, dtype: {sitk_pixel_type}")
+        exhale_raw = read_raw(
+            binary_file_name = exhale_volume, 
+            image_size = subject_information['image_dim'], 
+            sitk_pixel_type = sitk_pixel_type,
+            image_spacing = subject_information['voxel_dim'],
+            image_origin = subject_information['origin'],
+            big_endian=False
+            )
+        inhale_raw = read_raw(
+            binary_file_name = inhale_volume, 
+            image_size = subject_information['image_dim'], 
+            sitk_pixel_type = sitk_pixel_type,
+            image_spacing = subject_information['voxel_dim'],
+            image_origin = subject_information['origin'],
+            big_endian=False
+            )
+        
+        # log the image sizes
+        assert exhale_raw.GetSize() == inhale_raw.GetSize(), "Exhale and inhale image sizes do not match"
+        assert exhale_raw.GetSize() == tuple(subject_information['image_dim']), "Image size does not match the size in the data dictionary"
+        
+        logger.info(f"Exhale image size: {exhale_raw.GetSize()}")
+        logger.info(f"Inhale image size: {inhale_raw.GetSize()}")
+        
+        # saving the nifti files
+        logger.info(f"Saving the nifti files for subject {subject_name}. \n")
+        sitk.WriteImage(exhale_raw, exhale_volume.replace('.img', '.nii.gz'))
+        sitk.WriteImage(inhale_raw, inhale_volume.replace('.img', '.nii.gz'))