utils/convert_h5.py


"""
Convert to h5 utility.
Sample commands to create a new dataset:
- python3 utils/convert_h5.py -dd /home/masterthesis/shayan/nas_drive/Data_Neuro/OASISchallenge/FS -ld /home/masterthesis/shayan/nas_drive/Data_Neuro/OASISchallenge -trv datasets/train_volumes.txt -tev datasets/test_volumes.txt -id MALC -rc Neo -o COR -df datasets/MALC/coronal
- python utils/convert_h5.py -dd /home/masterthesis/shayan/nas_drive/Data_Neuro/IXI/IXI_FS -ld /home/masterthesis/shayan/nas_drive/Data_Neuro/IXI/IXI_FS -ds 98,2 -rc FS -o COR -df datasets/IXI/coronal
"""
import argparse
import os
import h5py
import numpy as np
import common_utils
import data_utils as du
import preprocessor


def apply_split(data_split, data_dir, label_dir):
    """Randomly split the available volumes into train and test sets according to data_split, e.g. "80,20"."""
    file_paths = du.load_file_paths(data_dir, label_dir)
    print("Total number of volumes to process: %d" % len(file_paths))
    train_ratio, test_ratio = data_split.split(",")
    train_len = int((int(train_ratio) / 100) * len(file_paths))
    train_idx = np.random.choice(len(file_paths), train_len, replace=False)
    test_idx = np.array([i for i in range(len(file_paths)) if i not in train_idx])
    train_file_paths = [file_paths[i] for i in train_idx]
    test_file_paths = [file_paths[i] for i in test_idx]
    return train_file_paths, test_file_paths


def _write_h5(data, label, class_weights, weights, f, mode):
    no_slices, H, W = data[0].shape
    with h5py.File(f[mode]['data'], "w") as data_handle:
        data_handle.create_dataset("data", data=np.concatenate(data).reshape((-1, H, W)))
    with h5py.File(f[mode]['label'], "w") as label_handle:
        label_handle.create_dataset("label", data=np.concatenate(label).reshape((-1, H, W)))
    with h5py.File(f[mode]['weights'], "w") as weights_handle:
        weights_handle.create_dataset("weights", data=np.concatenate(weights))
    with h5py.File(f[mode]['class_weights'], "w") as class_weights_handle:
        class_weights_handle.create_dataset("class_weights",
                                            data=np.concatenate(class_weights).reshape((-1, H, W)))


def convert_h5(data_dir, label_dir, data_split, train_volumes, test_volumes, f, data_id, remap_config='Neo',
               orientation=preprocessor.ORIENTATION['coronal']):
    # Data splitting: either a random ratio split or explicit train/test volume lists
    if data_split:
        train_file_paths, test_file_paths = apply_split(data_split, data_dir, label_dir)
    elif train_volumes and test_volumes:
        train_file_paths = du.load_file_paths(data_dir, label_dir, data_id, train_volumes)
        test_file_paths = du.load_file_paths(data_dir, label_dir, data_id, test_volumes)
    else:
        raise ValueError('You must provide either a split ratio or train and test volume lists')

    print("Train dataset size: %d, Test dataset size: %d" % (len(train_file_paths), len(test_file_paths)))

    # Load, pre-process and write the training data
    print("===Train data===")
    data_train, label_train, class_weights_train, weights_train, _ = du.load_dataset(train_file_paths,
                                                                                     orientation,
                                                                                     remap_config=remap_config,
                                                                                     return_weights=True,
                                                                                     reduce_slices=True,
                                                                                     remove_black=True)
    _write_h5(data_train, label_train, class_weights_train, weights_train, f, mode='train')

    # Load, pre-process and write the test data
    print("===Test data===")
    data_test, label_test, class_weights_test, weights_test, _ = du.load_dataset(test_file_paths,
                                                                                 orientation,
                                                                                 remap_config=remap_config,
                                                                                 return_weights=True,
                                                                                 reduce_slices=True,
                                                                                 remove_black=True)
    _write_h5(data_test, label_test, class_weights_test, weights_test, f, mode='test')


if __name__ == "__main__":
    print("* Start *")
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', '-dd', required=True,
                        help='Base directory of the data folder. This folder should contain one folder per volume.')
    parser.add_argument('--label_dir', '-ld', required=True,
                        help='Base directory of all the label files. This folder should have one file per volume, '
                             'with the same name as the corresponding volume folder inside data_dir.')
    parser.add_argument('--data_split', '-ds', required=False,
                        help='Ratio for randomly splitting the data into train and test, e.g. 80,20')
    parser.add_argument('--train_volumes', '-trv', required=False,
                        help='Path to a text file containing the list of volumes to be used for training')
    parser.add_argument('--test_volumes', '-tev', required=False,
                        help='Path to a text file containing the list of volumes to be used for testing')
    parser.add_argument('--data_id', '-id', required=True, help='Valid options are "MALC", "ADNI", "CANDI" and "IBSR"')
    parser.add_argument('--remap_config', '-rc', required=True, help='Valid options are "FS" and "Neo"')
    parser.add_argument('--orientation', '-o', required=True, help='Valid options are COR, AXI, SAG')
    parser.add_argument('--destination_folder', '-df', required=True, help='Path where the h5 files will be generated')
    args = parser.parse_args()

    common_utils.create_if_not(args.destination_folder)

    # Output file paths for the train and test splits
    f = {
        'train': {
            "data": os.path.join(args.destination_folder, "Data_train.h5"),
            "label": os.path.join(args.destination_folder, "Label_train.h5"),
            "weights": os.path.join(args.destination_folder, "Weight_train.h5"),
            "class_weights": os.path.join(args.destination_folder, "Class_Weight_train.h5"),
        },
        'test': {
            "data": os.path.join(args.destination_folder, "Data_test.h5"),
            "label": os.path.join(args.destination_folder, "Label_test.h5"),
            "weights": os.path.join(args.destination_folder, "Weight_test.h5"),
            "class_weights": os.path.join(args.destination_folder, "Class_Weight_test.h5")
        }
    }

    convert_h5(args.data_dir, args.label_dir, args.data_split, args.train_volumes, args.test_volumes, f,
               args.data_id,
               args.remap_config,
               args.orientation)
    print("* Finish *")