|
a |
|
b/data/image_folder.py |
|
|
1 |
# Manuel A. Morales (moralesq@mit.edu) |
|
|
2 |
# Harvard-MIT Department of Health Sciences & Technology |
|
|
3 |
# Athinoula A. Martinos Center for Biomedical Imaging |
|
|
4 |
|
|
|
5 |
import os |
|
|
6 |
import pydicom |
|
|
7 |
|
|
|
8 |
# Lookup table: data-format name -> list of markers identifying files of that
# format. For 'NIFTI' and 'H5PY' the markers are file-name suffixes (see
# is_data_file). For 'DICOM' they are substrings searched for in the DICOM
# ProtocolName attribute (see make_dataset), not file extensions.
EXTENSIONS = {
    'NIFTI': ['.nii.gz', '.nii'],
    'DICOM': ['SAX'],
    'H5PY': ['.h5'],
}
|
|
12 |
|
|
|
13 |
def is_data_file(filename, dformat="NIFTI"):
    """Tell whether `filename` ends with a suffix registered for `dformat`.

    Args:
        filename: File name (or path) to test.
        dformat: Key into the module-level EXTENSIONS table.

    Returns:
        True if `filename` ends with any entry of EXTENSIONS[dformat],
        otherwise False.

    Raises:
        KeyError: If `dformat` is not a key of EXTENSIONS.
    """
    suffixes = tuple(EXTENSIONS[dformat])
    # str.endswith accepts a tuple and succeeds if any one suffix matches.
    return filename.endswith(suffixes)
|
|
15 |
|
|
|
16 |
def make_dataset(dir, max_dataset_size=float("inf"), dformat="NIFTI"):
    """Recursively collect paths of data files of type `dformat` under `dir`.

    For ``dformat == 'DICOM'`` every file is opened with pydicom and kept when
    its ProtocolName contains one of the EXTENSIONS['DICOM'] entries; files
    that cannot be read, or that have no ProtocolName, are skipped. For any
    other format a file is kept when ``is_data_file`` accepts its name.

    Args:
        dir: Root directory to walk.
        max_dataset_size: Upper bound on the number of paths returned. The
            default (infinity) returns everything found.
        dformat: Key into the module-level EXTENSIONS table.

    Returns:
        List of file paths, ordered by directory and then by file name,
        truncated to at most `max_dataset_size` entries.

    Raises:
        AssertionError: If `dir` is not an existing directory.
        KeyError: If `dformat` is not a key of EXTENSIONS and at least one
            file is examined.
    """
    assert os.path.isdir(dir), '%s is not a valid directory' % dir

    filenames = []
    for root, _, fnames in sorted(os.walk(dir)):
        # os.walk lists files in arbitrary (filesystem) order; sort them so
        # the result, and the subset kept after truncation, is reproducible.
        for fname in sorted(fnames):
            path = os.path.join(root, fname)
            if dformat == 'DICOM':
                try:
                    # Only the header is needed, so skip the pixel data.
                    # dcmread replaces read_file, which pydicom 3.0 removed.
                    protocol_name = pydicom.dcmread(
                        path, stop_before_pixels=True
                    ).ProtocolName
                except Exception:
                    # Best effort: not a DICOM file, unreadable, or no
                    # ProtocolName. Unlike a bare except, this lets
                    # KeyboardInterrupt/SystemExit propagate.
                    continue
                if any(protocol in protocol_name for protocol in EXTENSIONS[dformat]):
                    filenames.append(path)
            elif is_data_file(fname, dformat=dformat):
                filenames.append(path)

    # Truncate only when needed: int() keeps finite float limits usable as a
    # slice bound, and the infinite default never reaches the conversion.
    if len(filenames) > max_dataset_size:
        filenames = filenames[:int(max_dataset_size)]
    return filenames