|
a |
|
b/data/dicom_dataset.py |
|
|
1 |
# Manuel A. Morales (moralesq@mit.edu) |
|
|
2 |
# Harvard-MIT Department of Health Sciences & Technology |
|
|
3 |
# Athinoula A. Martinos Center for Biomedical Imaging |
|
|
4 |
|
|
|
5 |
import os |
|
|
6 |
import h5py |
|
|
7 |
import glob |
|
|
8 |
import pydicom |
|
|
9 |
import warnings |
|
|
10 |
import numpy as np |
|
|
11 |
import pandas as pd |
|
|
12 |
import nibabel as nib |
|
|
13 |
|
|
|
14 |
from data.base_dataset import BaseDataset, Transforms |
|
|
15 |
from data.image_folder import make_dataset |
|
|
16 |
|
|
|
17 |
class DICOMDataset(BaseDataset): |
|
|
18 |
|
|
|
19 |
def __init__(self, opt): |
|
|
20 |
BaseDataset.__init__(self, opt) |
|
|
21 |
self.filenames = sorted(make_dataset(opt.dataroot, opt.max_dataset_size, 'DICOM')) |
|
|
22 |
self.metadata, self.acquisitions = self.read_metadata() |
|
|
23 |
|
|
|
24 |
def __len__(self): |
|
|
25 |
return len(self.acquisitions) |
|
|
26 |
|
|
|
27 |
def __getitem__(self, idx): |
|
|
28 |
pid = '_'.join(self.acquisitions[idx].split('_')[:-1]) #PatientName |
|
|
29 |
uid = self.acquisitions[idx].split('_')[-1] #AcquisitionInstanceUID |
|
|
30 |
df = self.metadata[(self.metadata.PatientName==pid)&(self.metadata.AcquisitionInstanceUID==uid)] |
|
|
31 |
return self.load_acquisition(df) |
|
|
32 |
|
|
|
33 |
def read_metadata(self): |
|
|
34 |
|
|
|
35 |
metadata = {'FileName':[], |
|
|
36 |
'PatientName':[], |
|
|
37 |
'SeriesInstanceUID':[], |
|
|
38 |
'StudyInstanceUID':[], |
|
|
39 |
'ProtocolName':[], |
|
|
40 |
'SeriesTime':[], |
|
|
41 |
'TriggerTime':[], |
|
|
42 |
'InstanceNumber':[], |
|
|
43 |
'ImageOrientationPatient':[], |
|
|
44 |
'ImagePositionPatient':[], |
|
|
45 |
'SliceLocation':[], |
|
|
46 |
'PixelSpacing':[], |
|
|
47 |
'SliceThickness':[], |
|
|
48 |
'AcquisitionInstanceUID':[], |
|
|
49 |
'SliceInstanceUID':[]} |
|
|
50 |
|
|
|
51 |
for filename in self.filenames: |
|
|
52 |
dicom = pydicom.read_file(filename) |
|
|
53 |
|
|
|
54 |
metadata['FileName'] += [filename] |
|
|
55 |
metadata['PatientName'] += [str(dicom[0x0010, 0x0010].value)] |
|
|
56 |
metadata['SeriesInstanceUID'] += [dicom.SeriesInstanceUID] |
|
|
57 |
metadata['StudyInstanceUID'] += [dicom.StudyInstanceUID] |
|
|
58 |
metadata['ProtocolName'] += [dicom.ProtocolName] |
|
|
59 |
metadata['SeriesTime'] += [dicom.SeriesTime] |
|
|
60 |
metadata['TriggerTime'] += [dicom.TriggerTime] |
|
|
61 |
metadata['InstanceNumber'] += [dicom.InstanceNumber] |
|
|
62 |
metadata['ImageOrientationPatient'] += [dicom.ImageOrientationPatient] |
|
|
63 |
metadata['ImagePositionPatient'] += [dicom.ImagePositionPatient] |
|
|
64 |
metadata['SliceLocation'] += [dicom.SliceLocation] |
|
|
65 |
metadata['PixelSpacing'] += [dicom.PixelSpacing] |
|
|
66 |
metadata['SliceThickness'] += [dicom.SliceThickness] |
|
|
67 |
|
|
|
68 |
metadata['AcquisitionInstanceUID'] += [dicom.SeriesInstanceUID.split('.')[9]] |
|
|
69 |
metadata['SliceInstanceUID'] += [dicom.SeriesInstanceUID.split('.')[10]] |
|
|
70 |
|
|
|
71 |
metadata = pd.DataFrame(metadata) |
|
|
72 |
|
|
|
73 |
acquisitions = [] |
|
|
74 |
print('Found %d patient(s):'%(len(metadata.PatientName.unique()))) |
|
|
75 |
for patient in metadata.PatientName.unique(): |
|
|
76 |
acqs = metadata[metadata.PatientName==patient].AcquisitionInstanceUID.unique().tolist() |
|
|
77 |
print(patient, ': with', len(acqs), 'acquisitions:') |
|
|
78 |
for acq in sorted(acqs): |
|
|
79 |
acquisitions += [patient+'_'+acq] |
|
|
80 |
print(' ', acquisitions[-1]) |
|
|
81 |
|
|
|
82 |
return metadata, acquisitions |
|
|
83 |
|
|
|
84 |
|
|
|
85 |
def load_acquisition(self, df): |
|
|
86 |
|
|
|
87 |
slices = df.SeriesInstanceUID.unique().tolist() |
|
|
88 |
phases = df.SeriesInstanceUID.value_counts().unique() |
|
|
89 |
assert len(phases)==1, 'Number of phases does not match!' |
|
|
90 |
number_of_slices = len(slices) |
|
|
91 |
number_of_phases = int(phases) |
|
|
92 |
pixel_array = pydicom.read_file(df.iloc[0].FileName).pixel_array |
|
|
93 |
|
|
|
94 |
sax_4D = np.zeros((pixel_array.shape +(number_of_slices, number_of_phases)), dtype=pixel_array.dtype) |
|
|
95 |
|
|
|
96 |
for z_slice, series in enumerate(slices): |
|
|
97 |
for phase in range(number_of_phases): |
|
|
98 |
filename = df[df.SeriesInstanceUID==series].sort_values('InstanceNumber').iloc[phase].FileName |
|
|
99 |
dicom = pydicom.read_file(filename) |
|
|
100 |
|
|
|
101 |
sax_4D[:,:,z_slice,phase] += dicom.pixel_array |
|
|
102 |
|
|
|
103 |
affine = read_affine(df.iloc[df.SliceLocation.argmin()]) |
|
|
104 |
|
|
|
105 |
return nib.Nifti1Image(sax_4D, affine) |
|
|
106 |
|
|
|
107 |
def extract_cosines(ImageOrientationPatient): |
|
|
108 |
row_cosine = np.array(ImageOrientationPatient[:3]) |
|
|
109 |
column_cosine = np.array(ImageOrientationPatient[3:]) |
|
|
110 |
slice_cosine = np.cross(row_cosine, column_cosine) |
|
|
111 |
return np.stack((row_cosine, column_cosine, slice_cosine)) |
|
|
112 |
|
|
|
113 |
def read_affine(df, viewer='slicer'): |
|
|
114 |
Zooms = np.array(list(df.PixelSpacing)+[df.SliceThickness], dtype=float) |
|
|
115 |
ImageOrientationPatient = np.array(df.ImageOrientationPatient, dtype=float) |
|
|
116 |
ImagePositionPatient = np.array(df.ImagePositionPatient, dtype=float) |
|
|
117 |
|
|
|
118 |
ijk2ras = extract_cosines(ImageOrientationPatient) |
|
|
119 |
if viewer == "slicer": |
|
|
120 |
ijk2ras = (ijk2ras*np.array([-1,-1,1])).T |
|
|
121 |
ImagePositionPatient = ImagePositionPatient*np.array([-1,-1,1]) |
|
|
122 |
|
|
|
123 |
affine = np.stack((ijk2ras[:,0]*Zooms[0], |
|
|
124 |
ijk2ras[:,1]*Zooms[1], |
|
|
125 |
ijk2ras[:,2]*Zooms[2], |
|
|
126 |
ImagePositionPatient), axis=1) |
|
|
127 |
|
|
|
128 |
|
|
|
129 |
|
|
|
130 |
|
|
|
131 |
|
|
|
132 |
|
|
|
133 |
|
|
|
134 |
|
|
|
135 |
|
|
|
136 |
|
|
|
137 |
|
|
|
138 |
|
|
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
|
142 |
return np.vstack((affine,[[0,0,0,1]])) |