# adpkd_segmentation/data/data_utils.py
# %%
import functools
import glob
from collections import defaultdict, OrderedDict
from pathlib import Path
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pydicom
from PIL import Image
from adpkd_segmentation.data.data_config import LABELED, UNLABELED
MIN_VALUE = "global_min_int16"
MAX_VALUE = "global_max_int16"
MIN_IMAGE_VALUE = "min_image_int16"
MAX_IMAGE_VALUE = "max_image_int16"
PATIENT = "patient"
SEQUENCE = "seq"
KIDNEY_PIXELS = "kidney_pixels"
MR = "MR"
VOXEL_VOLUME = "vox_vol"
DIMENSION = "dim"
STUDY_TKV = "study_tkv"
# %%
def int16_to_uint8(int16):
return cv2.normalize(
int16,
None,
alpha=0,
beta=255,
norm_type=cv2.NORM_MINMAX,
dtype=cv2.CV_8U,
)
def normalize(int16, attribs):
min_value = attribs[MIN_VALUE]
max_value = attribs[MAX_VALUE]
new_min = 0
new_max = 255
# linear scaling
scaled = (new_max - new_min) / (max_value - min_value) * (
int16 - min_value
) + new_min
return scaled.astype(dtype=np.uint8)
def add_patient_sequence_min_max(dcm2attribs):
# modifies dcm2attribs with additional info
patient_seq_dict_mins = {}
patient_seq_dict_maximums = {}
for dcm, attribs in dcm2attribs.items():
patient = attribs[PATIENT]
seq = attribs[SEQUENCE]
im_min_value = attribs[MIN_IMAGE_VALUE]
im_max_value = attribs[MAX_IMAGE_VALUE]
current_min = patient_seq_dict_mins.get((patient, seq), float("inf"))
current_max = patient_seq_dict_maximums.get(
(patient, seq), float("-inf")
)
if im_min_value <= current_min:
patient_seq_dict_mins[(patient, seq)] = im_min_value
if im_max_value >= current_max:
patient_seq_dict_maximums[(patient, seq)] = im_max_value
# store global min and max for each dcm
for dcm, attribs in dcm2attribs.items():
patient = attribs[PATIENT]
seq = attribs[SEQUENCE]
attribs[MIN_VALUE] = patient_seq_dict_mins[(patient, seq)]
attribs[MAX_VALUE] = patient_seq_dict_maximums[(patient, seq)]
def TKV_update(dcm2attribs):
studies = defaultdict(int)
for dcm, attribs in dcm2attribs.items():
study_id = (attribs[PATIENT], attribs[MR])
studies[study_id] += attribs[KIDNEY_PIXELS] * attribs[VOXEL_VOLUME]
for dcm, attribs in dcm2attribs.items():
tkv = studies[(attribs[PATIENT], attribs[MR])]
attribs[STUDY_TKV] = tkv
return studies, dcm2attribs
def tensor_dict_to_device(tensor_dict, device):
out = {}
for k, v in tensor_dict.items():
out[k] = v.to(device)
return out
class NormalizePatientSeq:
@staticmethod
def __call__(int16, attribs):
return normalize(int16, attribs)
@staticmethod
def update_dcm2attribs(dcm2attribs):
print(
"Adding global min and max image value for each "
"(patient, sequence) tuple"
)
add_patient_sequence_min_max(dcm2attribs)
def get_dcms_paths(dir_list):
all_files = []
for study_dir in dir_list:
print("processing {} ".format(study_dir))
files = glob.glob("{}/**/*.dcm".format(study_dir), recursive=True)
all_files.extend(files)
print("total files... --> {} \n".format(len(all_files)))
return all_files
def get_labeled():
dcms = glob.glob("{}/*.dcm".format(LABELED))
return dcms
def get_unlabeled():
dcms = glob.glob("{}/*.dcm".format(UNLABELED))
return dcms
def get_y_Path(x):
"""Get label path from dicom path"""
if isinstance(x, str):
x = Path(x)
y = str(x.absolute()).replace("DICOM_anon", "Ground")
y = y.replace(".dcm", ".png")
y = Path(y)
return y
def path_2dcm_int16(fname):
if not isinstance(fname, str):
fname = str(fname)
dcm = pydicom.dcmread(fname)
return dcm.pixel_array
def path_2label(fname):
if not isinstance(fname, str):
fname = str(fname)
label = Image.open(fname)
return np.array(label)
def dcm_attributes(dcm, label_status=True, WCM=True):
attribs = {}
# dicom header attribs
pdcm = pydicom.dcmread(dcm)
arr_int16 = pdcm.pixel_array
# TODO refactor for this PatientID clause
# WCM PatientIDs are of length 10
if WCM is True and len(pdcm.PatientID) != 10:
attribs[PATIENT] = pdcm.PatientID[:-3]
attribs[MR] = pdcm.PatientID[-3:]
attribs[SEQUENCE] = pdcm.SeriesDescription
else:
attribs[PATIENT] = pdcm.PatientID
attribs[MR] = pdcm.SeriesDescription
attribs[SEQUENCE] = pdcm.SeriesDescription
attribs[MIN_IMAGE_VALUE] = arr_int16.min()
attribs[MAX_IMAGE_VALUE] = arr_int16.max()
# pixels in mask --> kidney
if label_status is True:
label = np.array(Image.open(get_y_Path(dcm)))
pos_pixels = np.sum(label > 0)
attribs[KIDNEY_PIXELS] = pos_pixels
else:
attribs[KIDNEY_PIXELS] = None
"""
Volume for pixels in mask = VOXEL_VOLUME * pos_pixels
TKV calculated as summation of dcm volumes in a study
Note: Dimension (which determines pixel-count) must be normal to calc. TKV
"""
dX_Y = float(pdcm.PixelSpacing[0])
dZ = None
if 'SpacingBetweenSlices' in pdcm:
dZ = float(pdcm.SpacingBetweenSlices)
elif 'SliceThickness' in pdcm:
dZ = float(pdcm.SliceThickness)
else:
raise "dZ not available -- no SpacingBetweenSlices nor SliceThickness"
attribs[VOXEL_VOLUME] = dZ * (dX_Y ** 2)
attribs[DIMENSION] = arr_int16.shape
return attribs
@functools.lru_cache()
def make_dcmdicts(dcms, label_status=True, WCM=True):
"""creates two dictionares with dcm attributes
Arguments:
dcms (tuple): tuple of dicoms. Note, tuple is used, rather than a list,
so the input is hashable for LRU.
Returns:
dcm2attribs (dict), pt2dcm (dict):
Dictionaries with dcms to attribs and patients to dcms
"""
# convert tuple back to list
if not isinstance(dcms, list):
dcms = list(dcms)
dcm2attribs = OrderedDict()
patient2dcm = OrderedDict()
exceptions = []
for dcm in dcms:
try:
attribs = dcm_attributes(dcm, label_status, WCM=WCM)
dcm2attribs[dcm] = attribs
patient2dcm.setdefault(attribs[PATIENT], []).append(dcm)
except Exception as e:
exceptions.append(f"{e} with dcm:{dcm.name} ")
if len(exceptions) > 0:
print(
"\n\nThe following exceptions were encountered: \n"
f" {exceptions}\n\n"
)
return dcm2attribs, patient2dcm
# deprecated function
def filter_dcm2attribs(filters, dcm2attribs):
"""filters input dcm2attribs dict based on dict of filters
(Note: Modifies input dcm2attribs)
Arguments:
filters {dict} -- dict of filters
e.g. filters = {'seq':'AX SSFSE ABD/PEL'}
dcm2attribs {dict} -- dict of dcms:
attributes generated by function make_dcmdicts()
Returns:
dcm2attribs {dict} -- dict of dcms to attributes after filter
"""
remove = []
for dcm, attribs in dcm2attribs.items():
for key, value in filters.items():
if key not in attribs or value != attribs[key]:
remove.append(dcm)
for dcm in remove:
del dcm2attribs[dcm]
return dcm2attribs
def masks_to_colorimg(masks):
"""converts one hot encoded mask to color encoded image"""
if np.ndim(masks) == 2:
masks = np.expand_dims(masks, 0)
# color codes for mask .png labels
colors = [
(201, 58, 64), # Red
(242, 207, 1), # Yellow
(0, 152, 75), # Green
(101, 172, 228), # Blue
(245, 203, 250), # Pink
(239, 159, 40),
] # Orange
colors = np.asarray(colors)[: masks.shape[0]]
_, height, width = masks.shape
colorimg = np.ones((height, width, 3), dtype=np.float32) * 255
for y in range(height):
for x in range(width):
pixel_color = np.asarray(masks[:, y, x] > 0.5)
selected_colors = colors[pixel_color]
# assign pixels mean color RGB for display
if len(selected_colors) > 0:
colorimg[y, x, :] = np.mean(selected_colors, axis=0)
return colorimg.astype(np.uint8)
def display_sample(sample):
dcm, mask = sample
f, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(dcm, cmap="gray")
ax2.imshow(dcm, cmap="gray")
ax2.imshow(masks_to_colorimg(mask), alpha=0.5)
def display_verbose_sample(verbose_sample):
(dcm, mask), path, attribs = verbose_sample
f, (ax1, ax2) = plt.subplots(1, 2)
ax1.imshow(dcm, cmap="gray")
ax2.imshow(dcm, cmap="gray")
ax2.imshow(masks_to_colorimg(mask), alpha=0.5)
print("\nPath: {}".format(path))
print("\nAttribs: {}".format(attribs))
def display_traindata(inputs, labels):
for index in range(0, inputs.shape[0]):
f, axarr = plt.subplots(1, 2)
axarr[0].imshow(inputs[index][1], cmap="gray")
axarr[1].imshow(inputs[index][1], cmap="gray") # background for mask
axarr[1].imshow(masks_to_colorimg(labels[index]), alpha=0.5)
img = inputs[index][0]
lb = masks_to_colorimg(labels[index])