|
a |
|
b/Section 3 Simulate DIMSE/src/inference_dcm.py |
|
|
1 |
""" |
|
|
2 |
Here we do inference on a DICOM volume, constructing the volume first, and then sending it to the |
|
|
3 |
clinical archive |
|
|
4 |
|
|
|
5 |
This code will do the following: |
|
|
6 |
1. Identify the series to run HippoCrop.AI algorithm on from a folder containing multiple studies |
|
|
7 |
2. Construct a NumPy volume from a set of DICOM files |
|
|
8 |
3. Run inference on the constructed volume |
|
|
9 |
4. Create report from the inference |
|
|
10 |
5. Call a shell script to push report to the storage archive |
|
|
11 |
""" |
|
|
12 |
|
|
|
13 |
import os |
|
|
14 |
import sys |
|
|
15 |
import datetime |
|
|
16 |
import time |
|
|
17 |
import shutil |
|
|
18 |
import subprocess |
|
|
19 |
|
|
|
20 |
import numpy as np |
|
|
21 |
import pydicom |
|
|
22 |
|
|
|
23 |
from PIL import Image |
|
|
24 |
from PIL import ImageFont |
|
|
25 |
from PIL import ImageDraw |
|
|
26 |
|
|
|
27 |
from inference.UNetInferenceAgent import UNetInferenceAgent |
|
|
28 |
|
|
|
29 |
def load_dicom_volume_as_numpy_from_list(dcmlist):
    """Build a 3D NumPy volume from a list of PyDicom datasets.

    Assumes that all datasets in the list belong to a single series.

    Arguments:
        dcmlist {list of PyDicom objects} -- slices of one series

    Returns:
        tuple of (3D volume, header of the 1st image)
    """
    # In the real world a lot of validation would happen here.
    # Order the slices anatomically before stacking; each axial slice is
    # flipped and transposed so volume orientation matches what the model expects.
    ordered = sorted(dcmlist, key=lambda d: d.InstanceNumber)
    slices = []
    for dcm in ordered:
        slices.append(np.flip(dcm.pixel_array).T)

    # Series-level metadata is taken from the first file in the list,
    # assuming instance-specific values will be ignored (a common approach).
    hdr = dcmlist[0]

    # Zero out Pixel Data: callers of this function only need the metadata,
    # and the pixel content is already captured in the stacked volume.
    hdr.PixelData = None

    volume = np.stack(slices, 2)
    return (volume, hdr)
|
|
52 |
|
|
|
53 |
def get_predicted_volumes(pred):
    """Gets volumes (in voxels) of the two hippocampal structures.

    Arguments:
        pred {Numpy array} -- array with labels. Assuming 0 is bg, 1 is anterior, 2 is posterior

    Returns:
        A dictionary with keys "anterior", "posterior" and "total" mapping to
        the respective voxel counts
    """
    # Count labeled voxels directly. The previous code summed the label
    # *values* (and halved the label-2 sum), which equals the count only for
    # exactly these integer labels; counting booleans states the intent and
    # stays correct if the label scheme ever changes.
    volume_ant = int(np.sum(pred == 1))
    volume_post = int(np.sum(pred == 2))
    total_volume = volume_ant + volume_post

    return {"anterior": volume_ant, "posterior": volume_post, "total": total_volume}
|
|
70 |
|
|
|
71 |
def create_report(inference, header, orig_vol, pred_vol):
    """Generates an image with inference report

    Arguments:
        inference {Dictionary} -- dict containing anterior, posterior and total volume values
        header {PyDicom Dataset} -- DICOM header
        orig_vol {Numpy array} -- original volume
        pred_vol {Numpy array} -- predicted label volume

    Returns:
        PIL image
    """
    # The code below uses the PIL image library to compose an RGB image that
    # will go into the report. A standard way of storing measurement data in
    # DICOM archives is creating such reports and sending them on as
    # Secondary Capture IODs:
    # http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.8.html
    pimg = Image.new("RGB", (1000, 1000))
    draw = ImageDraw.Draw(pimg)

    header_font = ImageFont.truetype("assets/Roboto-Regular.ttf", size=40)
    main_font = ImageFont.truetype("assets/Roboto-Regular.ttf", size=20)

    # Pick three axial slices spread through the cropped volume for display.
    # An alternative worth exploring: show the three slices with the largest
    # labeled hippocampus area instead of fixed fractional positions.
    slice_nums = [orig_vol.shape[2]//3, orig_vol.shape[2]//2, orig_vol.shape[2]*3//4]

    draw.text((10, 0), "HippoVolume.AI", (255, 255, 255), font=header_font)
    # Typo fix: "Volue" -> "Volume" in the burned-in report text.
    draw.multiline_text((10, 90),
                        f"Patient ID: {header.PatientID}\n Total Images in Acquisition: {header[(0x0020,0x1002)].value}\n Axial Slices: {slice_nums}\n Calculated Hippocampus Total Volume: {inference['total']} \n Calculated Hippocampus Ant Volume: {inference['anterior']}\n Calculated Hippocampus Posterior Volume: {inference['posterior']}",
                        (255, 255, 255), font=main_font)

    # Overlay the prediction on the original data. Both volumes are
    # normalized to [0..255] before blending; the previous code multiplied
    # the raw labels by 0xff (label 2 -> 510), double-weighting the posterior
    # structure in the overlay.
    pred_max = np.max(pred_vol)
    pred_norm = (pred_vol / pred_max) * 0xff if pred_max > 0 else pred_vol * 0.0
    orig_norm = (orig_vol / np.max(orig_vol)) * 0xff
    overlay = pred_norm + orig_norm
    overlay_norm = (overlay / np.max(overlay)) * 0xff

    dt = datetime.date.today().strftime("%Y%m%d")
    tm = datetime.datetime.now().strftime("%H%M%S")

    for n, s in enumerate(slice_nums):
        # Numpy array is flipped, transposed and cast to uint8 so it can be
        # turned into a grayscale PIL image.
        nd_img = np.flip(overlay_norm[:, :, s]).T.astype(np.uint8)
        pil_i = Image.fromarray(nd_img, mode="L").convert("RGBA").resize((300, 400))
        # Paste the slice into the main report image object (pimg)
        pil_i_box = (10 + 325 * n, 400)
        pimg.paste(pil_i, box=pil_i_box)
        # Also save each slice to the out folder for later inspection
        pil_i.save(os.path.join("..", "out", header.PatientID + "_" + dt + "_" + tm + '_slice' + str(s) + '.png'), format='png')

    return pimg
|
|
152 |
|
|
|
153 |
def save_report_as_dcm(header, report, path):
    """Writes the supplied image as a DICOM Secondary Capture file

    Arguments:
        header {PyDicom Dataset} -- original DICOM file header
        report {PIL image} -- image representing the report
        path {Where to save the report}

    Returns:
        N/A
    """
    # Creates a DICOM Secondary Capture instance that will be correctly
    # interpreted by most imaging viewers, including OHIF.
    # Requirements for a valid Secondary Capture IOD are listed in the
    # Modules table (A.8-1) of the standard:
    # http://dicom.nema.org/medical/dicom/2019e/output/html/part03.html#sect_A.8

    # Start from a copy of the original header so patient/study context is
    # preserved, then override whatever makes this a Secondary Capture image.
    ds = pydicom.Dataset(header)

    ds.file_meta = pydicom.Dataset()
    ds.file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian
    ds.is_little_endian = True
    ds.is_implicit_VR = False

    # Switch the SOP class to Secondary Capture Image Storage
    ds.SOPClassUID = "1.2.840.10008.5.1.4.1.1.7"
    ds.file_meta.MediaStorageSOPClassUID = ds.SOPClassUID

    # The report lives in its own single-image series, so mint fresh UIDs
    ds.SeriesInstanceUID = pydicom.uid.generate_uid()
    ds.SOPInstanceUID = pydicom.uid.generate_uid()
    ds.file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID

    ds.Modality = "OT"  # Other
    ds.SeriesDescription = "HippoVolume.AI"
    ds.ImageType = r"DERIVED\PRIMARY\AXIAL"  # we derive this image from patient data

    # Describe the RGB pixel layout: three interleaved 8-bit samples per pixel
    ds.Rows = report.height
    ds.Columns = report.width
    ds.SamplesPerPixel = 3
    ds.PhotometricInterpretation = "RGB"
    ds.PlanarConfiguration = 0  # bytes encode pixels as R1G1B1R2G2B2... rather than R1R2...G1G2...
    ds.BitsAllocated = 8
    ds.BitsStored = 8
    ds.HighBit = 7
    ds.PixelRepresentation = 0

    # Stamp study/series date and time with "now"
    dt = datetime.date.today().strftime("%Y%m%d")
    tm = datetime.datetime.now().strftime("%H%M%S")
    ds.StudyDate = dt
    ds.StudyTime = tm
    ds.SeriesDate = dt
    ds.SeriesTime = tm

    ds.ImagesInAcquisition = 1

    # Empty window center/width so most viewers default to auto W/L
    ds.WindowCenter = ""
    ds.WindowWidth = ""

    # Data imprinted directly into image pixels is called "burned in annotation"
    ds.BurnedInAnnotation = "YES"

    ds.PixelData = report.tobytes()

    pydicom.filewriter.dcmwrite(path, ds, write_like_original=False)
|
|
234 |
|
|
|
235 |
def get_series_for_inference(path):
    """Reads multiple series from one folder and picks the one
    to run inference on.

    Arguments:
        path {string} -- location of the DICOM files

    Returns:
        list of PyDicom objects making up the selected series
        (empty list when no single series can be identified)
    """
    # Here we are assuming that path is a directory that contains a full study
    # as a collection of files. We read all of them into PyDicom objects so
    # that we can filter on metadata.
    dicoms = [pydicom.dcmread(os.path.join(path, f)) for f in os.listdir(path)]

    # The HippoCrop tool labels its output via the Series Description tag
    # (0008,103E), so select instances whose description is "HippoCrop".
    # NOTE: the previous code compared the element's repval against
    # "'HippoCrop'" (quotes included), which depends on pydicom's repr
    # formatting and raises KeyError when the tag is absent; comparing the
    # value itself is robust, and files lacking the tag are simply skipped.
    series_for_inference = [
        d for d in dicoms
        if getattr(d, "SeriesDescription", None) == "HippoCrop"
    ]

    # Sanity check: everything we selected must belong to exactly one series
    if len({f.SeriesInstanceUID for f in series_for_inference}) != 1:
        print("Error: can not figure out what series to run inference on")
        return []

    return series_for_inference
|
|
277 |
|
|
|
278 |
def os_command(command):
    """Run *command* through the operating system's default shell."""
    # os.system works under Windows as well. On Linux an interactive bash
    # could be used instead:
    #   sp = subprocess.Popen(["/bin/bash", "-i", "-c", command])
    #   sp.communicate()
    os.system(command)
|
|
285 |
|
|
|
286 |
if __name__ == "__main__":
    # This code expects a single command line argument with link to the
    # directory containing routed studies
    if len(sys.argv) != 2:
        print("You should supply one command line argument pointing to the routing folder. Exiting.")
        # Exit with a non-zero status so callers can detect the misuse
        # (a bare sys.exit() would report success).
        sys.exit(1)

    # Find all subdirectories within the supplied directory. We assume that
    # one subdirectory contains a full study
    subdirs = [os.path.join(sys.argv[1], d) for d in os.listdir(sys.argv[1]) if
               os.path.isdir(os.path.join(sys.argv[1], d))]
    if not subdirs:
        # Guard against an empty routing folder (indexing [0] below would crash)
        print(f"No study directories found in {sys.argv[1]}. Exiting.")
        sys.exit(1)

    # Get the most recently modified directory - the latest routed study
    study_dir = sorted(subdirs, key=lambda dir: os.stat(dir).st_mtime, reverse=True)[0]

    print(f"Looking for series to run inference on in directory {study_dir}...")

    # Build the NumPy volume from the HippoCrop series found in this study
    volume, header = load_dicom_volume_as_numpy_from_list(get_series_for_inference(study_dir))
    print(f"Found series of {volume.shape[2]} axial slices")

    print("HippoVolume.AI: Running inference...")
    inference_agent = UNetInferenceAgent(
        device="cpu",
        parameter_file_path=r"inference/model.pth")

    # single_volume_inference_unpadded takes a volume of arbitrary size and
    # reshapes y and z dimensions to the patch size used by the model before
    # running inference
    pred_label = inference_agent.single_volume_inference_unpadded(np.array(volume))

    pred_volumes = get_predicted_volumes(pred_label)

    # Create and save the report
    dt = datetime.date.today().strftime("%Y%m%d")
    tm = datetime.datetime.now().strftime("%H%M%S")

    print("Creating and pushing report...")
    report_save_path = os.path.join('..', 'out', dt+tm+'_report.dcm')

    report_img = create_report(pred_volumes, header, volume, pred_label)
    save_report_as_dcm(header, report_img, report_save_path)

    # Send report to our storage archive with a DICOM C-STORE request (storescu)
    # to the Orthanc server running on port 4242 of the local machine.
    # Comment out when Orthanc server is not active.
    # os_command(f'storescu 127.0.0.1 4242 -v -aec HIPPOAI {"src/"+report_save_path}')

    # Sleep to let our StoreSCP server process the report (in our setup the
    # main archive routes everything sent to it, including the freshly
    # generated report), then remove the study dir (requires root).
    # Comment out to keep files.
    # time.sleep(2)
    # shutil.rmtree(study_dir, onerror=lambda f, p, e: print(f"Error deleting: {e[1]}"))

    print(f"Inference successful on {header['SOPInstanceUID'].value}, MRI Dimensions: {pred_label.shape}",
          f"volume anterior: {pred_volumes['anterior']}, ",
          f"volume posterior: {pred_volumes['posterior']}, total volume: {pred_volumes['total']}")