# Section 3 Simulate DIMSE/src/inference_dcm.py
1
"""
2
Here we do inference on a DICOM volume, constructing the volume first, and then sending it to the
3
clinical archive
4
5
This code will do the following:
6
    1. Identify the series to run HippoCrop.AI algorithm on from a folder containing multiple studies
7
    2. Construct a NumPy volume from a set of DICOM files
8
    3. Run inference on the constructed volume
9
    4. Create report from the inference
10
    5. Call a shell script to push report to the storage archive
11
"""
12
13
import os
14
import sys
15
import datetime
16
import time
17
import shutil
18
import subprocess
19
20
import numpy as np
21
import pydicom
22
23
from PIL import Image
24
from PIL import ImageFont
25
from PIL import ImageDraw
26
27
from inference.UNetInferenceAgent import UNetInferenceAgent
28
29
def load_dicom_volume_as_numpy_from_list(dcmlist):
    """Loads a list of PyDicom objects into a Numpy array.
    Assumes that only one series is in the list.

    Arguments:
        dcmlist {list of PyDicom objects} -- slices of a single series

    Returns:
        tuple of (3D volume, header of the 1st image)
    """
    # Order slices by InstanceNumber so the volume is assembled in anatomical
    # order, then flip/transpose each axial slice into the orientation the
    # rest of the pipeline expects. In the real world you would do a lot of
    # validation here.
    ordered = sorted(dcmlist, key=lambda ds: ds.InstanceNumber)
    slices = [np.flip(ds.pixel_array).T for ds in ordered]

    # Callers of this function only care about series-level metadata, so the
    # header of the first file stands in for the whole series (any
    # instance-specific values are assumed to be ignored - common approach).
    hdr = dcmlist[0]

    # Zero out Pixel Data since users of this function are only interested
    # in the metadata.
    hdr.PixelData = None

    # Stack along the third axis so axial slices index as volume[:, :, i].
    volume = np.stack(slices, 2)
    return (volume, hdr)
52
53
def get_predicted_volumes(pred):
    """Gets volumes of two hippocampal structures from the predicted array.

    Arguments:
        pred {Numpy array} -- array with labels. Assuming 0 is bg, 1 is anterior, 2 is posterior

    Returns:
        A dictionary with respective volumes (in voxels):
        {"anterior": ..., "posterior": ..., "total": ...}
    """
    # Volume is simply the number of voxels carrying each label. The previous
    # implementation summed the label *values* (np.sum(pred[pred==1]) and
    # np.sum(pred[pred==2])/2) which only worked because the labels happen to
    # be 1 and 2, and made the posterior count a float. Count voxels directly.
    volume_ant = np.count_nonzero(pred == 1)
    volume_post = np.count_nonzero(pred == 2)
    total_volume = volume_ant + volume_post

    return {"anterior": volume_ant, "posterior": volume_post, "total": total_volume}
70
71
def create_report(inference, header, orig_vol, pred_vol):
    """Generates an image with the inference report.

    Arguments:
        inference {Dictionary} -- dict with "anterior", "posterior" and "total" volume values
        header {PyDicom Dataset} -- DICOM header of the series (provides PatientID and
            the Images-in-Acquisition tag (0020,1002))
        orig_vol {Numpy array} -- original volume
        pred_vol {Numpy array} -- predicted label volume (same shape as orig_vol)

    Returns:
        PIL image holding the rendered report
    """
    # The report is composed as a plain RGB image. A standard way of storing
    # measurement data in DICOM archives is creating such a report and sending
    # it on as a Secondary Capture IOD
    # (http://dicom.nema.org/medical/dicom/current/output/chtml/part03/sect_A.8.html)
    pimg = Image.new("RGB", (1000, 1000))
    draw = ImageDraw.Draw(pimg)

    # NOTE(review): assumes the font ships under assets/ relative to the
    # working directory - confirm deployment layout.
    header_font = ImageFont.truetype("assets/Roboto-Regular.ttf", size=40)
    main_font = ImageFont.truetype("assets/Roboto-Regular.ttf", size=20)

    # Show three axial slices spread around the middle of the volume where the
    # hippocampus is most likely visible. An alternative is to pick the three
    # slices with the largest predicted hippocampal area.
    slice_nums = [orig_vol.shape[2]//3, orig_vol.shape[2]//2, orig_vol.shape[2]*3//4]

    # Fixed in this revision:
    # - "Total Volue" typo in the rendered text ("Volume").
    # - Removed "pred_ant, pred_post, pred_tot = get_predicted_volumes(pred_vol)":
    #   unpacking the returned dict bound the KEY STRINGS, not the volumes, and
    #   the variables were never used.
    draw.text((10, 0), "HippoVolume.AI", (255, 255, 255), font=header_font)
    draw.multiline_text((10, 90),
                         f"Patient ID: {header.PatientID}\n Total Images in Acquisition: {header[(0x0020,0x1002)].value}\n Axial Slices: {slice_nums}\n Calculated Hippocampus Total Volume: {inference['total']} \n Calculated Hippocampus Ant Volume: {inference['anterior']}\n Calculated Hippocampus Posterior Volume: {inference['posterior']}",
                         (255, 255, 255), font=main_font)

    # Overlay the predicted mask on top of the original data, normalized to
    # [0..255] so each slice can be rendered as an 8-bit grayscale image.
    pred_norm = pred_vol*0xff
    orig_norm = (orig_vol/np.max(orig_vol))*0xff
    overlay_norm = ((pred_norm+orig_norm)/np.max(pred_norm+orig_norm))*0xff

    # Timestamp used in the per-slice PNG file names.
    dt = datetime.date.today().strftime("%Y%m%d")
    tm = datetime.datetime.now().strftime("%H%M%S")

    for n in range(len(slice_nums)):
        # Slices are flipped and transposed to match the orientation used when
        # the volume was constructed.
        nd_img = np.flip(overlay_norm[:,:,slice_nums[n]]).T.astype(np.uint8)

        # Create a PIL image from the numpy array and paste it into the report.
        pil_i = Image.fromarray(nd_img, mode="L").convert("RGBA").resize((300, 400))
        pimg.paste(pil_i, box=(10+325*n, 400))
        # Also keep a PNG copy of each slice in the out folder.
        pil_i.save(os.path.join("..", "out", header.PatientID+"_"+dt+"_"+tm+'_slice'+str(slice_nums[n])+'.png'), format='png')

    return pimg
152
153
def save_report_as_dcm(header, report, path):
    """Writes the supplied image as a DICOM Secondary Capture file.

    Arguments:
        header {PyDicom Dataset} -- original DICOM file header; patient/study
            attributes are copied into the new instance
        report {PIL image} -- image representing the report
        path {string} -- where to save the report

    Returns:
        N/A (writes the file at *path* as a side effect)
    """
    # Code below creates a DICOM Secondary Capture instance that will be
    # correctly interpreted by most imaging viewers including our OHIF.
    # The code here is complete as it is unlikely that as a data scientist you
    # will have to dive this deep into generating DICOMs.

    # Start from a copy of the original header so patient/study-level
    # attributes carry over to the new instance.
    out = pydicom.Dataset(header)

    # NOTE(review): modern pydicom expects a FileMetaDataset here rather than
    # a plain Dataset - confirm against the pydicom version in use.
    out.file_meta = pydicom.Dataset()
    out.file_meta.TransferSyntaxUID = pydicom.uid.ExplicitVRLittleEndian

    # STAND OUT SUGGESTION:
    # If you want to understand better the generation of valid DICOM, remove
    # everything below and try writing your own DICOM generation code from
    # scratch. Refer to the Secondary Capture IOD section of the standard:
    # http://dicom.nema.org/medical/dicom/2019e/output/html/part03.html#sect_A.8
    # The Modules table (A.8-1) lists which modules are mandatory (M),
    # conditional (C) and user-optional (U). Note that we are building an RGB
    # image with three 8-bit samples per pixel.

    # Encoding flags must agree with the Explicit VR Little Endian transfer
    # syntax chosen above.
    out.is_little_endian = True
    out.is_implicit_VR = False

    # We need to change class to Secondary Capture Image Storage.
    out.SOPClassUID = "1.2.840.10008.5.1.4.1.1.7"
    out.file_meta.MediaStorageSOPClassUID = out.SOPClassUID

    # Our report is a separate image series of one image, so it gets fresh
    # series and instance UIDs (the file meta must echo the instance UID).
    out.SeriesInstanceUID = pydicom.uid.generate_uid()
    out.SOPInstanceUID = pydicom.uid.generate_uid()
    out.file_meta.MediaStorageSOPInstanceUID = out.SOPInstanceUID
    out.Modality = "OT" # Other
    out.SeriesDescription = "HippoVolume.AI"

    # Pixel matrix dimensions come from the rendered report image.
    out.Rows = report.height
    out.Columns = report.width

    out.ImageType = r"DERIVED\PRIMARY\AXIAL" # We are deriving this image from patient data
    out.SamplesPerPixel = 3 # we are building an RGB image.
    out.PhotometricInterpretation = "RGB"
    out.PlanarConfiguration = 0 # means that bytes encode pixels as R1G1B1R2G2B2... as opposed to R1R2R3...G1G2G3...
    out.BitsAllocated = 8 # we are using 8 bits/pixel
    out.BitsStored = 8
    out.HighBit = 7
    out.PixelRepresentation = 0 # unsigned integer samples

    # Set time and date of the study/series to "now".
    dt = datetime.date.today().strftime("%Y%m%d")
    tm = datetime.datetime.now().strftime("%H%M%S")
    out.StudyDate = dt
    out.StudyTime = tm
    out.SeriesDate = dt
    out.SeriesTime = tm

    out.ImagesInAcquisition = 1

    # We empty these since most viewers will then default to auto W/L.
    out.WindowCenter = ""
    out.WindowWidth = ""

    # Data imprinted directly into image pixels is called "burned in annotation".
    out.BurnedInAnnotation = "YES"

    # Raw RGB bytes from the PIL image become the Pixel Data element.
    out.PixelData = report.tobytes()

    pydicom.filewriter.dcmwrite(path, out, write_like_original=False)
234
235
def get_series_for_inference(path):
    """Reads multiple series from one folder and picks the one
    to run inference on.

    Arguments:
        path {string} -- location of the DICOM files

    Returns:
        List of PyDicom datasets representing the selected series
        (empty list if exactly one series cannot be identified)
    """
    # We assume that path is a directory containing a full study as a
    # collection of files; read them all so we can filter below.
    dicoms = [pydicom.dcmread(os.path.join(path, f)) for f in os.listdir(path)]

    # The HippoCrop tool labels its cropped output series "HippoCrop" in the
    # Series Description tag (0008,103E). Compare the element *value* rather
    # than its repval string representation: repval is pydicom's repr (it
    # includes quotes and can vary between versions), and indexing the tag
    # directly raised KeyError on files that lack it.
    series_for_inference = [d for d in dicoms
                            if getattr(d, "SeriesDescription", None) == "HippoCrop"]

    # Exactly one series must remain; zero or several means we cannot safely
    # pick a volume to feed to the model.
    if len({f.SeriesInstanceUID for f in series_for_inference}) != 1:
        print("Error: can not figure out what series to run inference on")
        return []

    return series_for_inference
277
278
def os_command(command):
    # Run *command* through the operating system shell; the return status is
    # deliberately ignored (best-effort push to the archive).
    # Bash variant (POSIX only) - uncomment and comment out os.system below:
    # sp = subprocess.Popen(["/bin/bash", "-i", "-c", command])
    # sp.communicate()

    # Portable variant (works under Windows as well):
    os.system(command)
285
286
if __name__ == "__main__":
287
    # This code expects a single command line argument with link to the directory containing
288
    # routed studies
289
    if len(sys.argv) != 2:
290
        print("You should supply one command line argument pointing to the routing folder. Exiting.")
291
        sys.exit()
292
293
    # Find all subdirectories within the supplied directory. We assume that 
294
    # one subdirectory contains a full study
295
    subdirs = [os.path.join(sys.argv[1], d) for d in os.listdir(sys.argv[1]) if
296
                os.path.isdir(os.path.join(sys.argv[1], d))]
297
298
    # Get the latest directory
299
    study_dir = sorted(subdirs, key=lambda dir: os.stat(dir).st_mtime, reverse=True)[0]
300
    # print('study_dir is ' + study_dir)
301
302
    print(f"Looking for series to run inference on in directory {study_dir}...")
303
304
    # TASK: get_series_for_inference is not complete. Go and complete it
305
    volume, header = load_dicom_volume_as_numpy_from_list(get_series_for_inference(study_dir))
306
    print(f"Found series of {volume.shape[2]} axial slices")
307
308
    print("HippoVolume.AI: Running inference...")
309
    # TASK: Use the UNetInferenceAgent class and model parameter file from the previous section
310
    inference_agent = UNetInferenceAgent(
311
        device="cpu",
312
        parameter_file_path=r"inference/model.pth")
313
314
    # Run inference
315
    # TASK: single_volume_inference_unpadded takes a volume of arbitrary size 
316
    # and reshapes y and z dimensions to the patch size used by the model before 
317
    # running inference. Your job is to implement it.
318
    pred_label = inference_agent.single_volume_inference_unpadded(np.array(volume))
319
    
320
    # TASK: get_predicted_volumes is not complete. Go and complete it
321
    pred_volumes = get_predicted_volumes(pred_label)
322
323
    # Create and save the report
324
    dt = datetime.date.today().strftime("%Y%m%d")
325
    tm = datetime.datetime.now().strftime("%H%M%S")
326
    
327
    print("Creating and pushing report...")
328
    report_save_path = os.path.join('..', 'out', dt+tm+'_report.dcm')
329
    
330
    # TASK: create_report is not complete. Go and complete it. 
331
    # STAND OUT SUGGESTION: save_report_as_dcm has some suggestions if you want to expand your
332
    # knowledge of DICOM format
333
    report_img = create_report(pred_volumes, header, volume, pred_label)
334
    save_report_as_dcm(header, report_img, report_save_path)
335
336
    # Send report to our storage archive
337
    # TASK: Write a command line string that will issue a DICOM C-STORE request to send our report
338
    # to our Orthanc server (that runs on port 4242 of the local machine), using storescu tool
339
    # Comment out when Orthanc server is not active.
340
    # os_command(f'storescu 127.0.0.1 4242 -v -aec HIPPOAI {"src/"+report_save_path}')
341
342
    # This line will remove the study dir if run as root user
343
    # Sleep to let our StoreSCP server process the report (remember - in our setup
344
    # the main archive is routing everyting that is sent to it, including our freshly generated
345
    # report) - we want to give it time to save before cleaning it up
346
    # Comment out to keep files
347
    # time.sleep(2)
348
    # shutil.rmtree(study_dir, onerror=lambda f, p, e: print(f"Error deleting: {e[1]}"))
349
350
    print(f"Inference successful on {header['SOPInstanceUID'].value}, MRI Dimensions: {pred_label.shape}",
351
          f"volume anterior: {pred_volumes['anterior']}, ",
352
          f"volume posterior: {pred_volumes['posterior']}, total volume: {pred_volumes['total']}")