|
a |
|
b/tile_generator.py |
|
|
1 |
# System |
|
|
2 |
import json |
|
|
3 |
import multiprocessing |
|
|
4 |
import os |
|
|
5 |
import warnings |
|
|
6 |
|
|
|
7 |
# Advanced |
|
|
8 |
import xml.etree.ElementTree as ET |
|
|
9 |
from argparse import ArgumentParser |
|
|
10 |
from pathlib import Path |
|
|
11 |
import pandas as pd |
|
|
12 |
import cv2 |
|
|
13 |
import matplotlib.pyplot as plt |
|
|
14 |
|
|
|
15 |
# Numpy |
|
|
16 |
import numpy as np |
|
|
17 |
|
|
|
18 |
# Image Processing |
|
|
19 |
from PIL import Image |
|
|
20 |
|
|
|
21 |
# # Fix to get the dlls to load properly under python >= 3.8 and windows |
|
|
22 |
script_dir = os.path.dirname(os.path.realpath(__file__)) |
|
|
23 |
try: |
|
|
24 |
openslide_dll_path = os.path.join(script_dir, "..", "openslide-win64-20171122", "bin") |
|
|
25 |
os.add_dll_directory(openslide_dll_path) |
|
|
26 |
# print(openslide_dll_path) |
|
|
27 |
|
|
|
28 |
except Exception as e: |
|
|
29 |
pass |
|
|
30 |
|
|
|
31 |
# noinspection PyPep8 |
|
|
32 |
import openslide |
|
|
33 |
|
|
|
34 |
# Custom |
|
|
35 |
# noinspection PyPep8 |
|
|
36 |
import tissue_detection |
|
|
37 |
|
|
|
38 |
_MULTIPROCESS = True |
|
|
39 |
|
|
|
40 |
global lock |
|
|
41 |
|
|
|
42 |
|
|
|
43 |
class WSIHandler: |
|
|
44 |
def __init__(self, config_path="resources/config.json"): |
|
|
45 |
self.slide = None |
|
|
46 |
self.output_path = None |
|
|
47 |
self.total_width = 0 |
|
|
48 |
self.total_height = 0 |
|
|
49 |
self.levels = 0 |
|
|
50 |
self.current_level = 0 |
|
|
51 |
self.annotation_list = None |
|
|
52 |
self.annotation_dict = None |
|
|
53 |
self.config = self.load_config(config_path) |
|
|
54 |
assert "save_annotated_only" in self.config.keys() |
|
|
55 |
self.annotated_only = self.config["save_annotated_only"] |
|
|
56 |
self.scanner = None |
|
|
57 |
|
|
|
58 |
self.res_x = None |
|
|
59 |
self.res_y = None |
|
|
60 |
|
|
|
61 |
self.validate_label_dict() |
|
|
62 |
|
|
|
63 |
def validate_label_dict(self): |
|
|
64 |
self.check_at_most_one_unannotated_label() |
|
|
65 |
self.check_unannotated_label_first() |
|
|
66 |
|
|
|
67 |
def check_at_most_one_unannotated_label(self): |
|
|
68 |
label_dict = self.config["label_dict"] |
|
|
69 |
unannotated_labels = [] |
|
|
70 |
for label, label_config in label_dict.items(): |
|
|
71 |
if not label_config["annotated"]: |
|
|
72 |
unannotated_labels.append(label) |
|
|
73 |
assert len(unannotated_labels) < 2, (f"More than one label (=tissue type) is marked as unannotated in the " |
|
|
74 |
f"config.label_dict. Please make sure that at most one type (usually " |
|
|
75 |
f"non-tumor) is marked as unannotated. The labels in question are " |
|
|
76 |
f"{unannotated_labels}.") |
|
|
77 |
|
|
|
78 |
def check_unannotated_label_first(self): |
|
|
79 |
label_dict = self.config["label_dict"] |
|
|
80 |
for label in list(label_dict)[1:]: |
|
|
81 |
assert label_dict[label]["annotated"], (f"WSIHandler requires the unannotated label to be located in the " |
|
|
82 |
f"first position in config.label_dict. Please move the unannotated " |
|
|
83 |
f"tissue type '{label}' to the first position.") |
|
|
84 |
|
|
|
85 |
def print_and_log_slide_error(self, slide_name, error_msg, method_name): |
|
|
86 |
print(f"Error in slide {slide_name}. The error is: {type(error_msg).__name__}: {error_msg} in method: " |
|
|
87 |
f"{method_name}.") |
|
|
88 |
with lock: |
|
|
89 |
with open(os.path.join(self.config["output_path"], "error_log.txt"), "a") as f: |
|
|
90 |
f.write(f"Error in slide {slide_name}. The error is: {type(error_msg).__name__}: {error_msg} in " |
|
|
91 |
f"method: {method_name}.") |
|
|
92 |
|
|
|
93 |
@staticmethod |
|
|
94 |
def load_config(config_path): |
|
|
95 |
assert os.path.exists(config_path), "Cannot find " + config_path |
|
|
96 |
with open(config_path) as json_file: |
|
|
97 |
config = json.load(json_file) |
|
|
98 |
|
|
|
99 |
assert 1 >= config["tissue_coverage"] >= 0, "Tissue coverage must be between 1 and 0" |
|
|
100 |
assert config["blocked_threads"] >= 0 |
|
|
101 |
assert config["patches_per_tile"] >= 1, "Patches per tile must be >= 1" |
|
|
102 |
assert 0 <= config["overlap"] < 1, "Overlap must be between 1 and 0" |
|
|
103 |
assert config["annotation_overlap"] >= 0 and config["overlap"] < 1, "Annotation overlap must be between 1 and 0" |
|
|
104 |
|
|
|
105 |
return config |
|
|
106 |
|
|
|
107 |
def load_slide(self, slide_path): |
|
|
108 |
|
|
|
109 |
self.slide = openslide.OpenSlide(slide_path) |
|
|
110 |
self.total_width = self.slide.dimensions[0] |
|
|
111 |
self.total_height = self.slide.dimensions[1] |
|
|
112 |
self.levels = self.slide.level_count - 1 |
|
|
113 |
|
|
|
114 |
processing_level = self.config["processing_level"] |
|
|
115 |
|
|
|
116 |
if self.levels < self.config["processing_level"]: |
|
|
117 |
print("###############################################") |
|
|
118 |
print( |
|
|
119 |
"WARNING: Processing level above highest available slide level. Maximum slide level is " |
|
|
120 |
+ str(self.levels) |
|
|
121 |
+ ", processing level is " |
|
|
122 |
+ str(self.config["processing_level"]) |
|
|
123 |
+ ". Setting processing level to " |
|
|
124 |
+ str(self.levels) |
|
|
125 |
) |
|
|
126 |
print("###############################################") |
|
|
127 |
processing_level = self.levels |
|
|
128 |
|
|
|
129 |
return processing_level |
|
|
130 |
|
|
|
131 |
def load_annotation(self, annotation_path): |
|
|
132 |
annotation_dict = {} |
|
|
133 |
file_format = Path(annotation_path).suffix |
|
|
134 |
|
|
|
135 |
# QuPath exports |
|
|
136 |
if file_format == ".geojson" or file_format == ".txt": |
|
|
137 |
with open(annotation_path) as annotation_file: |
|
|
138 |
annotations = json.load(annotation_file) |
|
|
139 |
|
|
|
140 |
for polygon_nb in range(len(annotations["features"])): |
|
|
141 |
if annotations["features"][polygon_nb]["geometry"]["type"] == "Polygon": |
|
|
142 |
if (annotations["features"][polygon_nb]["properties"]["classification"]["name"] in |
|
|
143 |
self.config["label_dict"].keys()): |
|
|
144 |
annotation_dict.update({polygon_nb: { |
|
|
145 |
"coordinates": annotations["features"][polygon_nb]["geometry"]["coordinates"][0], |
|
|
146 |
"tissue_type": annotations["features"][polygon_nb]["properties"]["classification"][ |
|
|
147 |
"name"]}}) |
|
|
148 |
else: |
|
|
149 |
warnings.warn(f'Unknown annotation type in file {annotation_file.name}: The annotation label ' |
|
|
150 |
f'"{annotations["features"][polygon_nb]["properties"]["classification"]["name"]}"' |
|
|
151 |
f' is not part of the provided label dictionary ' |
|
|
152 |
f'(keys: {list(self.config["label_dict"].keys())}. Skipping.') |
|
|
153 |
else: |
|
|
154 |
warnings.warn(f'Not implemented warning in file {annotation_file.name}: The handling of the QuPath ' |
|
|
155 |
f'annotation type {annotations["features"][polygon_nb]["geometry"]["type"]} ' |
|
|
156 |
f'(id:{annotations["features"][polygon_nb]["id"]}) has not been implemented, yet. ' |
|
|
157 |
f'Skipping.') |
|
|
158 |
# xml for CAMELYON17 |
|
|
159 |
elif file_format == ".xml": |
|
|
160 |
tree = ET.parse(annotation_path) |
|
|
161 |
root = tree.getroot() |
|
|
162 |
|
|
|
163 |
for elem in root: |
|
|
164 |
polygon_nb = 0 |
|
|
165 |
for subelem in elem: |
|
|
166 |
items = subelem.attrib |
|
|
167 |
if "Type" in items.keys(): |
|
|
168 |
if items["Type"] == "Polygon": |
|
|
169 |
polygon_list = [] |
|
|
170 |
for coordinates in subelem: |
|
|
171 |
for coord in coordinates: |
|
|
172 |
polygon_list.append([float(coord.attrib["X"]), float(coord.attrib["Y"])]) |
|
|
173 |
# all annotationy in CAMELYON17 are tumor, so this is a pseudo label |
|
|
174 |
annotation_dict.update({polygon_nb: {"coordinates": polygon_list, "tissue_type": "Tumor"}}) |
|
|
175 |
polygon_nb += 1 |
|
|
176 |
else: |
|
|
177 |
return None |
|
|
178 |
|
|
|
179 |
return annotation_dict |
|
|
180 |
|
|
|
181 |
def get_img(self, level=None, show=False): |
|
|
182 |
if level is None: |
|
|
183 |
level = self.levels |
|
|
184 |
|
|
|
185 |
dims = self.slide.level_dimensions[level] |
|
|
186 |
image = np.array(self.slide.read_region((0, 0), level, dims)) |
|
|
187 |
|
|
|
188 |
if show: |
|
|
189 |
# Katja: fix for Wayland issue on my Ubuntu: |
|
|
190 |
# run 'export QT_QPA_PLATFORM=xcb' before opening pycharm (in the same terminal) |
|
|
191 |
plt.imshow(image) |
|
|
192 |
plt.title("Slide image") |
|
|
193 |
plt.show() |
|
|
194 |
|
|
|
195 |
return image, level |
|
|
196 |
|
|
|
197 |
def apply_tissue_detection(self, level=None, show=False): |
|
|
198 |
|
|
|
199 |
if level is not None: |
|
|
200 |
image, level = self.get_img(level, show) |
|
|
201 |
else: |
|
|
202 |
image, level = self.get_img(show=show) |
|
|
203 |
|
|
|
204 |
tissue_mask = tissue_detection.tissue_detection(image, remove_top_percentage=0) |
|
|
205 |
|
|
|
206 |
if show: |
|
|
207 |
plt.imshow(tissue_mask) |
|
|
208 |
plt.title("Tissue Mask") |
|
|
209 |
plt.show() |
|
|
210 |
|
|
|
211 |
return tissue_mask, level |
|
|
212 |
|
|
|
213 |
def determine_tile_size(self, level): |
|
|
214 |
|
|
|
215 |
if self.config["calibration"]["use_non_pixel_lengths"]: |
|
|
216 |
tile_size_0 = (self.config["calibration"]["patch_size_microns"] / self.res_x) * self.config[ |
|
|
217 |
"patches_per_tile" |
|
|
218 |
] |
|
|
219 |
else: |
|
|
220 |
tile_size_0 = self.config["patches_per_tile"] * self.config["patch_size"] |
|
|
221 |
|
|
|
222 |
downscale_factor = int(self.slide.level_downsamples[level]) |
|
|
223 |
tile_size = int(tile_size_0 / downscale_factor) |
|
|
224 |
|
|
|
225 |
assert self.config["patches_per_tile"] >= 1, "Patches per tile must be greater than 1." |
|
|
226 |
|
|
|
227 |
return tile_size |
|
|
228 |
|
|
|
229 |
def get_relevant_tiles(self, tissue_mask, tile_size, min_coverage, level, show=False): |
|
|
230 |
|
|
|
231 |
rows, row_residue = divmod(tissue_mask.shape[0], tile_size) |
|
|
232 |
cols, col_residue = divmod(tissue_mask.shape[1], tile_size) |
|
|
233 |
|
|
|
234 |
if row_residue: |
|
|
235 |
rows += 1 |
|
|
236 |
if col_residue: |
|
|
237 |
cols += 1 |
|
|
238 |
|
|
|
239 |
if self.config["use_tissue_detection"]: |
|
|
240 |
colored = cv2.cvtColor(tissue_mask, cv2.COLOR_GRAY2RGB) |
|
|
241 |
|
|
|
242 |
if self.annotation_dict is not None: |
|
|
243 |
annotation_mask = np.zeros(shape=(tissue_mask.shape[0], tissue_mask.shape[1])) |
|
|
244 |
scaling_factor = self.slide.level_downsamples[level] |
|
|
245 |
scaled_list = [ |
|
|
246 |
[[point[0] / scaling_factor, point[1] / scaling_factor] |
|
|
247 |
for point in self.annotation_dict[polygon]["coordinates"]] |
|
|
248 |
for polygon in self.annotation_dict |
|
|
249 |
] |
|
|
250 |
|
|
|
251 |
for polygon in scaled_list: |
|
|
252 |
cv2.fillPoly(annotation_mask, [np.array(polygon).astype(np.int32)], 1) |
|
|
253 |
|
|
|
254 |
relevant_tiles_dict = {} |
|
|
255 |
tile_nb = 0 |
|
|
256 |
|
|
|
257 |
# +1 to solve border issues |
|
|
258 |
for row in range(rows): |
|
|
259 |
for col in range(cols): |
|
|
260 |
|
|
|
261 |
tile = tissue_mask[ |
|
|
262 |
row * tile_size: row * tile_size + tile_size, col * tile_size: col * tile_size + tile_size |
|
|
263 |
] |
|
|
264 |
tissue_coverage = np.count_nonzero(tile) / tile.size |
|
|
265 |
annotated = False |
|
|
266 |
|
|
|
267 |
if self.annotation_dict is not None: |
|
|
268 |
if (np.count_nonzero(annotation_mask[row * tile_size: row * tile_size + tile_size, |
|
|
269 |
col * tile_size: col * tile_size + tile_size, ]) > 0): |
|
|
270 |
annotated = True |
|
|
271 |
|
|
|
272 |
if (tissue_coverage >= min_coverage or |
|
|
273 |
(self.config["keep_annotated_tiles_despite_too_little_tissue_coverage"] and annotated)): |
|
|
274 |
relevant_tiles_dict.update( |
|
|
275 |
{ |
|
|
276 |
tile_nb: { |
|
|
277 |
"x": col * tile_size, |
|
|
278 |
"y": row * tile_size, |
|
|
279 |
"size": tile_size, |
|
|
280 |
"level": level, |
|
|
281 |
"annotated": annotated, |
|
|
282 |
} |
|
|
283 |
} |
|
|
284 |
) |
|
|
285 |
if self.config["use_tissue_detection"]: |
|
|
286 |
if annotated: |
|
|
287 |
colored = cv2.rectangle( |
|
|
288 |
colored, |
|
|
289 |
(col * tile_size, row * tile_size), |
|
|
290 |
(col * tile_size + tile_size, row * tile_size + tile_size), |
|
|
291 |
(0, 255, 0), |
|
|
292 |
3, |
|
|
293 |
) |
|
|
294 |
else: |
|
|
295 |
colored = cv2.rectangle( |
|
|
296 |
colored, |
|
|
297 |
(col * tile_size, row * tile_size), |
|
|
298 |
(col * tile_size + tile_size, row * tile_size + tile_size), |
|
|
299 |
(255, 0, 0), |
|
|
300 |
1, |
|
|
301 |
) |
|
|
302 |
|
|
|
303 |
tile_nb += 1 |
|
|
304 |
|
|
|
305 |
if show and self.config["use_tissue_detection"]: |
|
|
306 |
plt.imshow(colored) |
|
|
307 |
plt.title("Tiled image") |
|
|
308 |
plt.show() |
|
|
309 |
|
|
|
310 |
return relevant_tiles_dict |
|
|
311 |
|
|
|
312 |
@staticmethod |
|
|
313 |
def tissue_percentage_over_threshold(label, label_dict, percentage): |
|
|
314 |
if label_dict[label]["type"] == "==": |
|
|
315 |
if label_dict[label]["threshold"] == percentage: |
|
|
316 |
return label, percentage |
|
|
317 |
elif label_dict[label]["type"] == ">=": |
|
|
318 |
if percentage >= label_dict[label]["threshold"]: |
|
|
319 |
return label, percentage |
|
|
320 |
elif label_dict[label]["type"] == ">": |
|
|
321 |
if percentage > label_dict[label]["threshold"]: |
|
|
322 |
return label, percentage |
|
|
323 |
elif label_dict[label]["type"] == "<=": |
|
|
324 |
if percentage <= percentage[label]["threshold"]: |
|
|
325 |
return label, percentage |
|
|
326 |
elif label_dict[label]["type"] == "<": |
|
|
327 |
if percentage < label_dict[label]["threshold"]: |
|
|
328 |
return label, percentage |
|
|
329 |
|
|
|
330 |
return None, None |
|
|
331 |
|
|
|
332 |
@staticmethod |
|
|
333 |
def check_tissue_percentage_over_threshold(label, label_dict, percentage): |
|
|
334 |
if label_dict[label]["type"] == "==": |
|
|
335 |
if label_dict[label]["threshold"] == percentage: |
|
|
336 |
return True |
|
|
337 |
elif label_dict[label]["type"] == ">=": |
|
|
338 |
if percentage >= label_dict[label]["threshold"]: |
|
|
339 |
return True |
|
|
340 |
elif label_dict[label]["type"] == ">": |
|
|
341 |
if percentage > label_dict[label]["threshold"]: |
|
|
342 |
return True |
|
|
343 |
elif label_dict[label]["type"] == "<=": |
|
|
344 |
if percentage <= percentage[label]["threshold"]: |
|
|
345 |
return True |
|
|
346 |
elif label_dict[label]["type"] == "<": |
|
|
347 |
if percentage < label_dict[label]["threshold"]: |
|
|
348 |
return True |
|
|
349 |
return False |
|
|
350 |
|
|
|
351 |
@staticmethod |
|
|
352 |
def get_unique_nonzero_entries(ndarray): |
|
|
353 |
return np.unique(ndarray[np.nonzero(ndarray)]).astype(int) |
|
|
354 |
|
|
|
355 |
def get_possible_labels(self, annotation_mask): |
|
|
356 |
if self.get_unique_nonzero_entries(annotation_mask).size >= 1: |
|
|
357 |
return self.get_unique_nonzero_entries(annotation_mask).tolist() |
|
|
358 |
else: |
|
|
359 |
return [0] # completely unlabeled patch -> only non-tumor |
|
|
360 |
|
|
|
361 |
@staticmethod |
|
|
362 |
def is_non_tumor(label_ids): # non-tumor tissue is unannotated tissue that's left after tissue detection |
|
|
363 |
return label_ids[0] == 0 |
|
|
364 |
|
|
|
365 |
def calculate_label_percentages(self, label_ids, annotation_mask): |
|
|
366 |
if self.is_non_tumor(label_ids): |
|
|
367 |
label_percentages = [(np.count_nonzero(np.max(annotation_mask, axis=(-1)) == 0) / |
|
|
368 |
annotation_mask[:, :, 0].size)] |
|
|
369 |
else: |
|
|
370 |
label_percentages = [] |
|
|
371 |
for label_id in label_ids: |
|
|
372 |
label_percentages.append((np.count_nonzero(annotation_mask[:, :, label_id] == label_id) / |
|
|
373 |
annotation_mask[:, :, label_id].size)) |
|
|
374 |
return label_percentages |
|
|
375 |
|
|
|
376 |
def get_labels_with_enough_tissue_annotated(self, label_dict, annotation_mask): |
|
|
377 |
label_ids = self.get_possible_labels(annotation_mask) |
|
|
378 |
label_percentages = self.calculate_label_percentages(label_ids, annotation_mask) |
|
|
379 |
|
|
|
380 |
labels_with_enough_tissue_including_non_tumor = [] |
|
|
381 |
for (label_id, label_percentage) in zip(label_ids, label_percentages): |
|
|
382 |
label = list(label_dict)[label_id] |
|
|
383 |
if self.check_tissue_percentage_over_threshold(label, label_dict, label_percentage): |
|
|
384 |
labels_with_enough_tissue_including_non_tumor.append(label) |
|
|
385 |
|
|
|
386 |
return labels_with_enough_tissue_including_non_tumor |
|
|
387 |
|
|
|
388 |
def update_overlapping_annotations_file(self, slide_name, verbose): |
|
|
389 |
with open(os.path.join(self.config["output_path"], |
|
|
390 |
"overlapping_annotations_present_in_slides.json"), "r") as file: |
|
|
391 |
overlapping_annotations_present = json.load(file) |
|
|
392 |
if not overlapping_annotations_present[slide_name]: |
|
|
393 |
overlapping_annotations_present[slide_name] = True |
|
|
394 |
with (open(os.path.join(self.config["output_path"], |
|
|
395 |
"overlapping_annotations_present_in_slides.json"), "w") |
|
|
396 |
as file): |
|
|
397 |
json.dump(overlapping_annotations_present, file, indent=4) |
|
|
398 |
if verbose: |
|
|
399 |
print(f"There are overlapping annotations in slide {slide_name}.") |
|
|
400 |
|
|
|
401 |
@staticmethod |
|
|
402 |
def normalize_to_tile_size_px(point, tile_size_px): |
|
|
403 |
if point < 0: |
|
|
404 |
return 0 |
|
|
405 |
elif point >= tile_size_px: |
|
|
406 |
return tile_size_px - 1.0 |
|
|
407 |
else: |
|
|
408 |
return point |
|
|
409 |
|
|
|
410 |
def translate_world_coordinates_to_tile_coordinates(self, point, tile_x, tile_y, tile_size_px): |
|
|
411 |
# the shrinkage of the coordinates to tile size is necessary as cv2.fillPoly only works if the annotation is |
|
|
412 |
# completely within the tile, so I set any points larger than the tile coordinates to the closest (valid) |
|
|
413 |
# tile coordinates |
|
|
414 |
return [self.normalize_to_tile_size_px(point[0] - tile_x, tile_size_px), |
|
|
415 |
self.normalize_to_tile_size_px(point[1] - tile_y, tile_size_px)] |
|
|
416 |
|
|
|
417 |
def extract_calibrated_patches( |
|
|
418 |
self, |
|
|
419 |
tile_dict, |
|
|
420 |
level, |
|
|
421 |
annotations, |
|
|
422 |
label_dict, |
|
|
423 |
overlap=0, |
|
|
424 |
annotation_overlap=0, |
|
|
425 |
slide_name=None, |
|
|
426 |
output_format="png", |
|
|
427 |
): |
|
|
428 |
|
|
|
429 |
scaling_factor = int(self.slide.level_downsamples[level]) |
|
|
430 |
|
|
|
431 |
patch_dict = {} |
|
|
432 |
patch_nb = 0 |
|
|
433 |
for tile_key in tile_dict: |
|
|
434 |
tile_x = tile_dict[tile_key]["x"] * scaling_factor |
|
|
435 |
tile_y = tile_dict[tile_key]["y"] * scaling_factor |
|
|
436 |
|
|
|
437 |
tile_size_px = tile_dict[tile_key]["size"] * scaling_factor |
|
|
438 |
|
|
|
439 |
patch_size_px_x = int(np.round(self.config["calibration"]["patch_size_microns"] / self.res_x)) |
|
|
440 |
patch_size_px_y = int(np.round(self.config["calibration"]["patch_size_microns"] / self.res_y)) |
|
|
441 |
|
|
|
442 |
tile = np.array(self.slide.read_region((tile_x, tile_y), level=0, size=(tile_size_px, tile_size_px))) |
|
|
443 |
tile = tile[:, :, 0:3] |
|
|
444 |
|
|
|
445 |
if tile_dict[tile_key]["annotated"]: |
|
|
446 |
px_overlap_x = int(patch_size_px_x * annotation_overlap) |
|
|
447 |
px_overlap_y = int(patch_size_px_y * annotation_overlap) |
|
|
448 |
|
|
|
449 |
else: |
|
|
450 |
px_overlap_x = int(patch_size_px_x * overlap) |
|
|
451 |
px_overlap_y = int(patch_size_px_y * overlap) |
|
|
452 |
|
|
|
453 |
rows = int(np.ceil(tile_size_px / (patch_size_px_y - px_overlap_y))) |
|
|
454 |
cols = int(np.ceil(tile_size_px / (patch_size_px_x - px_overlap_x))) |
|
|
455 |
|
|
|
456 |
# create annotation mask |
|
|
457 |
if annotations is not None: |
|
|
458 |
# Translate from world coordinates to tile coordinates |
|
|
459 |
tile_annotation_list = [ |
|
|
460 |
[self.translate_world_coordinates_to_tile_coordinates(point, tile_x, tile_y, tile_size_px) |
|
|
461 |
for point in annotations[polygon]["coordinates"]] for polygon in annotations] |
|
|
462 |
|
|
|
463 |
tile_annotation_list = list(zip(tile_annotation_list, [annotations[polygon]["tissue_type"] |
|
|
464 |
for polygon in annotations])) |
|
|
465 |
|
|
|
466 |
# Create mask from polygons |
|
|
467 |
tile_annotation_mask = np.zeros(shape=(tile_size_px, tile_size_px, len(self.config["label_dict"]))) |
|
|
468 |
|
|
|
469 |
annotated_tissue_types = {} |
|
|
470 |
tissue_type_number = 1 |
|
|
471 |
for tissue_type, tissue_details in label_dict.items(): |
|
|
472 |
if tissue_details["annotated"]: |
|
|
473 |
annotated_tissue_types.update({tissue_type: tissue_type_number}) |
|
|
474 |
tissue_type_number += 1 |
|
|
475 |
|
|
|
476 |
for polygon in tile_annotation_list: |
|
|
477 |
# note: the casting to a contiguous array is due to OpenCV requiring C-order (row major) for |
|
|
478 |
# implementation purposes, compare the answer by vvolhejn here |
|
|
479 |
# https://stackoverflow.com/questions/23830618/python-opencv-typeerror-layout-of-the-output-array-incompatible-with-cvmat |
|
|
480 |
# basically: many (all?) copy operations in numpy do this, ascontiguousarray is one of the more |
|
|
481 |
# verbose ones |
|
|
482 |
tile_annotation_mask[:, :, annotated_tissue_types[polygon[1]]] = ( |
|
|
483 |
cv2.fillPoly(np.ascontiguousarray( |
|
|
484 |
tile_annotation_mask[:, :, annotated_tissue_types[polygon[1]]]), |
|
|
485 |
[np.array(polygon[0]).astype(np.int32)], annotated_tissue_types[polygon[1]])) |
|
|
486 |
|
|
|
487 |
stop_y = False |
|
|
488 |
|
|
|
489 |
for row in range(rows): |
|
|
490 |
stop_x = False |
|
|
491 |
|
|
|
492 |
for col in range(cols): |
|
|
493 |
|
|
|
494 |
# Calculate patch coordinates |
|
|
495 |
patch_x = int(col * (patch_size_px_x - px_overlap_x)) |
|
|
496 |
patch_y = int(row * (patch_size_px_y - px_overlap_y)) |
|
|
497 |
|
|
|
498 |
if patch_y + patch_size_px_y >= tile_size_px: |
|
|
499 |
stop_y = True |
|
|
500 |
patch_y = tile_size_px - patch_size_px_y |
|
|
501 |
|
|
|
502 |
if patch_x + patch_size_px_x >= tile_size_px: |
|
|
503 |
stop_x = True |
|
|
504 |
patch_x = tile_size_px - patch_size_px_x |
|
|
505 |
|
|
|
506 |
global_x = patch_x + tile_x |
|
|
507 |
global_y = patch_y + tile_y |
|
|
508 |
|
|
|
509 |
patch = tile[patch_y: patch_y + patch_size_px_y, patch_x: patch_x + patch_size_px_x, :] |
|
|
510 |
|
|
|
511 |
if np.sum(patch) == 0: |
|
|
512 |
break |
|
|
513 |
|
|
|
514 |
# check if the patch is annotated |
|
|
515 |
annotated = False |
|
|
516 |
|
|
|
517 |
if annotations is not None: |
|
|
518 |
patch_mask = tile_annotation_mask[patch_y: patch_y + patch_size_px_y, |
|
|
519 |
patch_x: patch_x + patch_size_px_x, :] |
|
|
520 |
labels = self.get_labels_with_enough_tissue_annotated(label_dict, patch_mask) |
|
|
521 |
if labels is not None: |
|
|
522 |
if len(labels) > 1: |
|
|
523 |
self.update_overlapping_annotations_file( |
|
|
524 |
slide_name, verbose=self.config["overlapping_annotations_verbose"]) |
|
|
525 |
|
|
|
526 |
for label in labels: |
|
|
527 |
# this check is done to ensure that non-tumor tissue (unannotated) is handled properly |
|
|
528 |
if self.config["label_dict"][label]["annotated"]: |
|
|
529 |
annotated = True |
|
|
530 |
|
|
|
531 |
else: |
|
|
532 |
labels = "unlabeled" |
|
|
533 |
|
|
|
534 |
if labels is not None: |
|
|
535 |
if self.annotated_only and annotated or not self.annotated_only: |
|
|
536 |
|
|
|
537 |
file_name = slide_name + "_" + str(global_x) + "_" + str(global_y) + "." + output_format |
|
|
538 |
|
|
|
539 |
if self.config["calibration"]["resize"]: |
|
|
540 |
patch = cv2.resize(patch, (self.config["patch_size"], self.config["patch_size"])) |
|
|
541 |
|
|
|
542 |
patch = Image.fromarray(patch) |
|
|
543 |
for label in labels: |
|
|
544 |
patch.save(os.path.join(self.output_path, label, file_name), format=output_format) |
|
|
545 |
|
|
|
546 |
patch_dict.update( |
|
|
547 |
{ |
|
|
548 |
patch_nb: { |
|
|
549 |
"slide_name": slide_name, |
|
|
550 |
"patch_path": os.path.join(label, file_name), |
|
|
551 |
"label": label, |
|
|
552 |
"x_pos": global_x, |
|
|
553 |
"y_pos": global_y, |
|
|
554 |
"patch_size": patch_size_px_x, |
|
|
555 |
"resized": self.config["calibration"]["resize"], |
|
|
556 |
} |
|
|
557 |
} |
|
|
558 |
) |
|
|
559 |
patch_nb += 1 |
|
|
560 |
if stop_x: |
|
|
561 |
break |
|
|
562 |
if stop_y: |
|
|
563 |
break |
|
|
564 |
|
|
|
565 |
return patch_dict |
|
|
566 |
|
|
|
567 |
def make_dirs(self, output_path, slide_name, label_dict, annotated): |
|
|
568 |
try: |
|
|
569 |
slide_path = os.path.join(output_path, slide_name) |
|
|
570 |
if not os.path.exists(slide_path): |
|
|
571 |
os.makedirs(slide_path) |
|
|
572 |
if not annotated: |
|
|
573 |
unlabeled_path = os.path.join(slide_path, "unlabeled") |
|
|
574 |
if not os.path.exists(unlabeled_path): |
|
|
575 |
os.makedirs(unlabeled_path) |
|
|
576 |
else: |
|
|
577 |
for label in label_dict: |
|
|
578 |
sub_path = os.path.join(slide_path, label) |
|
|
579 |
if not os.path.exists(sub_path): |
|
|
580 |
os.makedirs(sub_path) |
|
|
581 |
for patch in os.listdir(sub_path): |
|
|
582 |
os.remove(os.path.join(sub_path, patch)) |
|
|
583 |
self.output_path = slide_path |
|
|
584 |
|
|
|
585 |
except Exception as e: |
|
|
586 |
self.print_and_log_slide_error(slide_name, e, "make_dirs") |
|
|
587 |
|
|
|
588 |
def extract_patches( |
|
|
589 |
self, |
|
|
590 |
tile_dict, |
|
|
591 |
level, |
|
|
592 |
annotations, |
|
|
593 |
label_dict, |
|
|
594 |
overlap=0, |
|
|
595 |
annotation_overlap=0, |
|
|
596 |
patch_size=256, |
|
|
597 |
slide_name=None, |
|
|
598 |
output_format="png", |
|
|
599 |
): |
|
|
600 |
patch_dict = {} |
|
|
601 |
|
|
|
602 |
scaling_factor = int(self.slide.level_downsamples[level]) |
|
|
603 |
patch_nb = 0 |
|
|
604 |
|
|
|
605 |
for tile_key in tile_dict: |
|
|
606 |
# skip unannotated tiles in case only annotated patches should be saved |
|
|
607 |
if self.annotated_only and not tile_dict[tile_key]["annotated"]: |
|
|
608 |
pass |
|
|
609 |
else: |
|
|
610 |
# ToDo: rows and cols arent calculated correctly, instead a quick fix by using breaks was applied |
|
|
611 |
|
|
|
612 |
tile_x = tile_dict[tile_key]["x"] * scaling_factor |
|
|
613 |
tile_y = tile_dict[tile_key]["y"] * scaling_factor |
|
|
614 |
tile_size = tile_dict[tile_key]["size"] * scaling_factor |
|
|
615 |
tile = np.array(self.slide.read_region((tile_x, tile_y), level=0, size=(tile_size, tile_size))) |
|
|
616 |
tile = tile[:, :, 0:3] |
|
|
617 |
|
|
|
618 |
# overlap separately for annotated and unannotated patches |
|
|
619 |
if tile_dict[tile_key]["annotated"]: |
|
|
620 |
px_overlap = int(patch_size * annotation_overlap) |
|
|
621 |
rows = int(np.ceil(tile_size / (patch_size - px_overlap))) |
|
|
622 |
cols = int(np.ceil(tile_size / (patch_size - px_overlap))) |
|
|
623 |
|
|
|
624 |
else: |
|
|
625 |
px_overlap = int(patch_size * overlap) |
|
|
626 |
rows = int(np.ceil(tile_size / (patch_size - px_overlap))) |
|
|
627 |
cols = int(np.ceil(tile_size / (patch_size - px_overlap))) |
|
|
628 |
|
|
|
629 |
# create annotation mask |
|
|
630 |
if annotations is not None: |
|
|
631 |
# Translate from world coordinates to tile coordinates |
|
|
632 |
tile_annotation_list = [ |
|
|
633 |
[self.translate_world_coordinates_to_tile_coordinates(point, tile_x, tile_y, tile_size) |
|
|
634 |
for point in annotations[polygon]["coordinates"]] for polygon in annotations] |
|
|
635 |
|
|
|
636 |
tile_annotation_list = list(zip(tile_annotation_list, [annotations[polygon]["tissue_type"] |
|
|
637 |
for polygon in annotations])) |
|
|
638 |
|
|
|
639 |
# Create mask from polygons |
|
|
640 |
tile_annotation_mask = np.zeros(shape=(tile_size, tile_size, len(self.config["label_dict"]))) |
|
|
641 |
|
|
|
642 |
annotated_tissue_types = {} |
|
|
643 |
tissue_type_number = 1 |
|
|
644 |
for tissue_type, tissue_details in label_dict.items(): |
|
|
645 |
if tissue_details["annotated"]: |
|
|
646 |
annotated_tissue_types.update({tissue_type: tissue_type_number}) |
|
|
647 |
tissue_type_number += 1 |
|
|
648 |
|
|
|
649 |
for polygon in tile_annotation_list: |
|
|
650 |
# note: the casting to a contiguous array is due to OpenCV requiring C-order (row major) for |
|
|
651 |
# implementation purposes, compare the answer by vvolhejn here |
|
|
652 |
# https://stackoverflow.com/questions/23830618/python-opencv-typeerror-layout-of-the-output-array-incompatible-with-cvmat |
|
|
653 |
# basically: many (all?) copy operations in numpy do this, ascontiguousarray is one of the more |
|
|
654 |
# verbose ones |
|
|
655 |
tile_annotation_mask[:, :, annotated_tissue_types[polygon[1]]] = ( |
|
|
656 |
cv2.fillPoly(np.ascontiguousarray( |
|
|
657 |
tile_annotation_mask[:, :, annotated_tissue_types[polygon[1]]]), |
|
|
658 |
[np.array(polygon[0]).astype(np.int32)], annotated_tissue_types[polygon[1]])) |
|
|
659 |
|
|
|
660 |
stop_y = False |
|
|
661 |
|
|
|
662 |
for row in range(rows): |
|
|
663 |
stop_x = False |
|
|
664 |
|
|
|
665 |
for col in range(cols): |
|
|
666 |
|
|
|
667 |
# Calculate patch coordinates |
|
|
668 |
patch_x = int(col * (patch_size - px_overlap)) |
|
|
669 |
patch_y = int(row * (patch_size - px_overlap)) |
|
|
670 |
|
|
|
671 |
if patch_y + patch_size >= tile_size: |
|
|
672 |
stop_y = True |
|
|
673 |
patch_y = tile_size - patch_size |
|
|
674 |
|
|
|
675 |
if patch_x + patch_size >= tile_size: |
|
|
676 |
stop_x = True |
|
|
677 |
patch_x = tile_size - patch_size |
|
|
678 |
|
|
|
679 |
global_x = patch_x + tile_x |
|
|
680 |
global_y = patch_y + tile_y |
|
|
681 |
|
|
|
682 |
patch = tile[patch_y: patch_y + patch_size, patch_x: patch_x + patch_size, :] |
|
|
683 |
|
|
|
684 |
if np.sum(patch) == 0: |
|
|
685 |
break |
|
|
686 |
|
|
|
687 |
# check if the patch is annotated |
|
|
688 |
annotated = False |
|
|
689 |
if annotations is not None: |
|
|
690 |
patch_mask = tile_annotation_mask[ |
|
|
691 |
patch_y: patch_y + patch_size, patch_x: patch_x + patch_size |
|
|
692 |
] |
|
|
693 |
|
|
|
694 |
labels = self.get_labels_with_enough_tissue_annotated(label_dict, patch_mask) |
|
|
695 |
if labels is not None: |
|
|
696 |
if len(labels) > 1: |
|
|
697 |
self.update_overlapping_annotations_file( |
|
|
698 |
slide_name, verbose=self.config["overlapping_annotations_verbose"]) |
|
|
699 |
|
|
|
700 |
for label in labels: |
|
|
701 |
# this check is done to ensure that non-tumor tissue (unannotated) is handled |
|
|
702 |
# properly |
|
|
703 |
if self.config["label_dict"][label]["annotated"]: |
|
|
704 |
annotated = True |
|
|
705 |
|
|
|
706 |
else: |
|
|
707 |
labels = "unlabeled" |
|
|
708 |
|
|
|
709 |
if labels is not None: |
|
|
710 |
if self.annotated_only and annotated or not self.annotated_only: |
|
|
711 |
if slide_name is not None: |
|
|
712 |
|
|
|
713 |
file_name = ( |
|
|
714 |
slide_name + "_" + str(global_x) + "_" + str(global_y) + "." + output_format |
|
|
715 |
) |
|
|
716 |
else: |
|
|
717 |
file_name = ( |
|
|
718 |
str(patch_nb) + "_" + str(global_x) + "_" + str(global_y) + "." + |
|
|
719 |
output_format |
|
|
720 |
) |
|
|
721 |
|
|
|
722 |
patch = Image.fromarray(patch) |
|
|
723 |
|
|
|
724 |
for label in labels: |
|
|
725 |
patch.save(os.path.join(self.output_path, label, file_name), format=output_format) |
|
|
726 |
|
|
|
727 |
patch_dict.update( |
|
|
728 |
{ |
|
|
729 |
patch_nb: { |
|
|
730 |
"slide_name": slide_name, |
|
|
731 |
"patch_path": os.path.join(label, file_name), |
|
|
732 |
"label": label, |
|
|
733 |
"x_pos": global_x, |
|
|
734 |
"y_pos": global_y, |
|
|
735 |
"patch_size": patch_size, |
|
|
736 |
} |
|
|
737 |
} |
|
|
738 |
) |
|
|
739 |
patch_nb += 1 |
|
|
740 |
if stop_x: |
|
|
741 |
break |
|
|
742 |
if stop_y: |
|
|
743 |
break |
|
|
744 |
|
|
|
745 |
return patch_dict |
|
|
746 |
|
|
|
747 |
def export_dict(self, dictionary, metadata_format, filename): |
|
|
748 |
|
|
|
749 |
if metadata_format == "json": |
|
|
750 |
file = os.path.join(self.output_path, filename + ".json") |
|
|
751 |
with open(file, "w") as json_file: |
|
|
752 |
json.dump(dictionary, json_file, indent=4) |
|
|
753 |
elif metadata_format == "csv": |
|
|
754 |
df = pd.DataFrame(dictionary.values()) |
|
|
755 |
file = os.path.join(self.output_path, filename + ".csv") |
|
|
756 |
df.to_csv(file, index=False) |
|
|
757 |
else: |
|
|
758 |
print("Could not write metadata. Metadata format has to be json or csv") |
|
|
759 |
|
|
|
760 |
def save_thumbnail(self, mask, slide_name, level, output_format="png"): |
|
|
761 |
|
|
|
762 |
remap_color = ((0, 0, 0), (255, 255, 255)) |
|
|
763 |
|
|
|
764 |
process_level = level |
|
|
765 |
img = self.slide.read_region([0, 0], process_level, self.slide.level_dimensions[process_level]) |
|
|
766 |
|
|
|
767 |
# Remove Alpha |
|
|
768 |
img = np.array(img)[:, :, 0:3] |
|
|
769 |
|
|
|
770 |
if remap_color is not None: |
|
|
771 |
indizes = np.all(img == remap_color[0], axis=2) |
|
|
772 |
img[indizes] = remap_color[1] |
|
|
773 |
|
|
|
774 |
copy_img = img[mask.astype(bool), :] |
|
|
775 |
|
|
|
776 |
median_filtered_img = cv2.medianBlur(img, 11) |
|
|
777 |
median_filtered_img[mask.astype(bool)] = copy_img |
|
|
778 |
|
|
|
779 |
img = median_filtered_img |
|
|
780 |
|
|
|
781 |
file_name = os.path.join(self.config["output_path"], slide_name, "thumbnail." + output_format) |
|
|
782 |
plt.imsave(file_name, img, format=output_format) |
|
|
783 |
|
|
|
784 |
def init_generic_tiff(self): |
|
|
785 |
|
|
|
786 |
unit_dict = {"milimeter": 1000, "centimeter": 10000, "meter": 1000000} |
|
|
787 |
self.scanner = "generic-tiff" |
|
|
788 |
|
|
|
789 |
assert self.slide.properties["tiff.ResolutionUnit"] in unit_dict.keys(), ( |
|
|
790 |
"Unknown unit " + self.slide.properties["tiff.ResolutionUnit"] |
|
|
791 |
) |
|
|
792 |
|
|
|
793 |
factor = unit_dict[self.slide.properties["tiff.ResolutionUnit"]] |
|
|
794 |
|
|
|
795 |
# convert to mpp |
|
|
796 |
self.res_x = factor / float(self.slide.properties["tiff.XResolution"]) |
|
|
797 |
self.res_y = factor / float(self.slide.properties["tiff.YResolution"]) |
|
|
798 |
|
|
|
799 |
def init_aperio(self): |
|
|
800 |
self.scanner = "aperio" |
|
|
801 |
|
|
|
802 |
self.res_x = float(self.slide.properties["openslide.mpp-x"]) |
|
|
803 |
self.res_y = float(self.slide.properties["openslide.mpp-y"]) |
|
|
804 |
|
|
|
805 |
def init_mirax(self): |
|
|
806 |
self.scanner = "mirax" |
|
|
807 |
self.res_x = float(self.slide.properties["openslide.mpp-x"]) |
|
|
808 |
self.res_y = float(self.slide.properties["openslide.mpp-y"]) |
|
|
809 |
|
|
|
810 |
def init_unknown(self): |
|
|
811 |
try: |
|
|
812 |
self.scanner = self.slide.properties["openslide.vendor"] |
|
|
813 |
self.res_x = float(self.slide.properties["openslide.mpp-x"]) |
|
|
814 |
self.res_y = float(self.slide.properties["openslide.mpp-y"]) |
|
|
815 |
except Exception as e: |
|
|
816 |
print(e) |
|
|
817 |
|
|
|
818 |
def init_patch_calibration(self): |
|
|
819 |
|
|
|
820 |
# check scanner type |
|
|
821 |
if self.slide.properties["openslide.vendor"] == "aperio": |
|
|
822 |
self.init_aperio() |
|
|
823 |
elif self.slide.properties["openslide.vendor"] == "generic-tiff": |
|
|
824 |
self.init_generic_tiff() |
|
|
825 |
elif self.slide.properties["openslide.vendor"] == "mirax": |
|
|
826 |
self.init_mirax() |
|
|
827 |
else: |
|
|
828 |
self.init_unknown() |
|
|
829 |
# future vendors |
|
|
830 |
# elif ... |
|
|
831 |
|
|
|
832 |
assert self.scanner, "Not integrated scanner type, aborting" |
|
|
833 |
|
|
|
834 |
def process_slide(self, slide): |
|
|
835 |
slide_name = os.path.basename(slide) |
|
|
836 |
slide_name = os.path.splitext(slide_name)[0] |
|
|
837 |
|
|
|
838 |
print("Processing", slide_name, "process id is", os.getpid()) |
|
|
839 |
|
|
|
840 |
try: |
|
|
841 |
annotation_path = os.path.join( |
|
|
842 |
self.config["annotation_dir"], slide_name + "." + self.config["annotation_file_format"] |
|
|
843 |
) |
|
|
844 |
if os.path.exists(annotation_path): |
|
|
845 |
|
|
|
846 |
annotated = True |
|
|
847 |
self.annotation_dict = self.load_annotation(annotation_path) |
|
|
848 |
else: |
|
|
849 |
annotated = False |
|
|
850 |
self.annotation_dict = None |
|
|
851 |
|
|
|
852 |
except Exception as e: |
|
|
853 |
self.print_and_log_slide_error(slide_name, e, "process_slide - load_annotations") |
|
|
854 |
return 0 |
|
|
855 |
|
|
|
856 |
self.make_dirs( |
|
|
857 |
output_path=self.config["output_path"], |
|
|
858 |
slide_name=slide_name, |
|
|
859 |
label_dict=self.config["label_dict"], |
|
|
860 |
annotated=annotated, |
|
|
861 |
) |
|
|
862 |
|
|
|
863 |
slide_path = os.path.join(self.config["slides_dir"], slide) |
|
|
864 |
try: |
|
|
865 |
level = self.load_slide(slide_path) |
|
|
866 |
except Exception as e: |
|
|
867 |
self.print_and_log_slide_error(slide_name, e, "load_slide") |
|
|
868 |
return 0 |
|
|
869 |
|
|
|
870 |
if self.config["calibration"]["use_non_pixel_lengths"]: |
|
|
871 |
try: |
|
|
872 |
self.init_patch_calibration() |
|
|
873 |
except Exception as e: |
|
|
874 |
self.print_and_log_slide_error(slide_name, e, "init_patch_calibration") |
|
|
875 |
return 0 |
|
|
876 |
|
|
|
877 |
if self.config["use_tissue_detection"]: |
|
|
878 |
mask, level = self.apply_tissue_detection(level=level, show=self.config["show_mode"]) |
|
|
879 |
else: |
|
|
880 |
mask = np.ones(shape=self.slide.level_dimensions[level]).transpose() |
|
|
881 |
try: |
|
|
882 |
tile_size = self.determine_tile_size(level) |
|
|
883 |
except Exception as e: |
|
|
884 |
self.print_and_log_slide_error(slide_name, e, "determine_tile_size") |
|
|
885 |
return 0 |
|
|
886 |
try: |
|
|
887 |
tile_dict = self.get_relevant_tiles( |
|
|
888 |
mask, |
|
|
889 |
tile_size=tile_size, |
|
|
890 |
min_coverage=self.config["tissue_coverage"], |
|
|
891 |
level=level, |
|
|
892 |
show=self.config["show_mode"], |
|
|
893 |
) |
|
|
894 |
except Exception as e: |
|
|
895 |
self.print_and_log_slide_error(slide_name, e, "get_relevant_tiles") |
|
|
896 |
return 0 |
|
|
897 |
|
|
|
898 |
# Calibrated or non calibrated patch sizes |
|
|
899 |
if self.config["calibration"]["use_non_pixel_lengths"]: |
|
|
900 |
try: |
|
|
901 |
patch_dict = self.extract_calibrated_patches( |
|
|
902 |
tile_dict, |
|
|
903 |
level, |
|
|
904 |
self.annotation_dict, |
|
|
905 |
self.config["label_dict"], |
|
|
906 |
overlap=self.config["overlap"], |
|
|
907 |
annotation_overlap=self.config["annotation_overlap"], |
|
|
908 |
slide_name=slide_name, |
|
|
909 |
output_format=self.config["output_format"] |
|
|
910 |
) |
|
|
911 |
except Exception as e: |
|
|
912 |
self.print_and_log_slide_error(slide_name, e, "extract_calibrated_patches") |
|
|
913 |
return 0 |
|
|
914 |
else: |
|
|
915 |
try: |
|
|
916 |
patch_dict = self.extract_patches( |
|
|
917 |
tile_dict, |
|
|
918 |
level, |
|
|
919 |
self.annotation_dict, |
|
|
920 |
self.config["label_dict"], |
|
|
921 |
overlap=self.config["overlap"], |
|
|
922 |
annotation_overlap=self.config["annotation_overlap"], |
|
|
923 |
patch_size=self.config["patch_size"], |
|
|
924 |
slide_name=slide_name, |
|
|
925 |
output_format=self.config["output_format"], |
|
|
926 |
) |
|
|
927 |
except Exception as e: |
|
|
928 |
self.print_and_log_slide_error(slide_name, e, "extract_patches") |
|
|
929 |
return 0 |
|
|
930 |
|
|
|
931 |
self.export_dict(patch_dict, self.config["metadata_format"], "tile_information") |
|
|
932 |
try: |
|
|
933 |
self.save_thumbnail(mask, level=level, slide_name=slide_name, |
|
|
934 |
output_format=self.config["output_format"]) |
|
|
935 |
print("Finished slide ", slide_name) |
|
|
936 |
|
|
|
937 |
except Exception as e: |
|
|
938 |
self.print_and_log_slide_error(slide_name, e, "save_thumbnail") |
|
|
939 |
return 0 |
|
|
940 |
|
|
|
941 |
@staticmethod |
|
|
942 |
def read_slide_file(slide_file_path, ext_list): |
|
|
943 |
|
|
|
944 |
slide_list = [] |
|
|
945 |
|
|
|
946 |
with open(slide_file_path) as file: |
|
|
947 |
lines = file.read().splitlines() |
|
|
948 |
|
|
|
949 |
for line in lines: |
|
|
950 |
if os.path.isdir(line): |
|
|
951 |
for ext in ext_list: |
|
|
952 |
for file in Path(line).resolve().glob("**/*" + ext): |
|
|
953 |
slide = str(file) |
|
|
954 |
else: |
|
|
955 |
slide = line |
|
|
956 |
|
|
|
957 |
slide_list.append(slide) |
|
|
958 |
|
|
|
959 |
return slide_list |
|
|
960 |
|
|
|
961 |
@staticmethod |
|
|
962 |
def init(l): |
|
|
963 |
global lock |
|
|
964 |
lock = l |
|
|
965 |
|
|
|
966 |
@staticmethod |
|
|
967 |
def get_slide_name_from_slide_path(slide_path): |
|
|
968 |
return os.path.splitext(os.path.basename(slide_path))[0] |
|
|
969 |
|
|
|
970 |
def slides2patches(self): |
|
|
971 |
|
|
|
972 |
l = multiprocessing.Lock() |
|
|
973 |
|
|
|
974 |
extensions = [".tif", ".svs", ".mrxs"] |
|
|
975 |
slide_list = [] |
|
|
976 |
|
|
|
977 |
if self.config["slides_file"] is not None: |
|
|
978 |
print("Using slide file: " + self.config["slides_file"]) |
|
|
979 |
slide_list = self.read_slide_file(self.config["slides_file"], extensions) |
|
|
980 |
else: |
|
|
981 |
for extension in extensions: |
|
|
982 |
for file in Path(self.config["slides_dir"]).resolve().glob("**/*" + extension): |
|
|
983 |
slide_list.append(file) |
|
|
984 |
|
|
|
985 |
self.annotation_list = [] |
|
|
986 |
if os.path.exists(self.config["annotation_dir"]): |
|
|
987 |
annotation_list = os.listdir(self.config["annotation_dir"]) |
|
|
988 |
self.annotation_list = [os.path.splitext(annotation)[0] for annotation in annotation_list] |
|
|
989 |
|
|
|
990 |
missing_annotations = [] |
|
|
991 |
annotated_slides = [ |
|
|
992 |
name |
|
|
993 |
if os.path.splitext(os.path.basename(name))[0] in self.annotation_list |
|
|
994 |
else missing_annotations.append(os.path.splitext(os.path.basename(name))[0]) |
|
|
995 |
for name in slide_list |
|
|
996 |
] |
|
|
997 |
annotated_slides = list(filter(lambda slide: True if slide is not None else False, annotated_slides)) |
|
|
998 |
|
|
|
999 |
print("###############################################") |
|
|
1000 |
print("Found", len(annotated_slides), "annotated slides") |
|
|
1001 |
print("###############################################") |
|
|
1002 |
print("Found", len(missing_annotations), "unannotated slides") |
|
|
1003 |
print("###############################################") |
|
|
1004 |
if not self.config["use_tissue_detection"]: |
|
|
1005 |
print("Tissue detection deactivated") |
|
|
1006 |
print("###############################################") |
|
|
1007 |
|
|
|
1008 |
if self.config["skip_unlabeled_slides"]: |
|
|
1009 |
slide_list = annotated_slides |
|
|
1010 |
print("Processing annotated slides only") |
|
|
1011 |
|
|
|
1012 |
if not os.path.exists(self.config["output_path"]): |
|
|
1013 |
os.makedirs(self.config["output_path"]) |
|
|
1014 |
|
|
|
1015 |
with open(os.path.join(self.config["output_path"], "error_log.txt"), "w"): |
|
|
1016 |
pass |
|
|
1017 |
|
|
|
1018 |
if not len(slide_list) == 0: |
|
|
1019 |
slide_list = sorted(slide_list) |
|
|
1020 |
|
|
|
1021 |
# writing this to file to work somewhat elegantly around multiprocessing without much restructuring of the |
|
|
1022 |
# existing code |
|
|
1023 |
with (open(os.path.join(self.config["output_path"], "overlapping_annotations_present_in_slides.json"), "w") |
|
|
1024 |
as file): |
|
|
1025 |
json.dump(dict.fromkeys(list(map(self.get_slide_name_from_slide_path, slide_list)), False), |
|
|
1026 |
file, indent=4) |
|
|
1027 |
|
|
|
1028 |
if _MULTIPROCESS: |
|
|
1029 |
available_threads = multiprocessing.cpu_count() - self.config["blocked_threads"] |
|
|
1030 |
pool = multiprocessing.Pool(processes=available_threads, initializer=self.init, initargs=(l,)) |
|
|
1031 |
pool.map(self.process_slide, slide_list) |
|
|
1032 |
|
|
|
1033 |
else: |
|
|
1034 |
for slide in slide_list: |
|
|
1035 |
self.process_slide(slide) |
|
|
1036 |
|
|
|
1037 |
slide_dict = {} |
|
|
1038 |
|
|
|
1039 |
if len(annotated_slides) == 0: |
|
|
1040 |
for i in range(len(slide_list)): |
|
|
1041 |
slide = slide_list[i] |
|
|
1042 |
slide_name = self.get_slide_name_from_slide_path(slide) |
|
|
1043 |
slide_dict.update({i: {"slide_name": slide_name, |
|
|
1044 |
"slide_path": slide, |
|
|
1045 |
} |
|
|
1046 |
}) |
|
|
1047 |
self.output_path = self.config["output_path"] |
|
|
1048 |
self.export_dict(slide_dict, self.config["metadata_format"], "slide_information") |
|
|
1049 |
else: |
|
|
1050 |
# Save label proportion per slide |
|
|
1051 |
labels = list(self.config["label_dict"].keys()) |
|
|
1052 |
|
|
|
1053 |
with (open(os.path.join(self.config["output_path"], |
|
|
1054 |
"overlapping_annotations_present_in_slides.json"), "r") as file): |
|
|
1055 |
overlapping_annotations_present_in_slide = json.load(file) |
|
|
1056 |
for i in range(len(annotated_slides)): |
|
|
1057 |
slide = slide_list[i] |
|
|
1058 |
slide_name = self.get_slide_name_from_slide_path(slide) |
|
|
1059 |
slide_path = os.path.join(self.config["output_path"], slide_name) |
|
|
1060 |
|
|
|
1061 |
n_labeled_tiles = 0 |
|
|
1062 |
n_labels = {} |
|
|
1063 |
for label in labels: |
|
|
1064 |
n_label = len(os.listdir(os.path.join(slide_path, label))) |
|
|
1065 |
n_labels.update({label: n_label}) |
|
|
1066 |
n_labeled_tiles += n_label |
|
|
1067 |
|
|
|
1068 |
slide_dict_entry = {} |
|
|
1069 |
slide_dict_entry.update({"slide_name": slide_name, |
|
|
1070 |
"slide_contains_overlapping_annotations": |
|
|
1071 |
overlapping_annotations_present_in_slide[slide_name]}) |
|
|
1072 |
fracs = {} |
|
|
1073 |
for label, n_label in n_labels.items(): |
|
|
1074 |
slide_dict_entry.update({label: n_label}) |
|
|
1075 |
fracs.update({label: n_label / n_labeled_tiles * 100}) |
|
|
1076 |
slide_dict_entry.update({"total": n_labeled_tiles, "frac": fracs}) |
|
|
1077 |
slide_dict.update({i: slide_dict_entry}) |
|
|
1078 |
|
|
|
1079 |
self.output_path = self.config["output_path"] |
|
|
1080 |
self.export_dict(slide_dict, self.config["metadata_format"], "slide_information") |
|
|
1081 |
|
|
|
1082 |
# Save used config file |
|
|
1083 |
file = os.path.join(self.config["output_path"], "config.json") |
|
|
1084 |
with open(file, "w") as json_file: |
|
|
1085 |
json.dump(self.config, json_file, indent=4) |
|
|
1086 |
|
|
|
1087 |
print("Finished tiling process!") |
|
|
1088 |
|
|
|
1089 |
else: |
|
|
1090 |
print("###############################################") |
|
|
1091 |
print("WARNING: No slides processed!") |
|
|
1092 |
print("###############################################") |
|
|
1093 |
|
|
|
1094 |
|
|
|
1095 |
if __name__ == "__main__": |
|
|
1096 |
parser = ArgumentParser() |
|
|
1097 |
parser.add_argument("--config_path", default=script_dir + "/resources/config.json") |
|
|
1098 |
args = parser.parse_args() |
|
|
1099 |
|
|
|
1100 |
slide_handler = WSIHandler(config_path=args.config_path) |
|
|
1101 |
slide_handler.slides2patches() |