<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
<meta charset="utf-8">
<meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Slide Processing &mdash; slideflow 3.0.0 documentation</title>
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<!-- <link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> -->
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
<link rel="index" title="Index" href="../genindex/" />
<link rel="search" title="Search" href="../search/" />
<link rel="next" title="Training" href="../training/" />
<link rel="prev" title="Datasets" href="../datasets_and_val/" />
<script src="../_static/js/modernizr.min.js"></script>
<!-- Preload the theme fonts -->
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<!-- Preload the katex fonts -->
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.2/css/all.css" integrity="sha384-vSIIfh2YWi9wW0r9iZe7RJPrKwp6bG+s9QZMoITbCckVJqGCCRhc+ccxNcdpHuYu" crossorigin="anonymous">
<script defer data-domain="slideflow.dev" src="https://plausible.io/js/script.js"></script>
</head>
<div class="container-fluid header-holder tutorials-header" id="header-holder">
<div class="container">
<div class="header-container">
<a class="header-logo" href="https://slideflow.dev" aria-label="Slideflow"></a>
<div class="main-menu">
<ul>
<li class="active">
<a href="https://slideflow.dev">Docs</a>
</li>
<li>
<a href="https://slideflow.dev/tutorial1/">Tutorials</a>
</li>
<li>
<a href="https://github.com/slideflow/slideflow">GitHub</a>
</li>
</ul>
</div>
<a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
</div>
</div>
</div>
<body class="pytorch-body">
<div class="table-of-contents-link-wrapper">
<span>Table of Contents</span>
<a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
</div>
<nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
<div class="pytorch-side-scroll">
<div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
<div class="pytorch-left-menu-search">
<div class="version">
3.0
</div>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="../search/" method="get">
<input type="text" name="q" placeholder="Search Docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div>
<p class="caption" role="heading"><span class="caption-text">Introduction</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../installation/">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../overview/">Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../quickstart/">Quickstart</a></li>
<li class="toctree-l1"><a class="reference internal" href="../project_setup/">Setting up a Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="../datasets_and_val/">Datasets</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Slide Processing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../training/">Training</a></li>
<li class="toctree-l1"><a class="reference internal" href="../evaluation/">Evaluation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../posthoc/">Layer Activations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../uq/">Uncertainty Quantification</a></li>
<li class="toctree-l1"><a class="reference internal" href="../features/">Generating Features</a></li>
<li class="toctree-l1"><a class="reference internal" href="../mil/">Multiple-Instance Learning (MIL)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../ssl/">Self-Supervised Learning (SSL)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stylegan/">Generative Networks (GANs)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../saliency/">Saliency Maps</a></li>
<li class="toctree-l1"><a class="reference internal" href="../segmentation/">Tissue Segmentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../cellseg/">Cell Segmentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../custom_loops/">Custom Training Loops</a></li>
<li class="toctree-l1"><a class="reference internal" href="../studio/">Slideflow Studio: Live Visualization</a></li>
<li class="toctree-l1"><a class="reference internal" href="../troubleshooting/">Troubleshooting</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Developer Notes</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../tfrecords/">TFRecords: Reading and Writing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dataloaders/">Dataloaders: Sampling and Augmentation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../custom_extractors/">Custom Feature Extractors</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tile_labels/">Strong Supervision with Tile Labels</a></li>
<li class="toctree-l1"><a class="reference internal" href="../plugins/">Creating a Slideflow Plugin</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">API</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../slideflow/">slideflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../project/">slideflow.Project</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dataset/">slideflow.Dataset</a></li>
<li class="toctree-l1"><a class="reference internal" href="../dataset_features/">slideflow.DatasetFeatures</a></li>
<li class="toctree-l1"><a class="reference internal" href="../heatmap/">slideflow.Heatmap</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model_params/">slideflow.ModelParams</a></li>
<li class="toctree-l1"><a class="reference internal" href="../mosaic/">slideflow.Mosaic</a></li>
<li class="toctree-l1"><a class="reference internal" href="../slidemap/">slideflow.SlideMap</a></li>
<li class="toctree-l1"><a class="reference internal" href="../biscuit/">slideflow.biscuit</a></li>
<li class="toctree-l1"><a class="reference internal" href="../slideflow_cellseg/">slideflow.cellseg</a></li>
<li class="toctree-l1"><a class="reference internal" href="../io/">slideflow.io</a></li>
<li class="toctree-l1"><a class="reference internal" href="../io_tensorflow/">slideflow.io.tensorflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../io_torch/">slideflow.io.torch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../gan/">slideflow.gan</a></li>
<li class="toctree-l1"><a class="reference internal" href="../grad/">slideflow.grad</a></li>
<li class="toctree-l1"><a class="reference internal" href="../mil_module/">slideflow.mil</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model/">slideflow.model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model_tensorflow/">slideflow.model.tensorflow</a></li>
<li class="toctree-l1"><a class="reference internal" href="../model_torch/">slideflow.model.torch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../norm/">slideflow.norm</a></li>
<li class="toctree-l1"><a class="reference internal" href="../simclr/">slideflow.simclr</a></li>
<li class="toctree-l1"><a class="reference internal" href="../slide/">slideflow.slide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../slide_qc/">slideflow.slide.qc</a></li>
<li class="toctree-l1"><a class="reference internal" href="../stats/">slideflow.stats</a></li>
<li class="toctree-l1"><a class="reference internal" href="../util/">slideflow.util</a></li>
<li class="toctree-l1"><a class="reference internal" href="../studio_module/">slideflow.studio</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../tutorial1/">Tutorial 1: Model training (simple)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial2/">Tutorial 2: Model training (advanced)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial3/">Tutorial 3: Using a custom architecture</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial4/">Tutorial 4: Model evaluation &amp; heatmaps</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial5/">Tutorial 5: Creating a mosaic map</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial6/">Tutorial 6: Custom slide filtering</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial7/">Tutorial 7: Training with custom augmentations</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorial8/">Tutorial 8: Multiple-Instance Learning</a></li>
</ul>
</div>
</div>
</nav>
<div class="pytorch-container">
<div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
<div class="pytorch-breadcrumbs-wrapper">
<div role="navigation" aria-label="breadcrumbs navigation">
<ul class="pytorch-breadcrumbs">
<li>
<a href="../">
Docs
</a> &gt;
</li>
<li>Slide Processing</li>
<li class="pytorch-breadcrumbs-aside">
<a href="../_sources/slide_processing.rst.txt" rel="nofollow"><img src="../_static/images/view-page-source-icon.svg"></a>
</li>
</ul>
</div>
</div>
<div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
Shortcuts
</div>
</div>
<section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
<div class="pytorch-content-left">
<div class="rst-content">
<div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
<article itemprop="articleBody" id="pytorch-article" class="pytorch-article">
<section id="slide-processing">
<span id="filtering"></span><h1>Slide Processing<a class="headerlink" href="#slide-processing" title="Permalink to this heading"></a></h1>
<img alt="../_images/tile_extraction_overview.png" src="../_images/tile_extraction_overview.png" />
<div class="line-block">
<div class="line"><br /></div>
</div>
<p>Whole-slide histopathological images present many challenges for machine learning researchers, as these large gigapixel images may contain out-of-focus regions, pen marks, uneven staining, or varying optical resolutions. Slideflow provides tools for both flexible and computationally efficient slide processing in order to build datasets ready for machine learning applications.</p>
<p>Most tools in Slideflow work with image tiles - extracted sub-regions of a whole-slide image - as the primary data source. For efficiency, image tiles are first buffered into <a class="reference internal" href="../tfrecords/#tfrecords"><span class="std std-ref">TFRecords</span></a>, a binary file format that greatly improves I/O throughput. Although training can be performed without using TFRecords (see <a class="reference internal" href="../training/#from-wsi"><span class="std std-ref">Training without TFRecords</span></a>), we recommend tile extraction as the first step for most projects.</p>
<section id="tile-extraction">
<h2>Tile extraction<a class="headerlink" href="#tile-extraction" title="Permalink to this heading"></a></h2>
<p>Image tiles are extracted from whole-slide images using either <a class="reference internal" href="../project/#slideflow.Project.extract_tiles" title="slideflow.Project.extract_tiles"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.Project.extract_tiles()</span></code></a> or <a class="reference internal" href="../dataset/#slideflow.Dataset.extract_tiles" title="slideflow.Dataset.extract_tiles"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.Dataset.extract_tiles()</span></code></a>. When using the Project interface, the only arguments required are <code class="docutils literal notranslate"><span class="pre">tile_px</span></code> and <code class="docutils literal notranslate"><span class="pre">tile_um</span></code>, which determine the size of the extracted image tiles in pixels and microns:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span><span class="n">tile_px</span><span class="o">=</span><span class="mi">299</span><span class="p">,</span> <span class="n">tile_um</span><span class="o">=</span><span class="mi">302</span><span class="p">)</span>
</pre></div>
</div>
<p>and when using a <a class="reference internal" href="../dataset/#slideflow.Dataset" title="slideflow.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">slideflow.Dataset</span></code></a>, no arguments are required.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dataset</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">()</span>
</pre></div>
</div>
<p>Tiles will be extracted at the specified pixel and micron size and stored in TFRecord format. Loose image tiles (*.jpg or *.png format) can also be saved with the argument <code class="docutils literal notranslate"><span class="pre">save_tiles=True</span></code>.</p>
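<p>For example, the following call (a minimal sketch, reusing the tile size from above) extracts tiles into TFRecords while also saving loose image tiles:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Extract tiles into TFRecords and also save loose image tiles
P.extract_tiles(tile_px=299, tile_um=302, save_tiles=True)
</pre></div>
</div>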
<p>See the <a class="reference internal" href="../dataset/#slideflow.Dataset.extract_tiles" title="slideflow.Dataset.extract_tiles"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.Dataset.extract_tiles()</span></code></a> API documentation for customization options.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Slide scanners may have differing microns-per-pixel (MPP) resolutions, so “10X” magnification from one scanner may be slightly different than “10X” on another scanner. Specifying a fixed <code class="docutils literal notranslate"><span class="pre">tile_um</span></code> ensures all image tiles have both the same pixel size and micron size. This MPP-harmonization step uses the <a class="reference external" href="https://www.libvips.org/API/current/libvips-resample.html#vips-resize">Libvips resize</a> function on extracted images. To disable this step and instead extract tiles at a given <a class="reference external" href="https://dicom.nema.org/dicom/dicomwsi/">downsample layer</a>, set <code class="docutils literal notranslate"><span class="pre">tile_um</span></code> equal to a magnification level rather than micron size:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span><span class="n">tile_px</span><span class="o">=</span><span class="mi">299</span><span class="p">,</span> <span class="n">tile_um</span><span class="o">=</span><span class="s2">&quot;10x&quot;</span><span class="p">)</span>
</pre></div>
</div>
</div>
</section>
<section id="cell-segmentation">
<h2>Cell segmentation<a class="headerlink" href="#cell-segmentation" title="Permalink to this heading"></a></h2>
<p>An alternative to extracting tiles in a grid across whole-slide images is extracting tiles at detected cell centroids. This is discussed separately in <a class="reference internal" href="../cellseg/#cellseg"><span class="std std-ref">Cell Segmentation</span></a>.</p>
</section>
<section id="regions-of-interest">
<span id="id1"></span><h2>Regions of Interest<a class="headerlink" href="#regions-of-interest" title="Permalink to this heading"></a></h2>
<p>Tile extraction can optionally be restricted to pathologist-annotated Regions of Interest (ROIs), allowing you to enrich your dataset by using only the relevant sections of a slide.</p>
<p>We offer two methods for annotating ROIs - <a class="reference internal" href="../studio/#studio-roi"><span class="std std-ref">Slideflow Studio</span></a> and <a class="reference external" href="https://qupath.github.io/">QuPath</a>. Please see the Slideflow Studio section for instructions on generating ROI annotations using the Slideflow interface.</p>
<p>If you are using QuPath, annotate whole-slide images using the Polygon tool. Then click <strong>Automate</strong> -&gt; <strong>Show script editor</strong>. In the script editor that opens, click <strong>File</strong> -&gt; <strong>Open</strong> and load the <code class="docutils literal notranslate"><span class="pre">qupath_roi.groovy</span></code> script (QuPath 0.2 or greater) or <code class="docutils literal notranslate"><span class="pre">qupath_roi_legacy.groovy</span></code> (QuPath 0.1.x); both scripts are <a class="reference external" href="https://github.com/slideflow/slideflow">available on GitHub</a>. Click <strong>Run</strong> -&gt; <strong>Run</strong> if using QuPath 0.2 or greater, or <strong>Run</strong> -&gt; <strong>Run for Project</strong> if using QuPath 0.1.x. ROIs will be exported in CSV format to the QuPath project directory, in the subdirectory “ROI”.</p>
<p>Once ROI CSV files are generated, ensure they are placed in the folder expected by your <a class="reference internal" href="../project_setup/#project-setup"><span class="std std-ref">Project</span></a> or <a class="reference internal" href="../datasets_and_val/#datasets-and-validation"><span class="std std-ref">Dataset</span></a> based on their respective configurations.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">roi_method</span></code> argument to the <code class="docutils literal notranslate"><span class="pre">extract_tiles()</span></code> functions allow you to control how ROIs are used. Options include:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">'auto'</span></code>: Default behavior. For slides with a valid ROI, extract tiles from within ROIs only. For slides without ROIs, extract from the whole-slide image.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">'inside'</span></code>: Extract from within ROIs, and skip any slides missing ROIs.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">'outside'</span></code>: Extract from outside ROIs, and skip any slides missing ROIs.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">'ignore'</span></code>: Ignore all ROIs, extracting from whole-slide images.</p></li>
</ul>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Nested ROIs will be rendered as holes.</p>
</div>
<p>By default, ROIs filter tiles based on the center point of the tile. Alternatively, you can filter tiles based on the proportion of the tile inside an ROI by using the argument <code class="docutils literal notranslate"><span class="pre">roi_filter_method</span></code>. If <code class="docutils literal notranslate"><span class="pre">roi_filter_method</span></code> is set to a float (0-1), this value will be interpreted as a proportion threshold. If the proportion of a tile inside an ROI is greater than this number, the tile is included. For example, if <code class="docutils literal notranslate"><span class="pre">roi_filter_method=0.7</span></code>, a tile that is 80% inside of an ROI will be included, but a tile that is only 60% inside of an ROI will be excluded.</p>
<img alt="../_images/roi_filter.jpg" src="../_images/roi_filter.jpg" />
<div class="line-block">
<div class="line"><br /></div>
</div>
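<p>For example, the following call (a minimal sketch) restricts extraction to annotated ROIs, skips slides without ROIs, and keeps a tile only when at least 70% of it falls inside an ROI:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Extract only from within ROIs, filtering tiles by ROI coverage
P.extract_tiles(
    tile_px=299,
    tile_um=302,
    roi_method='inside',
    roi_filter_method=0.7
)
</pre></div>
</div>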
<p id="roi-labels">ROIs can optionally be assigned a label. Labels can be added or changed using <a class="reference internal" href="../studio/#studio-roi"><span class="std std-ref">Slideflow Studio</span></a>, or by adding a “label” column in the ROI CSV file. Labels can be used to train strongly supervised models, where each tile is assigned a label based on the ROI it is extracted from, rather than inheriting the label of the whole-slide image. See the developer note <a class="reference internal" href="../tile_labels/#tile-labels"><span class="std std-ref">Strong Supervision with Tile Labels</span></a> for more information.</p>
<p>To retrieve the ROI name (and label, if present) for all tiles in a slide, use <a class="reference internal" href="../slide/#slideflow.WSI.get_tile_dataframe" title="slideflow.WSI.get_tile_dataframe"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.WSI.get_tile_dataframe()</span></code></a>. This will return a Pandas DataFrame with the following columns:</p>
<blockquote>
<div><ul class="simple">
<li><p><strong>loc_x</strong>: X-coordinate of tile center</p></li>
<li><p><strong>loc_y</strong>: Y-coordinate of tile center</p></li>
<li><p><strong>grid_x</strong>: X grid index of the tile</p></li>
<li><p><strong>grid_y</strong>: Y grid index of the tile</p></li>
<li><p><strong>roi_name</strong>: Name of the ROI if tile is in an ROI, else None</p></li>
<li><p><strong>roi_desc</strong>: Description of the ROI if tile is in ROI, else None</p></li>
<li><p><strong>label</strong>: ROI label, if present.</p></li>
</ul>
</div></blockquote>
<p>The <strong>loc_x</strong> and <strong>loc_y</strong> columns contain the same tile location information <a class="reference internal" href="../tfrecords/#tfrecords"><span class="std std-ref">stored in TFRecords</span></a>.</p>
<p>You can also retrieve this information for all slides in a dataset by using <a class="reference internal" href="../dataset/#slideflow.Dataset.get_tile_dataframe" title="slideflow.Dataset.get_tile_dataframe"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.Dataset.get_tile_dataframe()</span></code></a>, which will return a DataFrame with the same columns as above, plus a <code class="docutils literal notranslate"><span class="pre">slide</span></code> column.</p>
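<p>As a brief sketch, the per-tile metadata for an entire dataset can be pulled into a single DataFrame and inspected with standard Pandas tooling (column names as described above):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Build a dataframe of tile locations and ROI assignments
df = dataset.get_tile_dataframe()
print(df[['slide', 'loc_x', 'loc_y', 'roi_name']].head())
</pre></div>
</div>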
</section>
<section id="masking-filtering">
<h2>Masking &amp; Filtering<a class="headerlink" href="#masking-filtering" title="Permalink to this heading"></a></h2>
<p>Slideflow provides two approaches for refining where image tiles should be extracted from whole-slide images: <strong>slide-level masking</strong> and <strong>tile-level filtering</strong>. In these next sections, we’ll review options for both approaches.</p>
<section id="otsu-s-thresholding">
<h3>Otsu’s thresholding<a class="headerlink" href="#otsu-s-thresholding" title="Permalink to this heading"></a></h3>
<img alt="../_images/otsu.png" src="../_images/otsu.png" />
<div class="line-block">
<div class="line"><br /></div>
</div>
<p>Otsu’s thresholding is a <strong>slide-based method</strong> that distinguishes foreground (tissue) from background (empty slide). Otsu’s thresholding is performed in the HSV colorspace and yields similar results to grayspace filtering, a tile-level filtering method described below.</p>
<p>To apply Otsu’s thresholding to slides before tile extraction, use the <code class="docutils literal notranslate"><span class="pre">qc</span></code> argument of the <code class="docutils literal notranslate"><span class="pre">.extract_tiles()</span></code> functions.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.slide</span> <span class="kn">import</span> <span class="n">qc</span>
<span class="c1"># Use this QC during tile extraction</span>
<span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span><span class="n">qc</span><span class="o">=</span><span class="n">qc</span><span class="o">.</span><span class="n">Otsu</span><span class="p">())</span>
</pre></div>
</div>
<p>You can also apply Otsu’s thresholding to a single slide with the <a class="reference internal" href="../slide/#slideflow.WSI.qc" title="slideflow.WSI.qc"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.WSI.qc()</span></code></a> method. See the <a class="reference internal" href="../slide/"><code class="xref py py-class docutils literal notranslate"><span class="pre">slideflow.WSI</span></code></a> API documentation for more information on working with individual slides.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="c1"># Apply Otsu&#39;s thresholding to a WSI object</span>
<span class="n">wsi</span> <span class="o">=</span> <span class="n">sf</span><span class="o">.</span><span class="n">WSI</span><span class="p">(</span><span class="o">...</span><span class="p">)</span>
<span class="n">wsi</span><span class="o">.</span><span class="n">qc</span><span class="p">(</span><span class="n">qc</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
</pre></div>
</div>
</section>
<section id="gaussian-blur-filtering">
<h3>Gaussian blur filtering<a class="headerlink" href="#gaussian-blur-filtering" title="Permalink to this heading"></a></h3>
<img alt="../_images/blur.png" src="../_images/blur.png" />
<div class="line-block">
<div class="line"><br /></div>
</div>
<p>Gaussian blur masking is another <strong>slide-based method</strong> that can detect pen marks and out-of-focus areas, and is particularly useful for datasets lacking annotated Regions of Interest (ROIs). Gaussian blur masking is applied similarly, using the <code class="docutils literal notranslate"><span class="pre">qc</span></code> argument.</p>
<p>Two versions of Gaussian blur masking are available: <code class="docutils literal notranslate"><span class="pre">qc.Gaussian</span></code> and <code class="docutils literal notranslate"><span class="pre">qc.GaussianV2</span></code> (new in Slideflow 2.1.0). The latter is the default and recommended version, as it is more computationally efficient. The former is provided for backwards compatibility.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.slide</span> <span class="kn">import</span> <span class="n">qc</span>
<span class="c1"># Use this QC during tile extraction</span>
<span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span><span class="n">qc</span><span class="o">=</span><span class="n">qc</span><span class="o">.</span><span class="n">GaussianV2</span><span class="p">())</span>
</pre></div>
</div>
<p>By default, Gaussian blur masking is calculated at 4 times lower magnification than the tile extraction MPP (e.g., when extracting tiles at 10X effective magnification, Gaussian filtering would be calculated at 2.5X). This is to reduce computation time. You can change this behavior by manually setting the <code class="docutils literal notranslate"><span class="pre">mpp</span></code> argument to a specific microns-per-pixel value.</p>
<p>Gaussian blur masking is performed on grayscale images. The <code class="docutils literal notranslate"><span class="pre">sigma</span></code> argument controls the standard deviation of the Gaussian blur kernel. The default value of 3 is recommended, but you may need to adjust it for your dataset: a higher value will result in more areas being masked, while a lower value will result in fewer areas being masked.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.slide</span> <span class="kn">import</span> <span class="n">qc</span>
<span class="c1"># Customize the Gaussian filter,</span>
<span class="c1"># using a sigma of 2 and a mpp of 1 (10X magnification)</span>
<span class="n">gaussian</span> <span class="o">=</span> <span class="n">qc</span><span class="o">.</span><span class="n">GaussianV2</span><span class="p">(</span><span class="n">mpp</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">sigma</span><span class="o">=</span><span class="mi">2</span><span class="p">)</span>
</pre></div>
</div>
<p>You can also use multiple slide-level masking methods by providing a list to <code class="docutils literal notranslate"><span class="pre">qc</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.slide</span> <span class="kn">import</span> <span class="n">qc</span>
<span class="n">qc</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">qc</span><span class="o">.</span><span class="n">Otsu</span><span class="p">(),</span>
<span class="n">qc</span><span class="o">.</span><span class="n">Gaussian</span><span class="p">()</span>
<span class="p">]</span>
<span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span><span class="n">qc</span><span class="o">=</span><span class="n">qc</span><span class="p">)</span>
</pre></div>
</div>
<p>If both Otsu’s thresholding and blur detection are used, Slideflow will calculate Blur Burden, a metric that assesses the degree to which non-background tiles are either out-of-focus or contain artifacts. In the tile extraction PDF report that is generated (see next section), the distribution of blur burden across slides in the dataset is plotted on the first page. The report also lists the number of slides meeting warning criteria (a blur burden exceeding 5% for a given slide), and a text file containing the names of slides with high blur burden is saved in the exported TFRecords directory. These slides should be manually reviewed to ensure they are of sufficient quality to include in the dataset.</p>
</section>
<section id="deepfocus">
<h3>DeepFocus<a class="headerlink" href="#deepfocus" title="Permalink to this heading"></a></h3>
<p>Slideflow also provides an interface for using <a class="reference external" href="https://journals.plos.org/plosone/article/file?id=10.1371/journal.pone.0205387&amp;type=printable">DeepFocus</a> to identify in-focus regions. DeepFocus is a lightweight neural network that predicts whether a section of a slide is in- or out-of-focus. When used as a slide-level masking method, DeepFocus will filter out-of-focus tiles from a slide. By default, DeepFocus is applied to slides at 40X magnification, although this can be customized with the <code class="docutils literal notranslate"><span class="pre">tile_um</span></code> argument.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.slide</span> <span class="kn">import</span> <span class="n">qc</span>
<span class="n">deepfocus</span> <span class="o">=</span> <span class="n">qc</span><span class="o">.</span><span class="n">DeepFocus</span><span class="p">(</span><span class="n">tile_um</span><span class="o">=</span><span class="s1">&#39;20x&#39;</span><span class="p">)</span>
<span class="n">slide</span><span class="o">.</span><span class="n">qc</span><span class="p">(</span><span class="n">deepfocus</span><span class="p">)</span>
</pre></div>
</div>
<p>Alternatively, you can retrieve raw predictions from the DeepFocus model for a slide by calling the <code class="docutils literal notranslate"><span class="pre">deepfocus</span></code> object on a <code class="xref py py-class docutils literal notranslate"><span class="pre">slideflow.WSI</span></code> object, passing the argument <code class="docutils literal notranslate"><span class="pre">threshold=False</span></code>:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">preds</span> <span class="o">=</span> <span class="n">deepfocus</span><span class="p">(</span><span class="n">slide</span><span class="p">,</span> <span class="n">threshold</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</pre></div>
</div>
</section>
<section id="custom-deep-learning-qc">
<h3>Custom deep learning QC<a class="headerlink" href="#custom-deep-learning-qc" title="Permalink to this heading"></a></h3>
<p>You can also create your own deep learning slide filters. To create a custom deep learning QC method like DeepFocus, create a custom slide filter that inherits <a class="reference internal" href="../slide_qc/#slideflow.slide.qc.StridedDL" title="slideflow.slide.qc.StridedDL"><code class="xref py py-class docutils literal notranslate"><span class="pre">slideflow.slide.qc.StridedDL</span></code></a>. For example, to manually recreate the above DeepFocus model, first clone the <a class="reference external" href="https://github.com/jamesdolezal/deepfocus">TF2 fork on GitHub</a>, which contains the DeepFocus architecture and model weights, and create a custom class as below:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.slide.qc</span> <span class="kn">import</span> <span class="n">strided_dl</span>
<span class="kn">from</span> <span class="nn">deepfocus.keras_model</span> <span class="kn">import</span> <span class="n">load_checkpoint</span><span class="p">,</span> <span class="n">deepfocus_v3</span>
<span class="k">class</span> <span class="nc">CustomDeepFocus</span><span class="p">(</span><span class="n">strided_dl</span><span class="o">.</span><span class="n">StridedDL</span><span class="p">):</span>
<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
<span class="n">model</span> <span class="o">=</span> <span class="n">deepfocus_v3</span><span class="p">()</span>
<span class="n">checkpoint</span> <span class="o">=</span> <span class="s1">&#39;/path/to/deepfocus/checkpoints/ver5&#39;</span>
<span class="n">load_checkpoint</span><span class="p">(</span><span class="n">model</span><span class="p">,</span> <span class="n">checkpoint</span><span class="p">)</span>
<span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span>
<span class="n">model</span><span class="o">=</span><span class="n">model</span><span class="p">,</span>
<span class="n">pred_idx</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
<span class="n">tile_px</span><span class="o">=</span><span class="mi">64</span><span class="p">,</span>
<span class="n">tile_um</span><span class="o">=</span><span class="s1">&#39;40x&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
<p>Then, supply this class to the <code class="docutils literal notranslate"><span class="pre">qc</span></code> argument as above.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span><span class="n">qc</span><span class="o">=</span><span class="n">CustomDeepFocus</span><span class="p">())</span>
</pre></div>
</div>
<p>See <a class="reference internal" href="../slide_qc/#qc"><span class="std std-ref">slideflow.slide.qc</span></a> for more information on the API for further QC customization.</p>
</section>
<section id="segmentation-models-u-net">
<h3>Segmentation Models (U-Net)<a class="headerlink" href="#segmentation-models-u-net" title="Permalink to this heading"></a></h3>
<p>Slideflow also provides an interface for both training and using segmentation models (e.g. U-Net, FPN, DeepLabV3) for slide-level masking. This is discussed separately in <a class="reference internal" href="../segmentation/#segmentation"><span class="std std-ref">Tissue Segmentation</span></a>.</p>
</section>
<section id="grayspace-filtering">
<h3>Grayspace filtering<a class="headerlink" href="#grayspace-filtering" title="Permalink to this heading"></a></h3>
<p>Grayspace filtering is a <strong>tile-based method</strong> that detects the amount of grayspace in a given image tile and discards the tile if the grayspace content exceeds a set threshold. RGB image tiles are converted to the HSV colorspace, and the fraction of pixels with saturation below a certain threshold is calculated. This filtering is performed separately for each tile as it is being extracted. Relevant arguments for grayspace filtering include:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">grayspace_threshold</span></code>: Saturation value, below which a pixel is considered gray. Range 0-1. Defaults to 0.05.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">grayspace_fraction</span></code>: Image tiles with grayspace above this fraction will be discarded. Defaults to 0.6.</p></li>
</ul>
<p>Grayspace filtering is enabled by default, and can be disabled by passing <code class="docutils literal notranslate"><span class="pre">grayspace_fraction=1</span></code> to the <code class="docutils literal notranslate"><span class="pre">.extract_tiles()</span></code> functions.</p>
<p>Grayspace filtering is similar to Otsu’s thresholding, with both operating in the HSV colorspace. Otsu’s thresholding is ~30% faster than grayspace filtering for slides with accessible downsample layers, but if downsample layers are not stored in a given slide or are inaccessible (e.g. <code class="docutils literal notranslate"><span class="pre">enable_downsample=False</span></code>), grayspace filtering may be faster. Grayspace filtering is also more reliable than Otsu’s thresholding for slides with abundant pen marks or other artifacts, which can interfere with Otsu’s thresholding.</p>
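<p>For example, the following sketch loosens the grayspace filter so that tiles are discarded only when more than 70% of their pixels are gray (keeping the default saturation threshold explicit):</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Discard tiles that are more than 70% grayspace
# (pixels with saturation below 0.05 are considered gray)
P.extract_tiles(
    tile_px=299,
    tile_um=302,
    grayspace_threshold=0.05,
    grayspace_fraction=0.7
)
</pre></div>
</div>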
</section>
<section id="whitepsace-filtering">
<h3>Whitepsace filtering<a class="headerlink" href="#whitepsace-filtering" title="Permalink to this heading"></a></h3>
<p>Whitespace filtering is performed similarly to grayspace filtering: the overall brightness of each pixel is calculated, and the fraction of pixels with brightness above a threshold determines whether a tile is discarded. As with grayspace filtering, there are two relevant arguments:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">whitespace_threshold</span></code>: Brightness value, above which a pixel is considered white. Range 0-255. Defaults to 230.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">whitespace_fraction</span></code>: Image tiles with whitespace above this fraction will be discarded. Defaults to 1.0 (disabled).</p></li>
</ul>
<p>Whitespace filtering is disabled by default.</p>
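<p>To enable it, set <code class="docutils literal notranslate"><span class="pre">whitespace_fraction</span></code> below 1.0. For example, the following sketch discards tiles that are more than 90% white:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Discard tiles that are more than 90% whitespace
# (pixels with brightness above 230 are considered white)
P.extract_tiles(
    tile_px=299,
    tile_um=302,
    whitespace_threshold=230,
    whitespace_fraction=0.9
)
</pre></div>
</div>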
</section>
</section>
<section id="stain-normalization">
<h2>Stain normalization<a class="headerlink" href="#stain-normalization" title="Permalink to this heading"></a></h2>
<img alt="../_images/wsi_norm_compare.jpg" src="../_images/wsi_norm_compare.jpg" />
<p>Image tiles can undergo digital Hematoxylin and Eosin (H&amp;E) stain normalization either during tile extraction or in real-time during training. Real-time normalization adds CPU overhead during training and inference but offers greater flexibility, allowing you to test different normalization strategies without re-extracting tiles from your entire dataset.</p>
<p>Available stain normalization algorithms include:</p>
<ul class="simple">
<li><p><strong>macenko</strong>: <a class="reference external" href="https://www.cs.unc.edu/~mn/sites/default/files/macenko2009.pdf">Original Macenko paper</a>.</p></li>
<li><p><strong>macenko_fast</strong>: Modified Macenko algorithm with the brightness standardization step removed.</p></li>
<li><p><strong>reinhard</strong>: <a class="reference external" href="https://ieeexplore.ieee.org/document/946629">Original Reinhard paper</a>.</p></li>
<li><p><strong>reinhard_fast</strong>: Modified Reinhard algorithm with the brightness standardization step removed.</p></li>
<li><p><strong>reinhard_mask</strong>: Modified Reinhard algorithm, with background/whitespace removed.</p></li>
<li><p><strong>reinhard_fast_mask</strong>: Modified Reinhard-Fast algorithm, with background/whitespace removed.</p></li>
<li><p><strong>vahadane</strong>: <a class="reference external" href="https://ieeexplore.ieee.org/document/7460968">Original Vahadane paper</a>.</p></li>
<li><p><strong>augment</strong>: HSV colorspace augmentation.</p></li>
<li><p><strong>cyclegan</strong>: CycleGAN-based stain normalization, as implemented by <a class="reference external" href="https://github.com/Boehringer-Ingelheim/stain-transfer">Zingman et al.</a> (PyTorch only).</p></li>
</ul>
<p>The Macenko and Reinhard stain normalizers are highly efficient, with native Tensorflow, PyTorch, and Numpy/OpenCV implementations, and support GPU acceleration (see <a class="reference internal" href="../norm/#normalizer-performance"><span class="std std-ref">performance benchmarks</span></a>).</p>
<section id="during-tile-extraction">
<h3>During tile extraction<a class="headerlink" href="#during-tile-extraction" title="Permalink to this heading"></a></h3>
<p>Image tiles can be normalized during tile extraction by using the <code class="docutils literal notranslate"><span class="pre">normalizer</span></code> and <code class="docutils literal notranslate"><span class="pre">normalizer_source</span></code> arguments. <code class="docutils literal notranslate"><span class="pre">normalizer</span></code> is the name of the algorithm. The normalizer source - either a path to a reference image, or a <code class="docutils literal notranslate"><span class="pre">str</span></code> indicating one of our presets (e.g. <code class="docutils literal notranslate"><span class="pre">'v1'</span></code>, <code class="docutils literal notranslate"><span class="pre">'v2'</span></code>, <code class="docutils literal notranslate"><span class="pre">'v3'</span></code>) - can also be set with <code class="docutils literal notranslate"><span class="pre">normalizer_source</span></code>.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">P</span><span class="o">.</span><span class="n">extract_tiles</span><span class="p">(</span>
<span class="n">tile_px</span><span class="o">=</span><span class="mi">299</span><span class="p">,</span>
<span class="n">tile_um</span><span class="o">=</span><span class="mi">302</span><span class="p">,</span>
<span class="n">normalizer</span><span class="o">=</span><span class="s1">&#39;reinhard&#39;</span>
<span class="p">)</span>
</pre></div>
</div>
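<p>The stain normalization reference can likewise be chosen with <code class="docutils literal notranslate"><span class="pre">normalizer_source</span></code>. As a brief sketch, the following selects one of the built-in presets mentioned above:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Normalize with Reinhard, using the 'v2' preset as the fit reference
P.extract_tiles(
    tile_px=299,
    tile_um=302,
    normalizer='reinhard',
    normalizer_source='v2'
)
</pre></div>
</div>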
<p><a class="reference internal" href="../norm/#contextual-normalization"><span class="std std-ref">Contextual stain normalization</span></a> is supported when normalizing during tile extraction.</p>
</section>
<section id="on-the-fly">
<h3>On-the-fly<a class="headerlink" href="#on-the-fly" title="Permalink to this heading"></a></h3>
<p>The stain normalization implementations in Slideflow are fast and efficient, with separate Tensorflow-native, PyTorch-native, and Numpy/OpenCV implementations. In most instances, we recommend performing stain normalization on-the-fly as a part of image pre-processing, as this provides flexibility for changing normalization strategies without re-extracting all of your image tiles.</p>
<p>Real-time normalization can be performed by setting the <code class="docutils literal notranslate"><span class="pre">normalizer</span></code> and/or <code class="docutils literal notranslate"><span class="pre">normalizer_source</span></code> hyperparameters.</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span> <span class="nn">slideflow.model</span> <span class="kn">import</span> <span class="n">ModelParams</span>
<span class="n">hp</span> <span class="o">=</span> <span class="n">ModelParams</span><span class="p">(</span><span class="o">...</span><span class="p">,</span> <span class="n">normalizer</span><span class="o">=</span><span class="s1">&#39;reinhard&#39;</span><span class="p">)</span>
</pre></div>
</div>
<p>If a model was trained using a normalizer, the normalizer algorithm and fit information will be stored in the model metadata file, <code class="docutils literal notranslate"><span class="pre">params.json</span></code>, in the saved model folder. Any Slideflow function that uses this model will automatically process images using the same normalization strategy.</p>
<p>When stain normalizing on-the-fly, stain augmentation becomes available as a training augmentation technique. Read more about <a class="reference internal" href="../norm/#stain-augmentation"><span class="std std-ref">stain augmentation</span></a>.</p>
<p>The normalizer interfaces can also be accessed directly through <a class="reference internal" href="../norm/#slideflow.norm.StainNormalizer" title="slideflow.norm.StainNormalizer"><code class="xref py py-class docutils literal notranslate"><span class="pre">slideflow.norm.StainNormalizer</span></code></a>. See <a class="reference internal" href="../norm/"><code class="xref py py-mod docutils literal notranslate"><span class="pre">slideflow.norm</span></code></a> for examples and more information.</p>
</section>
</section>
<section id="performance-optimization">
<h2>Performance optimization<a class="headerlink" href="#performance-optimization" title="Permalink to this heading"></a></h2>
<p>As tile extraction relies heavily on random-access reads, significant performance gains can be achieved by either 1) moving all slides to an SSD, or 2) using an SSD or ramdisk buffer (to which slides will be copied prior to extraction). The use of a ramdisk buffer can improve tile extraction speed by 10-fold or greater! To maximize performance, pass the buffer path to the argument <code class="docutils literal notranslate"><span class="pre">buffer</span></code>.</p>
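<p>For example, the following sketch buffers slides to a ramdisk during extraction; the mount point below is a placeholder for your own buffer path:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Copy each slide to a fast buffer before reading tiles
P.extract_tiles(
    tile_px=299,
    tile_um=302,
    buffer='/mnt/ramdisk'  # hypothetical ramdisk mount point
)
</pre></div>
</div>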
</section>
<section id="extraction-reports">
<h2>Extraction reports<a class="headerlink" href="#extraction-reports" title="Permalink to this heading"></a></h2>
<p>Once tiles have been extracted, a PDF report will be generated with a summary and sample of tiles extracted from their corresponding slides. An example of such a report is given below. Reviewing this report may enable you to identify data corruption, artifacts with stain normalization, or suboptimal background filtering. The report is saved in the TFRecords directory.</p>
<img alt="../_images/example_report_small.jpg" src="../_images/example_report_small.jpg" />
<p>In addition to viewing reports after tile extraction, you may generate new reports for existing TFRecords by calling <a class="reference internal" href="../dataset/#slideflow.Dataset.tfrecord_report" title="slideflow.Dataset.tfrecord_report"><code class="xref py py-func docutils literal notranslate"><span class="pre">slideflow.Dataset.tfrecord_report()</span></code></a> on a given dataset. For example:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="n">dataset</span> <span class="o">=</span> <span class="n">P</span><span class="o">.</span><span class="n">dataset</span><span class="p">(</span><span class="n">tile_px</span><span class="o">=</span><span class="mi">299</span><span class="p">,</span> <span class="n">tile_um</span><span class="o">=</span><span class="mi">302</span><span class="p">)</span>
<span class="n">dataset</span><span class="o">.</span><span class="n">tfrecord_report</span><span class="p">(</span><span class="s2">&quot;/path/to/dest&quot;</span><span class="p">)</span>
</pre></div>
</div>
<p>You can also generate reports for slides that have not yet been extracted by passing <code class="docutils literal notranslate"><span class="pre">dry_run=True</span></code> to <a class="reference internal" href="../dataset/#slideflow.Dataset.extract_tiles" title="slideflow.Dataset.extract_tiles"><code class="xref py py-meth docutils literal notranslate"><span class="pre">slideflow.Dataset.extract_tiles()</span></code></a>.</p>
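<p>As a quick sketch, such a dry-run report could be generated with:</p>
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span># Generate an extraction report without writing any TFRecords
dataset.extract_tiles(dry_run=True)
</pre></div>
</div>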
</section>
</section>
</article>
</div>
<footer>
<div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
<a href="../training/" class="btn btn-neutral float-right" title="Training" accesskey="n" rel="next">Next <img src="../_static/images/chevron-right-orange.svg" class="next-page"></a>
<a href="../datasets_and_val/" class="btn btn-neutral" title="Datasets" accesskey="p" rel="prev"><img src="../_static/images/chevron-right-orange.svg" class="previous-page"> Previous</a>
</div>
<hr>
<div role="contentinfo">
<p>
&copy; Copyright 2023, James M Dolezal.
</p>
</div>
<div>
Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
</div>
</footer>
</div>
</div>
<div class="pytorch-content-right" id="pytorch-content-right">
<div class="pytorch-right-menu" id="pytorch-right-menu">
<div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
<ul>
<li><a class="reference internal" href="#">Slide Processing</a><ul>
<li><a class="reference internal" href="#tile-extraction">Tile extraction</a></li>
<li><a class="reference internal" href="#cell-segmentation">Cell segmentation</a></li>
<li><a class="reference internal" href="#regions-of-interest">Regions of Interest</a></li>
<li><a class="reference internal" href="#masking-filtering">Masking &amp; Filtering</a><ul>
<li><a class="reference internal" href="#otsu-s-thresholding">Otsu’s thresholding</a></li>
<li><a class="reference internal" href="#gaussian-blur-filtering">Gaussian blur filtering</a></li>
<li><a class="reference internal" href="#deepfocus">DeepFocus</a></li>
<li><a class="reference internal" href="#custom-deep-learning-qc">Custom deep learning QC</a></li>
<li><a class="reference internal" href="#segmentation-models-u-net">Segmentation Models (U-Net)</a></li>
<li><a class="reference internal" href="#grayspace-filtering">Grayspace filtering</a></li>
<li><a class="reference internal" href="#whitepsace-filtering">Whitepsace filtering</a></li>
</ul>
</li>
<li><a class="reference internal" href="#stain-normalization">Stain normalization</a><ul>
<li><a class="reference internal" href="#during-tile-extraction">During tile extraction</a></li>
<li><a class="reference internal" href="#on-the-fly">On-the-fly</a></li>
</ul>
</li>
<li><a class="reference internal" href="#performance-optimization">Performance optimization</a></li>
<li><a class="reference internal" href="#extraction-reports">Extraction reports</a></li>
</ul>
</li>
</ul>
</div>
</div>
</div>
</section>
</div>
<script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
<script src="../_static/doctools.js"></script>
<script src="../_static/sphinx_highlight.js"></script>
<script type="text/javascript" src="../_static/js/vendor/jquery-3.6.3.min.js"></script>
<script type="text/javascript" src="../_static/js/vendor/popper.min.js"></script>
<script type="text/javascript" src="../_static/js/vendor/bootstrap.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
<script type="text/javascript" src="../_static/js/theme.js"></script>
<script type="text/javascript">
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
<!-- Begin Footer -->
<!-- End Footer -->
<!-- Begin Mobile Menu -->
<div class="mobile-main-menu">
<div class="container-fluid">
<div class="container">
<div class="mobile-main-menu-header-container">
<a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
<a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
</div>
</div>
</div>
<div class="mobile-main-menu-links-container">
<div class="main-menu">
<ul>
<li>
<a href="https://slideflow.dev">Docs</a>
</li>
<li>
<a href="https://slideflow.dev/tutorial1/">Tutorials</a>
</li>
<li>
<a href="https://github.com/slideflow/slideflow">Github</a>
</li>
</ul>
</div>
</div>
</div>
<!-- End Mobile Menu -->
<script script type="text/javascript">
var collapsedSections = [];
</script>
<script type="text/javascript" src="../_static/js/vendor/anchor.min.js"></script>
<script type="text/javascript">
$(document).ready(function() {
mobileMenu.bind();
mobileTOC.bind();
pytorchAnchors.bind();
sideMenus.bind();
scrollToAnchor.bind();
highlightNavigation.bind();
mainMenuDropdown.bind();
filterTags.bind();
// Add class to links that have code blocks, since we cannot create links in code blocks
$("article.pytorch-article a span.pre").each(function(e) {
$(this).closest("a").addClass("has-code");
});
})
</script>
</body>
</html>