Diff of /configure.py [000000] .. [cec8b4]

Switch to side-by-side view

--- a
+++ b/configure.py
@@ -0,0 +1,157 @@
+"""Configure Slideflow projects for reproducing published results."""
+
+import os
+import click
+import slideflow as sf
+from biscuit import experiment
+from os.path import exists, join, abspath
+
+# -----------------------------------------------------------------------------
+
+@click.command()
+@click.option('--train_slides', type=str, help='Directory to training slides, for cross-validation', required=True)
+@click.option('--train_anns', type=str, help='Directory to annotation file for training data (CSV)', default='annotations/tcga.csv', show_default=True)
+@click.option('--train_roi', type=str, help='Directory to CSV ROI files, for cross-validation')
+@click.option('--outcome', type=str, help='Outcome (annotation header) that assigns class labels.', default='cohort', show_default=True)
+@click.option('--outcome1', type=str, help='First class label.', default='LUAD', show_default=True)
+@click.option('--outcome2', type=str, help='Second class label.', default='LUSC', show_default=True)
+@click.option('--val_slides', type=str, help='Directory to external evaluation slides, for evaluation')
+@click.option('--val_anns', type=str, help='Directory to annotation file for training data (CSV)', default='annotations/cptac.csv', show_default=True)
+def configure_projects(
+    train_slides,
+    train_anns,
+    train_roi,
+    outcome,
+    outcome1,
+    outcome2,
+    val_slides=None,
+    val_anns=None,
+):
+    """Configure Slideflow projects for reproducing published results.
+
+    This script uses the provided slides to build Slideflow projects in the
+    'projects/' folder of the current working directory. Clinical annotations
+    (class labels) are read from the 'annotations/' folder unless otherwise
+    specified.
+
+    Training slides from The Cancer Genome Atlas (TCGA) are available at
+    https://portal.gdc.cancer.gov/ (projects TCGA-LUAD and TCGA-LUSC).
+
+    Validation slides from the Clinical Proteomics Tumor Analysis Consortium
+    (CPTAC) are available at https://proteomics.cancer.gov/data-portal
+    (projects CPTAC-LUAD and CPTAC-LSCC).
+    """
+
+    # Absolute paths
+    train_slides = abspath(train_slides)
+    train_anns = abspath(train_anns)
+    out = abspath('projects')
+    if val_slides:
+        val_slides = abspath(train_slides)
+    if val_anns:
+        val_anns = abspath(val_anns)
+    if train_roi:
+        train_roi = abspath(train_roi)
+    gan_path = abspath('gan')
+    if not exists(gan_path):
+        os.makedirs(gan_path)
+
+    # --- Set up projects -----------------------------------------------------
+
+    # Set up training project
+    if (not exists(join(out, 'training'))
+       or not exists(join(out, 'training', 'settings.json'))):
+        print("Setting up training project...")
+        tP = sf.Project(
+            join(out, 'training'),
+            sources=['Training'],
+            annotations=train_anns
+        )
+        tP.add_source(
+            name='Training',
+            slides=train_slides,
+            roi=(train_roi if train_roi else train_slides),
+            tiles=join(out, 'training', 'tiles'),
+            tfrecords=join(out, 'training', 'tfrecords')
+        )
+        tP.add_source(
+            name='LUNG_GAN',
+            slides=gan_path,
+            roi=gan_path,
+            tiles=gan_path,
+            tfrecords=gan_path
+        )
+        print(f"Training project setup at {join(out, 'training')}.")
+    else:
+        tP = sf.Project(join(out, 'training'))
+        print("Loading training project which already exists.")
+
+    # Set up external evaluation project
+    if val_slides:
+        if not val_anns:
+            msg = "If providing evaluation slides, evaluation annotations "
+            msg += "must also be provided (--val_anns)"
+            raise ValueError(msg)
+        if (not exists(join(out, 'evaluation'))
+           or not exists(join(out, 'evaluation', 'settings.json'))):
+            print("Setting up evaluation project.")
+            eP = sf.Project(
+                join(out, 'evaluation'),
+                sources=['Evaluation'],
+                annotations=val_anns
+            )
+            eP.add_source(
+                name='Evaluation',
+                slides=val_slides,
+                roi=val_slides,
+                tiles=join(out, 'evaluation', 'tiles'),
+                tfrecords=join(out, 'evaluation', 'tfrecords')
+            )
+            print(f"Evaluation project setup at {join(out, 'evaluation')}.")
+        else:
+            eP = sf.Project(join(out, 'evaluation'))
+            print("Loading evaluation project which already exists.")
+
+    # --- Perform tile extraction ---------------------------------------------
+
+    print("Extracting tiles from WSIs at 299px, 302um")
+    for P in (eP, tP):
+        P.extract_tiles(
+            tile_px=299,
+            tile_um=302,
+            qc='both',
+            img_format='png'
+        )
+    print("Extracting tiles from WSIs at 512px, 400um (for GAN training)")
+    for P in (eP, tP):
+        P.extract_tiles(
+            tile_px=512,
+            tile_um=400,
+            qc='both',
+            img_format='png'
+        )
+    print("Finished tile extraction, project configuration complete.")
+
+    # --- Save GAN training configuration -------------------------------------
+
+    if not exists('gan_config.json'):
+        gan_config = {
+            "project_path": join(out, 'training'),
+            "tile_px": 512,
+            "tile_um": 400,
+            "model_type": "categorical",
+            "outcomes": [outcome],
+            "filters": {outcome: [outcome1, outcome2]}
+        }
+        sf.util.write_json(gan_config, 'gan_config.json')
+        print("Wrote GAN configuration to gan_config.json")
+    else:
+        print("GAN configuration already exists at gan_config.json")
+
+
+# ----------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    configure_projects()  # pylint: disable=no-value-for-parameter
+
+# ----------------------------------------------------------------------------