biscuit / Git / [cec8b4] /configure.py

Models:
DanielG/
biscuit
Downloads: 1
[cec8b4]: / configure.py
History
Download this file
158 lines (139 with data), 6.0 kB

"""Configure Slideflow projects for reproducing published results."""

import os
import click
import slideflow as sf
from biscuit import experiment
from os.path import exists, join, abspath

# -----------------------------------------------------------------------------

# Command-line entry point. Parameters (all supplied by click from the options
# declared below):
#   train_slides: directory of training slides (required).
#   train_anns:   annotation CSV for the training project.
#   train_roi:    optional directory of CSV ROI files; when omitted, the
#                 training slide directory is used as the ROI location.
#   outcome:      annotation header that assigns class labels.
#   outcome1/2:   the two class labels written to the GAN config filter.
#   val_slides:   optional directory of external evaluation slides; when
#                 omitted, no evaluation project is created or processed.
#   val_anns:     annotation CSV for the evaluation project.
# Raises ValueError if val_slides is given while val_anns is empty.
@click.command()
@click.option('--train_slides', type=str, help='Directory to training slides, for cross-validation', required=True)
@click.option('--train_anns', type=str, help='Directory to annotation file for training data (CSV)', default='annotations/tcga.csv', show_default=True)
@click.option('--train_roi', type=str, help='Directory to CSV ROI files, for cross-validation')
@click.option('--outcome', type=str, help='Outcome (annotation header) that assigns class labels.', default='cohort', show_default=True)
@click.option('--outcome1', type=str, help='First class label.', default='LUAD', show_default=True)
@click.option('--outcome2', type=str, help='Second class label.', default='LUSC', show_default=True)
@click.option('--val_slides', type=str, help='Directory to external evaluation slides, for evaluation')
@click.option('--val_anns', type=str, help='Directory to annotation file for evaluation data (CSV)', default='annotations/cptac.csv', show_default=True)
def configure_projects(
    train_slides,
    train_anns,
    train_roi,
    outcome,
    outcome1,
    outcome2,
    val_slides=None,
    val_anns=None,
):
    """Configure Slideflow projects for reproducing published results.

    This script uses the provided slides to build Slideflow projects in the
    'projects/' folder of the current working directory. Clinical annotations
    (class labels) are read from the 'annotations/' folder unless otherwise
    specified.

    Training slides from The Cancer Genome Atlas (TCGA) are available at
    https://portal.gdc.cancer.gov/ (projects TCGA-LUAD and TCGA-LUSC).

    Validation slides from the Clinical Proteomics Tumor Analysis Consortium
    (CPTAC) are available at https://proteomics.cancer.gov/data-portal
    (projects CPTAC-LUAD and CPTAC-LSCC).
    """
    # NOTE: the docstring above doubles as the command's --help text (click
    # uses the callback docstring), so it is kept user-facing; implementation
    # notes live in comments instead.

    # Absolute paths
    train_slides = abspath(train_slides)
    train_anns = abspath(train_anns)
    out = abspath('projects')
    if val_slides:
        # Resolve the evaluation directory itself (not the training one).
        val_slides = abspath(val_slides)
    if val_anns:
        val_anns = abspath(val_anns)
    if train_roi:
        train_roi = abspath(train_roi)
    gan_path = abspath('gan')
    os.makedirs(gan_path, exist_ok=True)

    # --- Set up projects -----------------------------------------------------

    # Set up training project. A project counts as existing only once its
    # settings.json is present (a missing directory implies a missing file).
    train_dir = join(out, 'training')
    if not exists(join(train_dir, 'settings.json')):
        print("Setting up training project...")
        tP = sf.Project(
            train_dir,
            sources=['Training'],
            annotations=train_anns
        )
        tP.add_source(
            name='Training',
            slides=train_slides,
            roi=(train_roi if train_roi else train_slides),
            tiles=join(train_dir, 'tiles'),
            tfrecords=join(train_dir, 'tfrecords')
        )
        # Every path of the GAN source points at the shared 'gan' directory.
        tP.add_source(
            name='LUNG_GAN',
            slides=gan_path,
            roi=gan_path,
            tiles=gan_path,
            tfrecords=gan_path
        )
        print(f"Training project setup at {train_dir}.")
    else:
        tP = sf.Project(train_dir)
        print("Loading training project which already exists.")

    # Set up external evaluation project (optional).
    eP = None
    if val_slides:
        if not val_anns:
            msg = "If providing evaluation slides, evaluation annotations "
            msg += "must also be provided (--val_anns)"
            raise ValueError(msg)
        eval_dir = join(out, 'evaluation')
        if not exists(join(eval_dir, 'settings.json')):
            print("Setting up evaluation project.")
            eP = sf.Project(
                eval_dir,
                sources=['Evaluation'],
                annotations=val_anns
            )
            eP.add_source(
                name='Evaluation',
                slides=val_slides,
                roi=val_slides,
                tiles=join(eval_dir, 'tiles'),
                tfrecords=join(eval_dir, 'tfrecords')
            )
            print(f"Evaluation project setup at {eval_dir}.")
        else:
            eP = sf.Project(eval_dir)
            print("Loading evaluation project which already exists.")

    # --- Perform tile extraction ---------------------------------------------

    # Only process projects that were actually configured: the evaluation
    # project is absent when --val_slides is not given. Evaluation is
    # extracted before training, as before.
    projects = [P for P in (eP, tP) if P is not None]
    extraction_passes = (
        (299, 302, ""),
        (512, 400, " (for GAN training)"),
    )
    for tile_px, tile_um, note in extraction_passes:
        print(f"Extracting tiles from WSIs at {tile_px}px, {tile_um}um{note}")
        for P in projects:
            P.extract_tiles(
                tile_px=tile_px,
                tile_um=tile_um,
                qc='both',
                img_format='png'
            )
    print("Finished tile extraction, project configuration complete.")

    # --- Save GAN training configuration -------------------------------------

    # Written relative to the current working directory; an existing config
    # is never overwritten.
    if not exists('gan_config.json'):
        gan_config = {
            "project_path": train_dir,
            "tile_px": 512,
            "tile_um": 400,
            "model_type": "categorical",
            "outcomes": [outcome],
            "filters": {outcome: [outcome1, outcome2]}
        }
        sf.util.write_json(gan_config, 'gan_config.json')
        print("Wrote GAN configuration to gan_config.json")
    else:
        print("GAN configuration already exists at gan_config.json")


# ----------------------------------------------------------------------------

if __name__ == "__main__":
    # configure_projects is a click command: click parses the command line and
    # supplies the arguments, so it is called here with none (hence the pylint
    # suppression).
    configure_projects()  # pylint: disable=no-value-for-parameter

# ----------------------------------------------------------------------------