Diff of /configure.py [000000] .. [cec8b4]

Switch to unified view

a b/configure.py
1
"""Configure Slideflow projects for reproducing published results."""
2
3
import os
4
import click
5
import slideflow as sf
6
from biscuit import experiment
7
from os.path import exists, join, abspath
8
9
# -----------------------------------------------------------------------------
10
11
@click.command()
@click.option('--train_slides', type=str, help='Directory to training slides, for cross-validation', required=True)
@click.option('--train_anns', type=str, help='Directory to annotation file for training data (CSV)', default='annotations/tcga.csv', show_default=True)
@click.option('--train_roi', type=str, help='Directory to CSV ROI files, for cross-validation')
@click.option('--outcome', type=str, help='Outcome (annotation header) that assigns class labels.', default='cohort', show_default=True)
@click.option('--outcome1', type=str, help='First class label.', default='LUAD', show_default=True)
@click.option('--outcome2', type=str, help='Second class label.', default='LUSC', show_default=True)
@click.option('--val_slides', type=str, help='Directory to external evaluation slides, for evaluation')
@click.option('--val_anns', type=str, help='Directory to annotation file for evaluation data (CSV)', default='annotations/cptac.csv', show_default=True)
def configure_projects(
    train_slides,
    train_anns,
    train_roi,
    outcome,
    outcome1,
    outcome2,
    val_slides=None,
    val_anns=None,
):
    """Configure Slideflow projects for reproducing published results.

    This script uses the provided slides to build Slideflow projects in the
    'projects/' folder of the current working directory. Clinical annotations
    (class labels) are read from the 'annotations/' folder unless otherwise
    specified. The external evaluation project is only created when
    evaluation slides (--val_slides) are provided.

    Training slides from The Cancer Genome Atlas (TCGA) are available at
    https://portal.gdc.cancer.gov/ (projects TCGA-LUAD and TCGA-LUSC).

    Validation slides from the Clinical Proteomics Tumor Analysis Consortium
    (CPTAC) are available at https://proteomics.cancer.gov/data-portal
    (projects CPTAC-LUAD and CPTAC-LSCC).
    """
    # NOTE: the docstring above is rendered by click as the --help text, so
    # parameter documentation lives in the option help strings instead.

    # Absolute paths
    train_slides = abspath(train_slides)
    train_anns = abspath(train_anns)
    out = abspath('projects')
    if val_slides:
        # Resolve the evaluation slides themselves (previously this resolved
        # the training slides, pointing the evaluation project at the wrong
        # directory).
        val_slides = abspath(val_slides)
    if val_anns:
        val_anns = abspath(val_anns)
    if train_roi:
        train_roi = abspath(train_roi)
    gan_path = abspath('gan')
    os.makedirs(gan_path, exist_ok=True)

    training_dir = join(out, 'training')
    evaluation_dir = join(out, 'evaluation')

    # --- Set up projects -----------------------------------------------------

    # Set up training project. A missing settings.json is treated as "project
    # not yet created" (this also covers a missing project directory).
    if not exists(join(training_dir, 'settings.json')):
        print("Setting up training project...")
        train_project = sf.Project(
            training_dir,
            sources=['Training'],
            annotations=train_anns
        )
        train_project.add_source(
            name='Training',
            slides=train_slides,
            # Fall back to the slide directory when no ROI directory is given.
            roi=(train_roi if train_roi else train_slides),
            tiles=join(training_dir, 'tiles'),
            tfrecords=join(training_dir, 'tfrecords')
        )
        train_project.add_source(
            name='LUNG_GAN',
            slides=gan_path,
            roi=gan_path,
            tiles=gan_path,
            tfrecords=gan_path
        )
        print(f"Training project setup at {training_dir}.")
    else:
        train_project = sf.Project(training_dir)
        print("Loading training project which already exists.")

    # Set up external evaluation project (optional).
    eval_project = None
    if val_slides:
        # --val_anns has a default, so this only triggers when it was
        # explicitly set to an empty value.
        if not val_anns:
            msg = "If providing evaluation slides, evaluation annotations "
            msg += "must also be provided (--val_anns)"
            raise ValueError(msg)
        if not exists(join(evaluation_dir, 'settings.json')):
            print("Setting up evaluation project.")
            eval_project = sf.Project(
                evaluation_dir,
                sources=['Evaluation'],
                annotations=val_anns
            )
            eval_project.add_source(
                name='Evaluation',
                slides=val_slides,
                roi=val_slides,
                tiles=join(evaluation_dir, 'tiles'),
                tfrecords=join(evaluation_dir, 'tfrecords')
            )
            print(f"Evaluation project setup at {evaluation_dir}.")
        else:
            eval_project = sf.Project(evaluation_dir)
            print("Loading evaluation project which already exists.")

    # --- Perform tile extraction ---------------------------------------------

    # Only extract from projects that were actually configured; without
    # --val_slides there is no evaluation project (previously a NameError).
    # Evaluation is processed before training, as before.
    if eval_project is None:
        projects = [train_project]
    else:
        projects = [eval_project, train_project]

    # (tile_px, tile_um, suffix appended to the progress message)
    extraction_settings = (
        (299, 302, ""),
        (512, 400, " (for GAN training)"),
    )
    for tile_px, tile_um, note in extraction_settings:
        print(f"Extracting tiles from WSIs at {tile_px}px, {tile_um}um{note}")
        for project in projects:
            project.extract_tiles(
                tile_px=tile_px,
                tile_um=tile_um,
                qc='both',
                img_format='png'
            )
    print("Finished tile extraction, project configuration complete.")

    # --- Save GAN training configuration -------------------------------------

    # An existing gan_config.json is never overwritten.
    if not exists('gan_config.json'):
        gan_config = {
            "project_path": training_dir,
            "tile_px": 512,
            "tile_um": 400,
            "model_type": "categorical",
            "outcomes": [outcome],
            "filters": {outcome: [outcome1, outcome2]}
        }
        sf.util.write_json(gan_config, 'gan_config.json')
        print("Wrote GAN configuration to gan_config.json")
    else:
        print("GAN configuration already exists at gan_config.json")
150
151
152
# ----------------------------------------------------------------------------
153
154
# Script entry point: when run directly, invoke the click command, which
# parses the command line and supplies the function's arguments itself
# (hence the pylint suppression for the argument-less call).
if __name__ == '__main__':
    configure_projects()  # pylint: disable=no-value-for-parameter
156
157
# ----------------------------------------------------------------------------