[cec8b4]: / configure.py

Download this file

158 lines (139 with data), 6.0 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""Configure Slideflow projects for reproducing published results."""
import os
import click
import slideflow as sf
from biscuit import experiment
from os.path import exists, join, abspath
# -----------------------------------------------------------------------------
@click.command()
@click.option(
    '--train_slides',
    type=str,
    help='Directory to training slides, for cross-validation',
    required=True,
)
@click.option(
    '--train_anns',
    type=str,
    help='Directory to annotation file for training data (CSV)',
    default='annotations/tcga.csv',
    show_default=True,
)
@click.option(
    '--train_roi',
    type=str,
    help='Directory to CSV ROI files, for cross-validation',
)
@click.option(
    '--outcome',
    type=str,
    help='Outcome (annotation header) that assigns class labels.',
    default='cohort',
    show_default=True,
)
@click.option(
    '--outcome1',
    type=str,
    help='First class label.',
    default='LUAD',
    show_default=True,
)
@click.option(
    '--outcome2',
    type=str,
    help='Second class label.',
    default='LUSC',
    show_default=True,
)
@click.option(
    '--val_slides',
    type=str,
    help='Directory to external evaluation slides, for evaluation',
)
@click.option(
    '--val_anns',
    type=str,
    help='Directory to annotation file for evaluation data (CSV)',
    default='annotations/cptac.csv',
    show_default=True,
)
def configure_projects(
    train_slides,
    train_anns,
    train_roi,
    outcome,
    outcome1,
    outcome2,
    val_slides=None,
    val_anns=None,
):
    """Configure Slideflow projects for reproducing published results.

    This script uses the provided slides to build Slideflow projects in the
    'projects/' folder of the current working directory. Clinical annotations
    (class labels) are read from the 'annotations/' folder unless otherwise
    specified.

    Training slides from The Cancer Genome Atlas (TCGA) are available at
    https://portal.gdc.cancer.gov/ (projects TCGA-LUAD and TCGA-LUSC).

    Validation slides from the Clinical Proteomics Tumor Analysis Consortium
    (CPTAC) are available at https://proteomics.cancer.gov/data-portal
    (projects CPTAC-LUAD and CPTAC-LSCC).
    """
    # NOTE: the docstring above doubles as the `--help` text, so parameter
    # notes live here instead:
    #   train_slides / train_anns / train_roi -- training slide directory,
    #       annotation CSV and optional ROI directory (slides dir if omitted).
    #   outcome, outcome1, outcome2 -- annotation header and the two class
    #       labels written into the GAN configuration filter.
    #   val_slides / val_anns -- optional evaluation slide directory and its
    #       annotation CSV; the evaluation project is only built when
    #       val_slides is given.
    # Raises ValueError if val_slides is given with an empty val_anns.

    # --- Resolve absolute paths ----------------------------------------------
    train_slides = abspath(train_slides)
    train_anns = abspath(train_anns)
    out = abspath('projects')
    if val_slides:
        # Resolve the evaluation directory itself (previously this was
        # overwritten with the training slide directory).
        val_slides = abspath(val_slides)
    if val_anns:
        val_anns = abspath(val_anns)
    if train_roi:
        train_roi = abspath(train_roi)
    gan_path = abspath('gan')
    os.makedirs(gan_path, exist_ok=True)

    train_dir = join(out, 'training')
    eval_dir = join(out, 'evaluation')

    # --- Set up projects -----------------------------------------------------
    # Set up training project. A project counts as existing only when its
    # settings.json is present (which also implies the folder exists).
    if not exists(join(train_dir, 'settings.json')):
        print("Setting up training project...")
        tP = sf.Project(
            train_dir,
            sources=['Training'],
            annotations=train_anns
        )
        tP.add_source(
            name='Training',
            slides=train_slides,
            roi=(train_roi if train_roi else train_slides),
            tiles=join(train_dir, 'tiles'),
            tfrecords=join(train_dir, 'tfrecords')
        )
        # Every path of the GAN source points at the local 'gan' folder.
        tP.add_source(
            name='LUNG_GAN',
            slides=gan_path,
            roi=gan_path,
            tiles=gan_path,
            tfrecords=gan_path
        )
        print(f"Training project setup at {train_dir}.")
    else:
        tP = sf.Project(train_dir)
        print("Loading training project which already exists.")

    # Set up external evaluation project (optional).
    eP = None
    if val_slides:
        if not val_anns:
            msg = "If providing evaluation slides, evaluation annotations "
            msg += "must also be provided (--val_anns)"
            raise ValueError(msg)
        if not exists(join(eval_dir, 'settings.json')):
            print("Setting up evaluation project.")
            eP = sf.Project(
                eval_dir,
                sources=['Evaluation'],
                annotations=val_anns
            )
            eP.add_source(
                name='Evaluation',
                slides=val_slides,
                roi=val_slides,
                tiles=join(eval_dir, 'tiles'),
                tfrecords=join(eval_dir, 'tfrecords')
            )
            print(f"Evaluation project setup at {eval_dir}.")
        else:
            eP = sf.Project(eval_dir)
            print("Loading evaluation project which already exists.")

    # --- Perform tile extraction ---------------------------------------------
    # Only extract from projects that were actually configured; the
    # evaluation project is absent when --val_slides is not supplied.
    # Order (evaluation first, then training) matches the original script.
    projects = [P for P in (eP, tP) if P is not None]
    extraction_passes = (
        (299, 302, ""),
        (512, 400, " (for GAN training)"),
    )
    for tile_px, tile_um, note in extraction_passes:
        print(f"Extracting tiles from WSIs at {tile_px}px, {tile_um}um{note}")
        for P in projects:
            P.extract_tiles(
                tile_px=tile_px,
                tile_um=tile_um,
                qc='both',
                img_format='png'
            )
    print("Finished tile extraction, project configuration complete.")

    # --- Save GAN training configuration -------------------------------------
    # An existing gan_config.json is left untouched.
    if not exists('gan_config.json'):
        gan_config = {
            "project_path": train_dir,
            "tile_px": 512,
            "tile_um": 400,
            "model_type": "categorical",
            "outcomes": [outcome],
            "filters": {outcome: [outcome1, outcome2]}
        }
        sf.util.write_json(gan_config, 'gan_config.json')
        print("Wrote GAN configuration to gan_config.json")
    else:
        print("GAN configuration already exists at gan_config.json")
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    # configure_projects is a click command: click parses sys.argv and
    # supplies the arguments, so it is deliberately called with none.
    configure_projects()  # pylint: disable=no-value-for-parameter
# ----------------------------------------------------------------------------