|
a |
|
b/configure.py |
|
|
1 |
"""Configure Slideflow projects for reproducing published results.""" |
|
|
2 |
|
|
|
3 |
import os |
|
|
4 |
import click |
|
|
5 |
import slideflow as sf |
|
|
6 |
from biscuit import experiment |
|
|
7 |
from os.path import exists, join, abspath |
|
|
8 |
|
|
|
9 |
# ----------------------------------------------------------------------------- |
|
|
10 |
|
|
|
11 |
@click.command()
@click.option('--train_slides', type=str, help='Directory to training slides, for cross-validation', required=True)
@click.option('--train_anns', type=str, help='Directory to annotation file for training data (CSV)', default='annotations/tcga.csv', show_default=True)
@click.option('--train_roi', type=str, help='Directory to CSV ROI files, for cross-validation')
@click.option('--outcome', type=str, help='Outcome (annotation header) that assigns class labels.', default='cohort', show_default=True)
@click.option('--outcome1', type=str, help='First class label.', default='LUAD', show_default=True)
@click.option('--outcome2', type=str, help='Second class label.', default='LUSC', show_default=True)
@click.option('--val_slides', type=str, help='Directory to external evaluation slides, for evaluation')
@click.option('--val_anns', type=str, help='Directory to annotation file for evaluation data (CSV)', default='annotations/cptac.csv', show_default=True)
def configure_projects(
    train_slides,
    train_anns,
    train_roi,
    outcome,
    outcome1,
    outcome2,
    val_slides=None,
    val_anns=None,
):
    """Configure Slideflow projects for reproducing published results.

    This script uses the provided slides to build Slideflow projects in the
    'projects/' folder of the current working directory. Clinical annotations
    (class labels) are read from the 'annotations/' folder unless otherwise
    specified.

    Training slides from The Cancer Genome Atlas (TCGA) are available at
    https://portal.gdc.cancer.gov/ (projects TCGA-LUAD and TCGA-LUSC).

    Validation slides from the Clinical Proteomics Tumor Analysis Consortium
    (CPTAC) are available at https://proteomics.cancer.gov/data-portal
    (projects CPTAC-LUAD and CPTAC-LSCC).
    """
    # NOTE: click prints the docstring above as the command's --help text, so
    # per-parameter documentation lives in the @click.option help strings.

    # Absolute paths. Everything is resolved against the current working
    # directory so the stored project settings do not depend on where the
    # projects are later loaded from.
    train_slides = abspath(train_slides)
    train_anns = abspath(train_anns)
    out = abspath('projects')
    if val_slides:
        # Resolve the evaluation slide directory itself (previously this
        # resolved train_slides, pointing evaluation at the training data).
        val_slides = abspath(val_slides)
    if val_anns:
        val_anns = abspath(val_anns)
    if train_roi:
        train_roi = abspath(train_roi)
    gan_path = abspath('gan')
    if not exists(gan_path):
        os.makedirs(gan_path)

    training_root = join(out, 'training')
    evaluation_root = join(out, 'evaluation')

    # --- Set up projects -----------------------------------------------------

    # Set up training project. A project counts as existing only when its
    # settings.json is present (which also implies the directory exists).
    if not exists(join(training_root, 'settings.json')):
        print("Setting up training project...")
        tP = sf.Project(
            training_root,
            sources=['Training'],
            annotations=train_anns
        )
        tP.add_source(
            name='Training',
            slides=train_slides,
            # Without a dedicated ROI directory, the slide directory is used.
            roi=(train_roi if train_roi else train_slides),
            tiles=join(training_root, 'tiles'),
            tfrecords=join(training_root, 'tfrecords')
        )
        # Source for GAN-related data; every location points at the local
        # 'gan' directory created above.
        tP.add_source(
            name='LUNG_GAN',
            slides=gan_path,
            roi=gan_path,
            tiles=gan_path,
            tfrecords=gan_path
        )
        print(f"Training project setup at {join(out, 'training')}.")
    else:
        tP = sf.Project(training_root)
        print("Loading training project which already exists.")

    # Set up external evaluation project (only when evaluation slides are
    # supplied; eP stays None otherwise).
    eP = None
    if val_slides:
        if not val_anns:
            msg = "If providing evaluation slides, evaluation annotations "
            msg += "must also be provided (--val_anns)"
            raise ValueError(msg)
        if not exists(join(evaluation_root, 'settings.json')):
            print("Setting up evaluation project.")
            eP = sf.Project(
                evaluation_root,
                sources=['Evaluation'],
                annotations=val_anns
            )
            eP.add_source(
                name='Evaluation',
                slides=val_slides,
                roi=val_slides,
                tiles=join(evaluation_root, 'tiles'),
                tfrecords=join(evaluation_root, 'tfrecords')
            )
            print(f"Evaluation project setup at {join(out, 'evaluation')}.")
        else:
            eP = sf.Project(evaluation_root)
            print("Loading evaluation project which already exists.")

    # --- Perform tile extraction ---------------------------------------------

    # Only extract from projects that were actually configured. The
    # evaluation project is optional, so referencing it unconditionally
    # raised NameError when --val_slides was omitted. Evaluation is processed
    # first, matching the original ordering.
    projects = [tP] if eP is None else [eP, tP]

    print("Extracting tiles from WSIs at 299px, 302um")
    for P in projects:
        P.extract_tiles(
            tile_px=299,
            tile_um=302,
            qc='both',
            img_format='png'
        )
    print("Extracting tiles from WSIs at 512px, 400um (for GAN training)")
    for P in projects:
        P.extract_tiles(
            tile_px=512,
            tile_um=400,
            qc='both',
            img_format='png'
        )
    print("Finished tile extraction, project configuration complete.")

    # --- Save GAN training configuration -------------------------------------

    # An existing gan_config.json is never overwritten.
    if not exists('gan_config.json'):
        gan_config = {
            "project_path": join(out, 'training'),
            "tile_px": 512,
            "tile_um": 400,
            "model_type": "categorical",
            "outcomes": [outcome],
            "filters": {outcome: [outcome1, outcome2]}
        }
        sf.util.write_json(gan_config, 'gan_config.json')
        print("Wrote GAN configuration to gan_config.json")
    else:
        print("GAN configuration already exists at gan_config.json")
|
|
150 |
|
|
|
151 |
|
|
|
152 |
# ---------------------------------------------------------------------------- |
|
|
153 |
|
|
|
154 |
if __name__ == "__main__":
    # Script entry point: click parses the command-line arguments and passes
    # them to configure_projects, hence the call with no explicit arguments.
    configure_projects()  # pylint: disable=no-value-for-parameter
|
|
156 |
|
|
|
157 |
# ---------------------------------------------------------------------------- |