Diff of /label.py [000000] .. [5e0db2]

Switch to unified view

a b/label.py
1
"""Entry-point script to label radiology reports."""
2
import pandas as pd
3
4
from args import ArgParser
5
from loader import Loader
6
from stages import Extractor, Classifier, Aggregator
7
from constants import *
8
9
10
def write(reports, labels, output_path, verbose=False):
    """Save the reports and their per-category labels as a CSV file.

    The output has one column named by REPORTS holding `reports`, followed
    by one column per entry of CATEGORIES. The column for the i-th category
    is filled from `labels[:, i]`, so `labels` must support 2-D indexing
    (presumably a NumPy array with one row per report -- confirm against
    the aggregator that produces it).

    Args:
        reports: The report texts to store in the REPORTS column.
        labels: 2-D indexable container of labels, one column per category.
        output_path: Destination handed to `DataFrame.to_csv`.
        verbose: When true, print a progress message before writing.
    """
    table = pd.DataFrame({REPORTS: reports})

    # Attach each category's labels as its own named column.
    for position, name in enumerate(CATEGORIES):
        table[name] = labels[:, position]

    if verbose:
        print(f"Writing reports and labels to {output_path}.")

    # Fix the column order explicitly: report text first, then categories.
    ordered_columns = [REPORTS] + CATEGORIES
    table[ordered_columns].to_csv(output_path, index=False)
20
21
22
def label(args):
    """Run the labeling pipeline end to end for the report(s) in `args`.

    The pipeline builds a loader and the three stages (extractor,
    classifier, aggregator), loads the reports, runs extraction and
    classification over the loader's collection, aggregates the result into
    labels, and finally writes reports plus labels to `args.output_path`.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            reports_path, sections_to_extract, extract_strict,
            mention_phrases_dir, unmention_phrases_dir,
            pre_negation_uncertainty_path, negation_path,
            post_negation_uncertainty_path, output_path and verbose.
    """
    # --- Build the pipeline components -----------------------------------
    report_loader = Loader(
        args.reports_path,
        args.sections_to_extract,
        args.extract_strict,
    )
    mention_extractor = Extractor(
        args.mention_phrases_dir,
        args.unmention_phrases_dir,
        verbose=args.verbose,
    )
    mention_classifier = Classifier(
        args.pre_negation_uncertainty_path,
        args.negation_path,
        args.post_negation_uncertainty_path,
        verbose=args.verbose,
    )
    label_aggregator = Aggregator(CATEGORIES, verbose=args.verbose)

    # --- Run the stages in order -----------------------------------------
    # Loading, extraction and classification all operate in place on the
    # loader's collection; only aggregation returns a value.
    report_loader.load()
    mention_extractor.extract(report_loader.collection)
    mention_classifier.classify(report_loader.collection)

    # Collapse the classified mentions into one set of labels per report.
    report_labels = label_aggregator.aggregate(report_loader.collection)

    write(
        report_loader.reports,
        report_labels,
        output_path=args.output_path,
        verbose=args.verbose,
    )
49
50
51
if __name__ == "__main__":
    # Parse the command-line arguments, then hand them to the pipeline.
    cli_args = ArgParser().parse_args()
    label(cli_args)