Diff of /app/st_utils.py [000000] .. [d69072]

Switch to unified view

a b/app/st_utils.py
1
import sys
2
import os
3
4
sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
5
from src.utils.parse_data import parse_concept, parse_ast, find_char_indexes
6
import pandas as pd
7
from tqdm import tqdm
8
import spacy_streamlit
9
10
11
def visualize_record(record, task="assertion"):
    """
    Visualize a single record with the manual spaCy NER renderer.

    The record's text is read from ``data/train/txt/<filename>.txt`` and split
    on newlines. Every line becomes its own manual document whose entities are
    the annotations with a matching 1-based ``start_line``.

    Args:
        record: mapping with a "filename" key (file name without extension).
        task: "concept" to label spans with the ``concept_type`` values parsed
            from the ``.con`` file, or "assertion" to label them with the
            ``assertion_type`` values parsed from the ``.ast`` file.

    Returns:
        The result of ``spacy_streamlit.visualize_ner``.

    Raises:
        AssertionError: if ``task`` is neither "concept" nor "assertion".
    """
    # Explicit raise instead of `assert` so the check still runs under
    # `python -O`; AssertionError is kept so callers see the same exception.
    if task not in ("concept", "assertion"):
        raise AssertionError("task must be 'concept' or 'assertion'")

    filename = record["filename"]
    # Explicit encoding so decoding does not depend on the platform locale.
    with open("data/train/txt" + os.sep + filename + ".txt", encoding="utf-8") as f:
        text = f.read()
    lines = text.split("\n")

    # The two tasks differ only in the annotation file, its parser, the column
    # that carries the label, and the set of labels offered in the UI.
    if task == "concept":
        annotations = parse_concept("data/train/concept/" + filename + ".con")
        label_column = "concept_type"
        possible_labels = ["problem", "test", "treatment"]
    else:
        annotations = parse_ast("data/train/ast/" + filename + ".ast")
        label_column = "assertion_type"
        possible_labels = ["present", "possible", "absent", "conditional", "hypothetical", "associated_with_someone_else"]

    df = pd.DataFrame(annotations)

    if df.empty:
        # With no annotations the frame may have no "start_line" column at
        # all, so the per-line filter below would raise KeyError. Render the
        # text without entities instead.
        doc = [{"text": line, "ents": []} for line in lines]
    else:
        # find_char_indexes presumably adds the start/end character offsets
        # that are renamed below -- TODO confirm against src.utils.parse_data.
        df = df.apply(find_char_indexes, axis=1, args=(text,))
        df = df.rename(
            columns={
                "start_char_index": "start",
                "end_char_index": "end",
                label_column: "label",
            }
        )
        doc = [
            {
                "text": line,
                # start_line is compared 1-based against the line position.
                "ents": df[df["start_line"] == i + 1][["start", "end", "label"]].to_dict(orient="records"),
            }
            for i, line in enumerate(lines)
        ]

    return spacy_streamlit.visualize_ner(
        doc,
        labels=possible_labels,
        show_table=False,
        title="",
        manual=True,
        displacy_options={
            "colors": {
                "problem": "#f08080",
                "treatment": "#9bddff",
                "test": "#ffdab9",
                "present": "#f08080",
                "possible": "#00ffff",
                "absent": "#ff00ff",
                "conditional": "#ffa500",
                "hypothetical": "#ffdab9",
                "associated_with_someone_else": "#00ff7f",
            },
        },
    )
63
64
65
if __name__ == "__main__":
    # Manual smoke run: render one hard-coded training record with the
    # default task ("assertion").
    sample_record = {"filename": "018636330_DH"}
    visualize_record(sample_record)