a b/src/nlp_pipeline.py
1
# Base Dependencies
2
# -----------------
3
import logging
4
from typing import List 
5
6
# Spacy Dependencies
7
# ------------------
8
from negspacy.negation import Negex
9
from spacy import load as spacy_load
10
from spacy.language import Language
11
from spacy.tokens import Doc, Span
12
13
# Constants
14
# ---------
15
from constants import N2C2_ENTITY_TYPES, DDI_ENTITY_TYPES
16
17
18
# Spacy's pipeline
19
NLP: Language = None
20
21
22
# Auxiliary functions
23
# ------------------
24
def get_pipeline() -> Language:
    """Return the shared Spacy pipeline, building it on the first call.

    The pipeline is cached in the module-level ``NLP`` variable. When that
    variable is still ``None``, the ``en_core_sci_sm`` model is loaded with
    its ``ner`` component excluded and a ``negex`` component is added,
    configured with the upper-cased N2C2 and DDI entity types.

    Returns:
        Language: Spacy's pipeline singleton
    """
    global NLP

    # already built: hand back the cached instance
    if NLP is not None:
        return NLP

    logging.warning("Loading Spacy's pipeline...")

    # Scispacy's model, without its own NER component
    NLP = spacy_load("en_core_sci_sm", exclude=["ner"])

    # entity labels passed to the negation detection component
    negex_labels = [label.upper() for label in N2C2_ENTITY_TYPES]
    negex_labels.extend(label.upper() for label in DDI_ENTITY_TYPES)
    NLP.add_pipe("negex", config={"ent_types": negex_labels})

    logging.warning("Spacy loaded!")
    return NLP
47
48
49
def set_spacy_entities(
50
    relation: Doc, 
51
    left_tokens: Doc,
52
    entity1_tokens: Doc,
53
    entity1_type: str,
54
    middle_tokens: Doc,
55
    entity2_tokens: Doc,
56
    entity2_type: str, 
57
    right_tokens: Doc,
58
) -> List[Span]:
59
    """_summary_
60
61
    Args:
62
        relation (Doc): _description_
63
        left_tokens (Doc): _description_
64
        entity1_tokens (Doc): _description_
65
        entity1_type (str): _description_
66
        middle_tokens (Doc): _description_
67
        entity2_tokens (Doc): _description_
68
        entity2_type (str): _description_
69
        right_tokens (Doc): _description_
70
71
    Returns:
72
        List[Span]: _description_
73
    """
74
75
    begin_e1 = len(left_tokens)
76
    end_e1 = begin_e1 + len(entity1_tokens)
77
78
    begin_e2 = end_e1 + len(middle_tokens)
79
    end_e2 = begin_e2 + len(entity2_tokens)
80
81
    e1 = Span(relation, begin_e1, end_e1, label=entity1_type)
82
    e2 = Span(relation, begin_e2, end_e2, label=entity2_type)
83
    
84
    relation.ents = [e1, e2]