Diff of /src/nlp_pipeline.py [000000] .. [735bb5]

Switch to side-by-side view

--- a
+++ b/src/nlp_pipeline.py
@@ -0,0 +1,84 @@
+# Base Dependencies
+# -----------------
+import logging
+from typing import List 
+
+# Spacy Dependencies
+# ------------------
+from negspacy.negation import Negex
+from spacy import load as spacy_load
+from spacy.language import Language
+from spacy.tokens import Doc, Span
+
+# Constants
+# ---------
+from constants import N2C2_ENTITY_TYPES, DDI_ENTITY_TYPES
+
+
+# Spacy's pipeline
+NLP: Language = None
+
+
+# Auxiliar functions
+# ------------------
+def get_pipeline() -> Language:
+    """Gets Spacy's pipeline, loading it if necessary.
+
+    Returns:
+        Language: Spacy's pipeline singleton
+    """
+    global NLP
+
+    # load only once
+    if NLP is None:
+        logging.warning("Loading Spacy's pipeline...")
+
+        # load Scispacy's pipeline
+        NLP = spacy_load("en_core_sci_sm", exclude=["ner"])
+
+        # add negation detection component
+        ent_types = [t.upper() for t in N2C2_ENTITY_TYPES] + [
+            t.upper() for t in DDI_ENTITY_TYPES
+        ]
+        NLP.add_pipe("negex", config={"ent_types": ent_types})
+
+        logging.warning("Spacy loaded!")
+    return NLP
+
+
+def set_spacy_entities(
+    relation: Doc, 
+    left_tokens: Doc,
+    entity1_tokens: Doc,
+    entity1_type: str,
+    middle_tokens: Doc,
+    entity2_tokens: Doc,
+    entity2_type: str, 
+    right_tokens: Doc,
+) -> List[Span]:
+    """_summary_
+
+    Args:
+        relation (Doc): _description_
+        left_tokens (Doc): _description_
+        entity1_tokens (Doc): _description_
+        entity1_type (str): _description_
+        middle_tokens (Doc): _description_
+        entity2_tokens (Doc): _description_
+        entity2_type (str): _description_
+        right_tokens (Doc): _description_
+
+    Returns:
+        List[Span]: _description_
+    """
+
+    begin_e1 = len(left_tokens)
+    end_e1 = begin_e1 + len(entity1_tokens)
+
+    begin_e2 = end_e1 + len(middle_tokens)
+    end_e2 = begin_e2 + len(entity2_tokens)
+
+    e1 = Span(relation, begin_e1, end_e1, label=entity1_type)
+    e2 = Span(relation, begin_e2, end_e2, label=entity2_type)
+    
+    relation.ents = [e1, e2]