al-medical-RE / Git / Diff of /src/features/negation

Models:
philipB/
al-medical-RE
Downloads: 1
Diff of /src/features/negation_feature.py [000000] .. [735bb5]
Switch to side-by-side view

--- a
+++ b/src/features/negation_feature.py
@@ -0,0 +1,65 @@
+# Base Dependencies
+# ----------------
+import numpy
+
+# Local Dependencies
+# ------------------
+from models.relation_collection import RelationCollection
+
+# 3rd-Party Dependencies
+# ----------------------
+from sklearn.base import BaseEstimator
+
+
+class NegationFeature(BaseEstimator):
+    """Builds three binary negation flags per relation, in column order (1 = cue absent, 0 = cue present).
+
+        1. no_word: no token of the sentence has the lemma "no" or "not".
+        2. no_phrase: relation text has none of "not recommended", "should not be", "must not be" (case-sensitive).
+        3. no_target: no token in `left_tokens` has the lemma "no" or "not".
+
+    NOTE(review): "n't" is only caught if the lemmatizer maps it to "not", and `left_tokens` is presumably the
+    context preceding the target entities - confirm both against RelationCollection.
+
+    Source:
+        Chowdhury and Lavelli (2013) - Exploiting the Scope of Negations and Heterogeneous Features for Relation
+        Extraction: A Case Study for Drug-Drug Interaction Extraction
+    """
+
+    _NEGATION_LEMMAS = ("no", "not")
+    _NEGATION_PHRASES = ("not recommended", "should not be", "must not be")
+
+    def __init__(self):
+        pass
+
+    def get_feature_names(self, input_features=None):
+        """Returns the feature names in column order; `input_features` is ignored."""
+        return ["no_word", "no_phrase", "no_target"]
+
+    def _has_negation(self, tokens) -> bool:
+        """Returns True if the `lemma_` of any token is a negation cue."""
+        return any(token.lemma_ in self._NEGATION_LEMMAS for token in tokens)
+
+    def compute_not_feature(self, collection: RelationCollection) -> numpy.ndarray:
+        """Returns an integer array of shape (len(collection), 3) with one row of flags per relation."""
+        features = []
+        for i in range(len(collection)):
+            no_word = not self._has_negation(collection.tokens[i])
+            text = collection.relations[i].text
+            no_phrase = not any(phrase in text for phrase in self._NEGATION_PHRASES)
+            no_target = not self._has_negation(collection.left_tokens[i])
+            features.append([int(no_word), int(no_phrase), int(no_target)])
+        # reshape keeps the three columns even when the collection is empty
+        return numpy.array(features, dtype=int).reshape(-1, 3)
+
+    def fit(self, x: RelationCollection, y=None):
+        """Nothing is learned from the data; returns self."""
+        return self
+
+    def transform(self, x: RelationCollection, y=None) -> numpy.ndarray:
+        """Returns the negation flags of `x`."""
+        return self.compute_not_feature(x)
+
+    def fit_transform(self, x: RelationCollection, y=None) -> numpy.ndarray:
+        """Same result as `transform`: there is nothing to fit."""
+        return self.compute_not_feature(x)