[735bb5]: / src / features / position_feature.py

Download this file

90 lines (70 with data), 2.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Base Dependencies
# ----------------
import numpy
# Local Dependencies
# ------------------
from models import RelationCollection
# 3rd-Party Dependencies
# ----------------------
from sklearn.base import BaseEstimator
# Constants
# ---------
from constants import (
N2C2_ATTR_ENTITY_CANDIDATES,
DDI_ATTR_ENTITY_CANDIDATES,
)
class PositionFeature(BaseEstimator):
    """
    Position Distance

    Computes the position of the entity candidate (drug) with respect to
    the attribute among the entire entity candidates of the attribute, where
    the position of medical attribute is set to 0.

    For every relation two integer columns are produced:

    * ``position_1`` -- ``-position`` when the attribute is the second entity
      of the relation, otherwise ``0``.
    * ``position_2`` -- ``position`` when the attribute is the first entity
      of the relation, otherwise ``0``.

    where ``position`` is the number of entities lying between the two
    relation entities whose type is listed as a candidate for the attribute
    type.

    Source:
        Alimova and Tutubalina (2020) - Multiple features for clinical
        relation extraction: A machine learning approach
    """

    def __init__(self, dataset: str):
        """
        Select the attribute -> candidate-types table for the given dataset.

        Args:
            dataset: name of the dataset, either ``"n2c2"`` or ``"ddi"``.

        Raises:
            ValueError: if ``dataset`` is neither ``"n2c2"`` nor ``"ddi"``.
        """
        if dataset == "n2c2":
            self.attr_entity_candidates = N2C2_ATTR_ENTITY_CANDIDATES
        elif dataset == "ddi":
            self.attr_entity_candidates = DDI_ATTR_ENTITY_CANDIDATES
        else:
            raise ValueError(
                "only datasets 'n2c2' and 'ddi' are supported, but no '{}'".format(
                    dataset
                )
            )
        # stored under the constructor argument's name so that the estimator
        # parameters can be introspected
        self.dataset = dataset

    def get_feature_names(self, input_features=None):
        """Return the names of the two generated columns.

        ``input_features`` is accepted but not used.
        """
        return ["position_1", "position_2"]

    def create_position_feature(self, collection: RelationCollection) -> numpy.ndarray:
        """
        Build the position feature matrix for a collection of relations.

        Args:
            collection: relations to featurize; only ``collection.relations``
                is read.

        Returns:
            Integer array with one row per relation and two columns
            (``position_1``, ``position_2``). An empty collection yields an
            array of shape ``(0, 2)``.

        Raises:
            ValueError: if neither entity of a relation has the type of the
                relation's attribute.
        """
        features = []
        for r in collection.relations:
            # the first ordered entity is treated as the attribute; the
            # second one (the drug) is not needed here
            attr, _ = r._ordered_entities
            candidates = self.attr_entity_candidates[attr.type]

            # count middle entities which could form the same type of relation
            # i.e., count number of middle entities that are drugs for n2c2 and DDI
            position = sum(1 for ent in r.middle_entities if ent.type in candidates)

            # NOTE(review): direction is decided by comparing entity *types*,
            # not identity; if both entities share the attribute's type the
            # first branch is always taken -- confirm this is intended.
            if r.entity1.type == attr.type:
                # if the attribute is the first entity, the position is positive
                feature = [0, position]
            elif r.entity2.type == attr.type:
                # if the attribute is the second entity, the position is negative
                feature = [-position, 0]
            else:
                raise ValueError("none of the entities correspond with the attribute")

            features.append(feature)

        if not features:
            # numpy.array([]) would be 1-D with shape (0,) and float dtype;
            # keep the advertised two-column integer layout so the result can
            # still be stacked with other feature matrices
            return numpy.zeros((0, 2), dtype=int)

        return numpy.array(features)

    def fit(self, x: RelationCollection, y=None):
        """No-op: the feature has nothing to learn. Returns ``self``."""
        return self

    def transform(self, x: RelationCollection) -> numpy.ndarray:
        """Return the position feature matrix for the relations in ``x``."""
        return self.create_position_feature(x)

    def fit_transform(self, x: RelationCollection, y=None) -> numpy.ndarray:
        """Fit (a no-op) and return the position feature matrix for ``x``."""
        return self.fit(x, y).transform(x)