[735bb5]: / src / features / relative_distance_feature.py

Download this file

87 lines (68 with data), 2.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Base Dependencies
# ----------------
from typing import List, Tuple
# Local Dependencies
# ------------------
from models import RelationCollection
# 3rd-Party Dependencies
# ----------------------
from spacy.tokens import Doc
from sklearn.base import BaseEstimator
class RelativeDistanceFeature(BaseEstimator):
    """Relative Distance Feature

    Relative distance encoding for each of the two entities e1, e2 in a relation.
    It marks the positions of all the words in a target entity as 0.
    Every word to its right is assigned an incrementally higher distance number and
    every word to its left is assigned an incrementally lower number.

    Example:
        For a relation split into 2 left tokens, 1 token for e1, 1 middle token,
        2 tokens for e2 and 1 right token, the encodings are::

            e1: [-2, -1, 0, 1, 2, 3, 4]
            e2: [-4, -3, -2, -1, 0, 0, 1]
    """

    def __init__(self):
        # The feature has no hyper-parameters; nothing to store.
        pass

    def get_feature_names(self, input_features=None):
        """Return the name of the single feature produced by this transformer.

        Args:
            input_features: unused; accepted only for signature compatibility.

        Returns:
            a one-element list containing ``"relative_distance"``
        """
        return ["relative_distance"]

    @staticmethod
    def _distances(n_before: int, n_entity: int, n_after: int) -> List[int]:
        """Build the signed offsets around an entity span.

        Args:
            n_before (int): number of tokens preceding the entity
            n_entity (int): number of tokens inside the entity
            n_after (int): number of tokens following the entity

        Returns:
            ``[-n_before, ..., -1]`` followed by ``n_entity`` zeros and then
            ``[1, ..., n_after]``
        """
        return (
            list(range(-n_before, 0))
            + [0] * n_entity
            + list(range(1, n_after + 1))
        )

    def relative_distance(
        self, collection: RelationCollection
    ) -> Tuple[List[List[int]], List[List[int]]]:
        """
        Args:
            collection (RelationCollection): collection on whose relations the relative
                distance feature is computed

        Returns:
            relative distance of every token with respect to e1 and e2, as a pair
            of lists holding one list of offsets per relation in the collection
        """
        entities1: List[Doc] = collection.entities1_tokens
        entities2: List[Doc] = collection.entities2_tokens
        left: List[Doc] = collection.left_tokens
        middle: List[Doc] = collection.middle_tokens
        right: List[Doc] = collection.right_tokens

        e1_rd_all = []
        e2_rd_all = []

        # Index by position rather than zip(): a part list shorter than the
        # collection then raises IndexError instead of being silently truncated.
        for i in range(len(collection)):
            n_left = len(left[i])
            n_e1 = len(entities1[i])
            n_middle = len(middle[i])
            n_e2 = len(entities2[i])
            n_right = len(right[i])

            # relative distance to e1: everything after e1 counts as "right of e1"
            e1_rd_all.append(
                self._distances(n_left, n_e1, n_middle + n_e2 + n_right)
            )
            # relative distance to e2: everything before e2 counts as "left of e2"
            e2_rd_all.append(
                self._distances(n_left + n_e1 + n_middle, n_e2, n_right)
            )

        return e1_rd_all, e2_rd_all

    def fit(self, x: RelationCollection, y=None):
        """No-op fit: nothing is learned from the data.

        Args:
            x (RelationCollection): ignored
            y: ignored

        Returns:
            this instance
        """
        return self

    def transform(
        self, x: RelationCollection
    ) -> Tuple[List[List[int]], List[List[int]]]:
        """Compute the relative distance feature for ``x``.

        Args:
            x (RelationCollection): collection to encode

        Returns:
            the ``(e1, e2)`` pair produced by ``relative_distance``
        """
        return self.relative_distance(x)

    def fit_transform(
        self, x: RelationCollection, y=None
    ) -> Tuple[List[List[int]], List[List[int]]]:
        """Compute the relative distance feature for ``x``; ``y`` is ignored.

        Args:
            x (RelationCollection): collection to encode
            y: ignored

        Returns:
            the ``(e1, e2)`` pair produced by ``relative_distance``
        """
        return self.relative_distance(x)