# File: src/features/token_distance_feature.py
1
# Base Dependencies
2
# ----------------
3
import numpy
4
5
# Local Dependencies
6
# ------------------
7
from models import RelationCollection
8
9
# 3rd-Party Dependencies
10
# ----------------------
11
from sklearn.base import BaseEstimator
12
13
14
class TokenDistanceFeature(BaseEstimator):
    """
    TokenDistanceFeature

    Computes the number of tokens between the two entities of a relation.

    The transformer is stateless: ``fit`` learns nothing and ``transform``
    only measures the length of every entry in ``collection.middle_tokens``.

    Source:
        Alimova and Tutubalina (2020) - Multiple features for clinical relation extraction: A machine learning approach
    """

    def __init__(self):
        """Create the transformer; it has no hyper-parameters."""
        pass

    def get_feature_names(self, input_features=None):
        """Return the name of the single output column.

        Args:
            input_features: accepted for API compatibility and ignored.

        Returns:
            A one-element list: ``["token_dist"]``.
        """
        return ["token_dist"]

    def get_feature_names_out(self, input_features=None):
        """Return the output column name as an array of str objects.

        Counterpart of ``get_feature_names`` for scikit-learn versions that
        query ``get_feature_names_out`` instead.

        Args:
            input_features: accepted for API compatibility and ignored.

        Returns:
            A one-element object array holding ``"token_dist"``.
        """
        return numpy.asarray(self.get_feature_names(), dtype=object)

    def create_token_distance_feature(
        self, collection: RelationCollection
    ) -> numpy.ndarray:
        """Build the token-distance column for ``collection``.

        Args:
            collection: object exposing an iterable ``middle_tokens`` whose
                items support ``len()`` (presumably one token sequence per
                relation -- confirm against ``RelationCollection``).

        Returns:
            Integer array of shape ``(n, 1)`` where ``n`` is the number of
            items in ``collection.middle_tokens``; row ``i`` holds the length
            of the i-th item.
        """
        distances = [len(tokens) for tokens in collection.middle_tokens]
        # reshape(-1, 1) keeps the result two-dimensional even when the
        # collection is empty; a bare numpy.array([]) would be a 1-D float
        # array of shape (0,), inconsistent with the non-empty case.
        return numpy.array(distances, dtype=int).reshape(-1, 1)

    def fit(self, x: RelationCollection, y=None):
        """No-op fit; returns ``self`` so calls can be chained."""
        return self

    def transform(self, x: RelationCollection, y=None) -> numpy.ndarray:
        """Return the ``(n, 1)`` token-distance matrix for ``x``."""
        return self.create_token_distance_feature(x)

    def fit_transform(self, x: RelationCollection, y=None) -> numpy.ndarray:
        """Fit (a no-op) and return the token-distance matrix for ``x``."""
        return self.fit(x, y).transform(x)