# medacy/pipelines/drug_event_pipeline.py
1
import spacy
2
3
from medacy.pipeline_components.feature_extractors.discrete_feature_extractor import FeatureExtractor
4
from medacy.pipeline_components.feature_overlayers.lexicon_component import LexiconOverlayer
5
from medacy.pipeline_components.feature_overlayers.metamap.metamap_all_types_component import MetaMapAllTypesOverlayer
6
from medacy.pipeline_components.feature_overlayers.table_matcher_component import TableMatcherOverlayer
7
from medacy.pipeline_components.learners.crf_learner import get_crf
8
from medacy.pipeline_components.tokenizers.character_tokenizer import CharacterTokenizer
9
from medacy.pipelines.base.base_pipeline import BasePipeline
10
11
12
class DrugEventPipeline(BasePipeline):
    """
    Pipeline for recognition of adverse drug events from the 2018/19 FDA OSE drug label challenge

    Created by Corey Sutphin of NLP@VCU
    """

    # Sentinel marking "lexicon argument omitted". It replaces the former
    # mutable ``{}`` default so that instances never share one dict object,
    # while still letting callers pass ``None`` to skip the lexicon overlayer.
    _UNSET = object()

    def __init__(self, entities, metamap=None, lexicon=_UNSET, **kwargs):
        """
        Init a pipeline for processing data related to identifying adverse drug events

        :param entities: a list of entities to be identified, for this pipeline adverse drug events
        :param metamap: instance of MetaMap; when truthy, a MetaMapAllTypesOverlayer is added
        :param lexicon: Dictionary with labels and their corresponding lexicons to match on.
            When omitted, a new empty dictionary is used and the LexiconOverlayer is still
            added; pass None explicitly to skip the LexiconOverlayer.
        :param kwargs: additional keyword arguments forwarded unchanged to BasePipeline
        """
        super().__init__(entities, spacy_pipeline=spacy.load("en_core_web_sm"), **kwargs)

        if lexicon is DrugEventPipeline._UNSET:
            # Fresh dict per instance instead of a shared default object.
            lexicon = {}

        if metamap:
            self.add_component(MetaMapAllTypesOverlayer, metamap)

        if lexicon is not None:
            self.add_component(LexiconOverlayer, lexicon)

        # The table matcher is added unconditionally.
        self.add_component(TableMatcherOverlayer)

    def get_learner(self):
        """
        :return: a tuple of the learner name ("CRF_l2sgd") and the object returned by get_crf()
        """
        return "CRF_l2sgd", get_crf()

    def get_tokenizer(self):
        """
        :return: a CharacterTokenizer constructed from this pipeline's spacy_pipeline
        """
        return CharacterTokenizer(self.spacy_pipeline)

    def get_feature_extractor(self):
        """
        :return: a FeatureExtractor with a window size of 3 and the spaCy features listed below
        """
        spacy_features = ['pos_', 'shape_', 'prefix_', 'suffix_', 'like_num', 'text', 'head']
        return FeatureExtractor(window_size=3, spacy_features=spacy_features)