[6c353a]: / medacy / pipelines / drug_event_pipeline.py

Download this file

45 lines (33 with data), 1.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import spacy
from medacy.pipeline_components.feature_extractors.discrete_feature_extractor import FeatureExtractor
from medacy.pipeline_components.feature_overlayers.lexicon_component import LexiconOverlayer
from medacy.pipeline_components.feature_overlayers.metamap.metamap_all_types_component import MetaMapAllTypesOverlayer
from medacy.pipeline_components.feature_overlayers.table_matcher_component import TableMatcherOverlayer
from medacy.pipeline_components.learners.crf_learner import get_crf
from medacy.pipeline_components.tokenizers.character_tokenizer import CharacterTokenizer
from medacy.pipelines.base.base_pipeline import BasePipeline
class DrugEventPipeline(BasePipeline):
"""
Pipeline for recognition of adverse drug events from the 2018/19 FDA OSE drug label challenge
Created by Corey Sutphin of NLP@VCU
"""
def __init__(self, entities, metamap=None, lexicon={}, **kwargs):
"""
Init a pipeline for processing data related to identifying adverse drug events
:param entities: a list of entities
:param metamap: instance of MetaMap
:param entities: entities to be identified, for this pipeline adverse drug events
:param lexicon: Dictionary with labels and their corresponding lexicons to match on
"""
super().__init__(entities, spacy_pipeline=spacy.load("en_core_web_sm"), **kwargs)
if metamap:
self.add_component(MetaMapAllTypesOverlayer, metamap)
if lexicon is not None:
self.add_component(LexiconOverlayer, lexicon)
self.add_component(TableMatcherOverlayer)
def get_learner(self):
return "CRF_l2sgd", get_crf()
def get_tokenizer(self):
return CharacterTokenizer(self.spacy_pipeline)
def get_feature_extractor(self):
return FeatureExtractor(window_size=3, spacy_features=['pos_', 'shape_', 'prefix_', 'suffix_', 'like_num', 'text', 'head'])