|
a |
|
b/medacy/pipelines/drug_event_pipeline.py |
|
|
1 |
import spacy |
|
|
2 |
|
|
|
3 |
from medacy.pipeline_components.feature_extractors.discrete_feature_extractor import FeatureExtractor |
|
|
4 |
from medacy.pipeline_components.feature_overlayers.lexicon_component import LexiconOverlayer |
|
|
5 |
from medacy.pipeline_components.feature_overlayers.metamap.metamap_all_types_component import MetaMapAllTypesOverlayer |
|
|
6 |
from medacy.pipeline_components.feature_overlayers.table_matcher_component import TableMatcherOverlayer |
|
|
7 |
from medacy.pipeline_components.learners.crf_learner import get_crf |
|
|
8 |
from medacy.pipeline_components.tokenizers.character_tokenizer import CharacterTokenizer |
|
|
9 |
from medacy.pipelines.base.base_pipeline import BasePipeline |
|
|
10 |
|
|
|
11 |
|
|
|
12 |
class DrugEventPipeline(BasePipeline):
    """
    Pipeline for recognition of adverse drug events from the 2018/19 FDA OSE drug label challenge

    Created by Corey Sutphin of NLP@VCU
    """

    # Sentinel meaning "the caller did not pass a lexicon". It replaces the former
    # mutable ``{}`` default so that instances never share one dictionary object,
    # while still letting an explicit ``lexicon=None`` disable the lexicon overlayer.
    _DEFAULT_LEXICON = object()

    def __init__(self, entities, metamap=None, lexicon=_DEFAULT_LEXICON, **kwargs):
        """
        Init a pipeline for processing data related to identifying adverse drug events

        :param entities: a list of the entities to be identified, for this pipeline adverse drug events
        :param metamap: instance of MetaMap; when truthy, a MetaMapAllTypesOverlayer is added to the pipeline
        :param lexicon: Dictionary with labels and their corresponding lexicons to match on.
            When omitted, a new empty dictionary is used; pass None to skip adding the LexiconOverlayer.
        :param kwargs: any additional keyword arguments, forwarded unchanged to BasePipeline
        """
        super().__init__(entities, spacy_pipeline=spacy.load("en_core_web_sm"), **kwargs)

        # Resolve the sentinel to a fresh dict per instance (same effective default as before).
        if lexicon is self._DEFAULT_LEXICON:
            lexicon = {}

        if metamap:
            self.add_component(MetaMapAllTypesOverlayer, metamap)

        # An empty dict still registers the overlayer; only an explicit None skips it.
        if lexicon is not None:
            self.add_component(LexiconOverlayer, lexicon)

        self.add_component(TableMatcherOverlayer)

    def get_learner(self):
        """
        :return: a tuple of the learner name ("CRF_l2sgd") and the object returned by get_crf()
        """
        return "CRF_l2sgd", get_crf()

    def get_tokenizer(self):
        """
        :return: a CharacterTokenizer constructed from this pipeline's spaCy pipeline
        """
        return CharacterTokenizer(self.spacy_pipeline)

    def get_feature_extractor(self):
        """
        :return: a FeatureExtractor configured with a window size of 3 and this pipeline's spaCy features
        """
        spacy_features = ['pos_', 'shape_', 'prefix_', 'suffix_', 'like_num', 'text', 'head']
        return FeatureExtractor(window_size=3, spacy_features=spacy_features)