Switch to unified view

a b/medacy/pipelines/systematic_review_pipeline.py
1
import spacy
2
3
from medacy.pipeline_components.feature_extractors.discrete_feature_extractor import FeatureExtractor
4
from medacy.pipeline_components.feature_overlayers.metamap.metamap import MetaMap
5
from medacy.pipeline_components.feature_overlayers.metamap.metamap_component import MetaMapOverlayer
6
from medacy.pipeline_components.learners.crf_learner import get_crf
7
from medacy.pipeline_components.tokenizers.systematic_review_tokenizer import SystematicReviewTokenizer
8
from medacy.pipelines.base.base_pipeline import BasePipeline
9
10
11
class SystematicReviewPipeline(BasePipeline):
    """
    Named entity recognition pipeline for clinical text, designed over-top of the
    TAC 2018 SRIE track challenge.

    Created by Andriy Mulyar (andriymulyar.com) of NLP@VCU
    """

    def __init__(self, entities, metamap=None, **kwargs):
        """
        Set up the pipeline on spaCy's small English model ("en_core_web_sm") and,
        when requested, register a MetaMap overlayer component.

        :param entities: a list of entities
        :param metamap: optional; when truthy it is handed to the ``MetaMap`` constructor and
            the resulting object is registered together with ``MetaMapOverlayer``
            (NOTE(review): presumably whatever ``MetaMap.__init__`` expects, e.g. a location,
            rather than a ready-made instance -- confirm against ``MetaMap``)
        :param kwargs: extra keyword arguments forwarded unchanged to ``BasePipeline.__init__``
        """
        language_model = spacy.load("en_core_web_sm")
        super().__init__(entities, spacy_pipeline=language_model, **kwargs)

        # Nothing further to configure unless MetaMap was requested.
        if not metamap:
            return

        metamap_wrapper = MetaMap(metamap)
        self.add_component(MetaMapOverlayer, metamap_wrapper)

    def get_learner(self):
        """
        Provide the learner used by this pipeline.

        :return: a 2-tuple of the label "CRF_l2sgd" and the object returned by ``get_crf()``
        """
        learner_label = "CRF_l2sgd"
        learner = get_crf()
        return learner_label, learner

    def get_tokenizer(self):
        """
        Provide the tokenizer used by this pipeline.

        :return: a ``SystematicReviewTokenizer`` constructed from ``self.spacy_pipeline``
        """
        tokenizer = SystematicReviewTokenizer(self.spacy_pipeline)
        return tokenizer

    def get_feature_extractor(self):
        """
        Provide the feature extractor used by this pipeline.

        :return: a ``FeatureExtractor`` configured with a window size of 10 and the
            spaCy features listed below
        """
        token_attributes = ['pos_', 'shape_', 'prefix_', 'suffix_', 'text']
        return FeatureExtractor(window_size=10, spacy_features=token_attributes)