# medacy/pipelines/testing_pipeline.py
1
import spacy
2
3
from medacy.pipeline_components.feature_extractors.discrete_feature_extractor import FeatureExtractor
4
from medacy.pipeline_components.learners.crf_learner import get_crf
5
from medacy.pipeline_components.tokenizers.systematic_review_tokenizer import SystematicReviewTokenizer
6
from medacy.pipelines.base.base_pipeline import BasePipeline
7
8
9
class TestingPipeline(BasePipeline):
    """
    A pipeline for test running.

    Combines spaCy's small English model, the SystematicReviewTokenizer,
    a discrete FeatureExtractor and the learner returned by ``get_crf``.
    """

    def __init__(self, entities, **kwargs):
        """
        Create a testing pipeline utilizing spaCy's small English model
        ('en_core_web_sm').

        :param entities: forwarded unchanged to BasePipeline.__init__
            (presumably the entity labels to learn -- confirm against BasePipeline)
        :param kwargs: any additional keyword arguments, forwarded unchanged
            to BasePipeline.__init__
        """
        # The model is loaded on every instantiation and handed to the base
        # class as its `spacy_pipeline` argument.
        super().__init__(entities, spacy_pipeline=spacy.load("en_core_web_sm"), **kwargs)

    def get_learner(self):
        """
        Return the learner used by this pipeline.

        :return: a 2-tuple of the learner name "CRF_l2sgd" and the object
            produced by ``get_crf()``
        """
        return "CRF_l2sgd", get_crf()

    def get_tokenizer(self):
        """
        Return the tokenizer used by this pipeline.

        :return: a SystematicReviewTokenizer built from ``self.spacy_pipeline``
            (assumed to be set by BasePipeline.__init__ -- confirm)
        """
        return SystematicReviewTokenizer(self.spacy_pipeline)

    def get_feature_extractor(self):
        """
        Return the feature extractor used by this pipeline.

        :return: a FeatureExtractor configured with a window size of 3 and the
            spaCy token attributes 'pos_', 'shape_', 'prefix_', 'suffix_' and 'text'
        """
        return FeatureExtractor(window_size=3, spacy_features=['pos_', 'shape_', 'prefix_', 'suffix_', 'text'])