|
a |
|
b/medacy/pipelines/testing_pipeline.py |
|
|
1 |
import spacy |
|
|
2 |
|
|
|
3 |
from medacy.pipeline_components.feature_extractors.discrete_feature_extractor import FeatureExtractor |
|
|
4 |
from medacy.pipeline_components.learners.crf_learner import get_crf |
|
|
5 |
from medacy.pipeline_components.tokenizers.systematic_review_tokenizer import SystematicReviewTokenizer |
|
|
6 |
from medacy.pipelines.base.base_pipeline import BasePipeline |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
class TestingPipeline(BasePipeline):
    """
    Lightweight pipeline intended for test runs.

    Wires together a SystematicReviewTokenizer, a FeatureExtractor and the
    learner produced by get_crf(), all on top of the spaCy model
    "en_core_web_sm".
    """

    def __init__(self, entities, **kwargs):
        """
        Build the pipeline around spaCy's small English model.

        :param entities: passed through unchanged as the first positional
            argument of BasePipeline.__init__
        :param kwargs: additional keyword arguments passed through to
            BasePipeline.__init__
        """
        # Load the language model first so it can be handed to the base class.
        nlp = spacy.load("en_core_web_sm")
        super().__init__(entities, spacy_pipeline=nlp, **kwargs)

    def get_learner(self):
        """
        Provide the learner used by this pipeline.

        :return: a 2-tuple of the label "CRF_l2sgd" and the object returned
            by get_crf()
        """
        learner = get_crf()
        return "CRF_l2sgd", learner

    def get_tokenizer(self):
        """
        Provide the tokenizer used by this pipeline.

        :return: a SystematicReviewTokenizer constructed from this
            pipeline's spacy_pipeline attribute
        """
        nlp = self.spacy_pipeline
        return SystematicReviewTokenizer(nlp)

    def get_feature_extractor(self):
        """
        Provide the feature extractor used by this pipeline.

        :return: a FeatureExtractor configured with window_size=3 and the
            spaCy attributes listed below
        """
        spacy_attributes = ['pos_', 'shape_', 'prefix_', 'suffix_', 'text']
        return FeatureExtractor(window_size=3, spacy_features=spacy_attributes)