[6c353a]: / medacy / pipelines / bert_pipeline.py

Download this file

71 lines (57 with data), 2.5 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
"""
BERT Pipeline
"""
import spacy
from medacy.pipelines.base.base_pipeline import BasePipeline
from medacy.pipeline_components import BertLearner
from medacy.pipeline_components import TextExtractor
from medacy.pipeline_components import SystematicReviewTokenizer
# Default training hyperparameters; used here and by the CLI.
BATCH_SIZE = 8  # batch size used during training
EPOCHS = 3  # number of training epochs
LEARNING_RATE = 1e-5  # learning rate used during training
class BertPipeline(BasePipeline):
    """
    Pipeline tuned for the extraction of ADE related entities from the
    2018 N2C2 Shared Task.

    Created by Jorge Vargas of NLP@VCU
    """

    def __init__(self, entities, **kwargs):
        """
        Create a BERT pipeline backed by spaCy's small English model
        (``en_core_web_sm``).

        All keyword arguments are also forwarded unchanged to
        ``BasePipeline.__init__``.
        NOTE(review): confirm the base class accepts every option listed
        below; its signature is not visible from this module.

        :param entities: Possible entities.
        :param cuda_device: Which cuda device to use. -1 for CPU (default).
        :param batch_size: Batch size to use during training.
            Defaults to ``BATCH_SIZE``.
        :param learning_rate: Learning rate to use during training.
            Defaults to ``LEARNING_RATE``.
        :param epochs: Number of epochs to use for training.
            Defaults to ``EPOCHS``.
        :param pretrained_model: Value handed to ``BertLearner`` as
            ``pretrained_model``. Defaults to ``'bert-large-cased'``.
        :param using_crf: Value handed to ``BertLearner`` as ``using_crf``.
            Defaults to ``False``.
        """
        super().__init__(entities=entities, spacy_pipeline=spacy.load("en_core_web_sm"), **kwargs)

        # dict.get performs a single lookup and falls back to the default
        # only when the key is absent (an explicit None is kept as-is).
        self.cuda_device = kwargs.get('cuda_device', -1)
        self.batch_size = kwargs.get('batch_size', BATCH_SIZE)
        self.learning_rate = kwargs.get('learning_rate', LEARNING_RATE)
        self.epochs = kwargs.get('epochs', EPOCHS)
        self.pretrained_model = kwargs.get('pretrained_model', 'bert-large-cased')
        self.using_crf = kwargs.get('using_crf', False)

    def get_learner(self):
        """Get the learner object for this pipeline.

        :return: A 2-tuple ``('BERT', learner)`` where ``learner`` is a
            ``BertLearner`` configured from this pipeline's settings.
        """
        learner = BertLearner(
            self.cuda_device,
            pretrained_model=self.pretrained_model,
            batch_size=self.batch_size,
            learning_rate=self.learning_rate,
            epochs=self.epochs,
            using_crf=self.using_crf,
        )
        return 'BERT', learner

    def get_tokenizer(self):
        """Get tokenizer for this pipeline.

        :return: A ``SystematicReviewTokenizer`` built from
            ``self.spacy_pipeline``.
        """
        return SystematicReviewTokenizer(self.spacy_pipeline)

    def get_feature_extractor(self):
        """Get feature extractor for this pipeline.

        :return: A ``TextExtractor`` instance (text-only extractor).
        """
        return TextExtractor()