medaCy / Git / Diff of /medacy/pipelines/bert

Models:
philipB/
medaCy
Downloads: 1
Diff of /medacy/pipelines/bert_pipeline.py [000000] .. [6c353a]
Switch to side-by-side view

--- a
+++ b/medacy/pipelines/bert_pipeline.py
@@ -0,0 +1,70 @@
+"""
+BERT Pipeline
+"""
+import spacy
+
+from medacy.pipelines.base.base_pipeline import BasePipeline
+from medacy.pipeline_components import BertLearner
+from medacy.pipeline_components import TextExtractor
+from medacy.pipeline_components import SystematicReviewTokenizer
+
+# Default training hyperparameters; these values are used here (BertPipeline) and by the CLI
+LEARNING_RATE = 1e-5  # used when no 'learning_rate' keyword argument is supplied
+BATCH_SIZE = 8  # used when no 'batch_size' keyword argument is supplied
+EPOCHS = 3  # used when no 'epochs' keyword argument is supplied
+
+class BertPipeline(BasePipeline):
+    """
+    BERT-based pipeline tuned for the extraction of ADE related entities from the 2018 N2C2 Shared Task
+
+    Created by Jorge Vargas of NLP@VCU
+    """
+
+    def __init__(self, entities, **kwargs):
+        """
+        Create a BERT pipeline that uses spaCy's small English model ("en_core_web_sm").
+        :param entities: Possible entities.
+        :param cuda_device: Which cuda device to use. -1 for CPU (the default).
+        :param batch_size: Batch size to use during training (default BATCH_SIZE).
+        :param learning_rate: Learning rate to use during training (default LEARNING_RATE).
+        :param epochs: Number of epochs to use for training (default EPOCHS).
+        :param pretrained_model: Pretrained model name handed to BertLearner (default 'bert-large-cased').
+        :param using_crf: Forwarded to BertLearner as its using_crf argument (default False).
+        """
+        super().__init__(entities=entities, spacy_pipeline=spacy.load("en_core_web_sm"), **kwargs)
+        # kwargs was forwarded unchanged above; the same optional keys are read here with fallbacks.
+        self.cuda_device = kwargs.get('cuda_device', -1)
+        self.batch_size = kwargs.get('batch_size', BATCH_SIZE)
+        self.learning_rate = kwargs.get('learning_rate', LEARNING_RATE)
+        self.epochs = kwargs.get('epochs', EPOCHS)
+        self.pretrained_model = kwargs.get('pretrained_model', 'bert-large-cased')
+        self.using_crf = kwargs.get('using_crf', False)
+
+    def get_learner(self):
+        """Build the learner for this pipeline from the stored hyperparameters.
+
+        :return: Tuple of the learner name 'BERT' and the configured BertLearner.
+        """
+        learner = BertLearner(
+            self.cuda_device,
+            pretrained_model=self.pretrained_model,
+            batch_size=self.batch_size,
+            learning_rate=self.learning_rate,
+            epochs=self.epochs,
+            using_crf=self.using_crf
+        )
+        return 'BERT', learner
+
+    def get_tokenizer(self):
+        """Get tokenizer for this pipeline.
+
+        :return: SystematicReviewTokenizer built on this pipeline's spaCy model.
+        """
+        return SystematicReviewTokenizer(self.spacy_pipeline)
+
+    def get_feature_extractor(self):
+        """Get feature extractor for this pipeline.
+
+        :return: A new TextExtractor (text only extractor).
+        """
+        return TextExtractor()