[7fc5df]: / deidentify / tokenizer / tokenizer_en.py

Download this file

12 lines (6 with data), 212 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
import spacy
from deidentify.tokenizer import Tokenizer
# Shared English spaCy pipeline, loaded once at import time so every
# TokenizerEN instance reuses the same (expensive-to-load) model.
# NOTE(review): requires the 'en_core_web_sm' model to be installed
# (`python -m spacy download en_core_web_sm`) — import fails otherwise.
NLP = spacy.load('en_core_web_sm')
class TokenizerEN(Tokenizer):
    """English tokenizer backed by the module-level spaCy pipeline.

    Thin adapter that satisfies the project's ``Tokenizer`` interface by
    delegating all parsing/tokenization work to spaCy's English model.
    """

    def parse_text(self, text: str) -> spacy.tokens.doc.Doc:
        """Parse *text* with the shared spaCy pipeline.

        Args:
            text: Raw input text to tokenize/parse.

        Returns:
            The spaCy ``Doc`` produced by running the pipeline on ``text``.
        """
        # Delegate directly to the module-level pipeline; no per-call state.
        return NLP(text)