"""Demo: annotate one Dutch text with a deidentify Flair tagger.

The script runs top to bottom and prints three things:

1. the annotations the tagger produced for the document,
2. the document text after ``mask_annotations``,
3. the document text after ``surrogate_annotations``.

The model named in ``model`` is expected to be available locally already
(the original comments call it a "downloaded model").
"""
from pprint import pprint

from deidentify.base import Document
from deidentify.taggers import FlairTagger
from deidentify.tokenizer import TokenizerFactory
from deidentify.util import mask_annotations
from deidentify.util import surrogate_annotations

# Create some text
text = (
    "Dit is stukje tekst met daarin de naam Jan Jansen. De patient J. Jansen (e: "
    "j.jnsen@email.com, t: 06-12345678) is 64 jaar oud en woonachtig in Utrecht. Hij werd op 10 "
    "oktober door arts Peter de Visser ontslagen van de kliniek van het UMCU."
)

# Wrap text in document
documents = [
    Document(name='doc_01', text=text)
]

# Select downloaded model
model = 'model_bilstmcrf_ons_fast-v0.2.0'

# Instantiate tokenizer
# NOTE(review): `disable` presumably switches off pipeline components that are
# not needed for tokenization -- confirm against TokenizerFactory.
tokenizer = TokenizerFactory().tokenizer(corpus='ons', disable=("tagger", "ner"))

# Load tagger with a downloaded model file and tokenizer
tagger = FlairTagger(model=model, tokenizer=tokenizer, verbose=False)

# Annotate your documents
annotated_docs = tagger.annotate(documents)

# Inspect the annotations of the first (and only) document
first_doc = annotated_docs[0]
pprint(first_doc.annotations)

# Print the text of the document returned by mask_annotations
masked_doc = mask_annotations(first_doc)
print(masked_doc.text)

# Print the text of the document returned by surrogate_annotations.
# The call yields an iterable of documents, so it is materialised with list()
# before taking the first element; seed=1 is passed through unchanged.
iter_docs = surrogate_annotations(docs=[first_doc], seed=1)
surrogate_doc = list(iter_docs)[0]
print(surrogate_doc.text)