|
a |
|
b/test/entlink/test_multi_entity.py |
|
|
1 |
import json |
|
|
2 |
import re |
|
|
3 |
from pathlib import Path |
|
|
4 |
from zensols.nlp import FeatureDocument, FeatureDocumentParser, FeatureToken |
|
|
5 |
from util import TestBase |
|
|
6 |
|
|
|
7 |
|
|
|
8 |
class TestMultiEntity(TestBase):
    """Regression test comparing a parsed document's JSON form with stored
    expected data.

    """
    def test_multi_entity(self):
        """Parse a two-sentence text with the ``mednlp-add-linker`` parser and
        compare the document's JSON form with the expected data on disk.

        The ``context_similarity`` entry of every token is deleted before the
        comparison, so the expected data file must not contain that key.

        """
        # set to ``True`` to print every token while debugging
        DEBUG: bool = False
        # set to ``True`` to re-write the `should` test data for API changes
        WRITE: bool = False
        sent = 'John was diagnosed with kidney failure. He has lung cancer too.'
        parser: FeatureDocumentParser = \
            self._get_doc_parser('mednlp-add-linker')
        doc: FeatureDocument = parser.parse(sent)
        if DEBUG:
            tok: FeatureToken
            for tok in doc.token_iter():
                print(tok, tok.definition_)
                tok.write()
        path = Path('test-resources/entlink-features.json')
        obj = json.loads(doc.asjson(indent=4))
        # drop `context_similarity` from every token before comparing
        # NOTE(review): presumably the value is not stable across runs --
        # confirm
        for s in obj['sentences']:
            for t in s['tokens']:
                del t['context_similarity']
        if WRITE:
            # write the already scrubbed structure rather than filtering the
            # JSON text line by line: this removes every `context_similarity`
            # entry whatever its value and always produces valid JSON
            with open(path, 'w') as f:
                json.dump(obj, f, indent=4)
                f.write('\n')
        with open(path) as f:
            should = json.load(f)
        self.assertEqual(should, obj)