Switch to unified view

a b/test/entlink/test_multi_entity.py
1
import json
2
import re
3
from pathlib import Path
4
from zensols.nlp import FeatureDocument, FeatureDocumentParser, FeatureToken
5
from util import TestBase
6
7
8
class TestMultiEntity(TestBase):
    """Compare the JSON features of a parsed multi-sentence document with a
    stored fixture.

    """
    def test_multi_entity(self):
        """Parse a two sentence text with the ``mednlp-add-linker`` parser and
        assert its JSON form equals ``test-resources/entlink-features.json``.

        Every ``context_similarity`` token entry is removed from the parsed
        output before the comparison, so the fixture must not contain any.

        """
        # developer switches: dump each token / regenerate the fixture file
        DEBUG: bool = False
        WRITE: bool = False
        sent = 'John was diagnosed with kidney failure. He has lung cancer too.'
        parser: FeatureDocumentParser = \
            self._get_doc_parser('mednlp-add-linker')
        doc: FeatureDocument = parser.parse(sent)
        if DEBUG:
            tok: FeatureToken
            for tok in doc.token_iter():
                print(tok, tok.definition_)
                tok.write()
        path = Path('test-resources/entlink-features.json')
        obj = json.loads(doc.asjson(indent=4))
        for s in obj['sentences']:
            for t in s['tokens']:
                # tolerate tokens that carry no similarity score
                t.pop('context_similarity', None)
        if WRITE:
            # enable to re-write `should` test data for API changes; the
            # cleaned object is written (rather than the raw JSON text) so the
            # fixture never contains `context_similarity` entries and always
            # matches what is asserted below
            with open(path, 'w', encoding='utf-8') as f:
                json.dump(obj, f, indent=4)
                f.write('\n')
        with open(path, encoding='utf-8') as f:
            should = json.load(f)
        self.assertEqual(should, obj)