[ca4dac]: / test / python / test_combined.py

Download this file

80 lines (64 with data), 3.2 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
from typing import List, Dict, Set, Any
from collections import OrderedDict
import json
from zensols.nlp import FeatureToken, FeatureDocument, FeatureDocumentParser
from util import TestBase
class TestCombinedParsers(TestBase):
    """Regression tests that compare token features produced by the
    ``combined`` document parsers against JSON files stored under
    ``test-resources/should``.

    """
    # token features compared when a test does not pass its own ``attrs``
    _DEFAULT_ATTRS = 'i i_sent idx norm ent_ ent_iob ent_iob_'.split()
    # when True, pretty print the parsed token features and skip all checks
    _TRACE = False

    def _compare_sents(self, parser_name: str, idx: int, write: bool,
                       sent: str, attrs: List[str], missing: Set[str]):
        """Parse ``sent`` and compare its token features with the expected
        values in ``test-resources/should/<parser_name>-<idx>.json``.

        :param parser_name: the name given to ``_get_doc_parser`` and used
                            as the prefix of the expected results file name

        :param idx: the sentence index used in the expected results file
                    name

        :param write: if ``True``, (over)write the expected results file
                      with the parsed output and print the compared
                      attributes before the comparison is made

        :param sent: the text to parse

        :param attrs: the token feature names to compare

        :param missing: feature names that must not be present in any
                        parsed token, or ``None`` to skip that check

        """
        def map_tok_features(t: FeatureToken) -> Dict[str, Any]:
            # sort keys to make diffing easier
            dct = t.asdict()
            return OrderedDict(sorted(dct.items(), key=lambda kv: kv[0]))

        actual_file: str = f'test-resources/should/{parser_name}-{idx}.json'
        # NOTE(review): ``_get_doc_parser`` is not defined in this class;
        # presumably inherited from ``TestBase`` -- confirm
        p: FeatureDocumentParser = self._get_doc_parser('combined', parser_name)
        doc: FeatureDocument = p(sent)
        actuals = tuple(map(map_tok_features, doc.token_iter()))
        if self._TRACE:
            # debugging aid: dump the parsed features and make no assertions
            from pprint import pprint
            pprint(actuals)
            return
        if write:
            print(sent)
            with open(actual_file, 'w') as f:
                json.dump(actuals, f, indent=4, sort_keys=False)
            for attr in attrs:
                vals: str = ', '.join(map(lambda d: str(d[attr]), actuals))
                print(f' {attr}: {vals}')
            print('_' * 79)
        with open(actual_file) as f:
            shoulds: List[Dict[str, Any]] = json.load(f)
        attr: str
        for attr in attrs:
            actual = tuple(map(lambda d: d[attr], actuals))
            should = tuple(map(lambda d: d[attr], shoulds))
            self.assertEqual(should, actual, f'for attribute: <{attr}>')
        if missing is not None:
            for attr in missing:
                for tok in actuals:
                    # ``tok`` is a dict of features, so test key membership;
                    # ``hasattr`` on a dict never sees its keys and would
                    # make this check pass unconditionally
                    self.assertNotIn(attr, tok,
                                     f'expected missing {attr} in {tok}')

    def _compare(self, parser_name: str, write: bool = False,
                 attrs: List[str] = None, missing: Set[str] = None):
        """Run :meth:`_compare_sents` on the ``text_1`` and ``text_2``
        attributes of this instance.

        :param parser_name: the name of the parser to test

        :param write: whether to (re)write the expected results files

        :param attrs: the token feature names to compare, which defaults to
                      ``_DEFAULT_ATTRS`` when ``None``

        :param missing: feature names that must not be present in any
                        parsed token, or ``None`` to skip that check

        """
        attrs = TestCombinedParsers._DEFAULT_ATTRS if attrs is None else attrs
        for i in range(2):
            # text attributes are numbered from 1, result files from 0
            # NOTE(review): ``text_1``/``text_2`` are not set in this class;
            # presumably provided by ``TestBase`` -- confirm
            sent: str = getattr(self, f'text_{i + 1}')
            self._compare_sents(parser_name, i, write, sent, attrs, missing)

    def test_default(self):
        self._compare('doc_parser', missing='cui_'.split())

    def test_biomed_ner(self):
        self._compare('mednlp_biomed_doc_parser', missing='cui_'.split())

    def test_medcat(self):
        self._compare('mednlp_medcat_doc_parser',
                      attrs=self._DEFAULT_ATTRS + 'cui_ tuis_'.split())

    def test_biomed_combined(self):
        self._compare('mednlp_combine_biomed_doc_parser',
                      missing='cui_'.split())

    def test_medcat_combined(self):
        self._compare('mednlp_combine_medcat_doc_parser',
                      attrs=self._DEFAULT_ATTRS + 'cui_ tuis_'.split())

    def test_medcat_biomded_combined(self):
        self._compare('mednlp_combine_biomed_medcat_doc_parser',
                      attrs=self._DEFAULT_ATTRS + 'cui_ tuis_'.split())