|
a |
|
b/test/python/test_parse.py |
|
|
1 |
from typing import Dict, List
import json
from pathlib import Path
from zensols.nlp import (
    FeatureToken, FeatureSentence, FeatureDocument, FeatureDocumentParser
)
from util import TestBase
|
|
8 |
|
|
|
9 |
|
|
|
10 |
class TestParse(TestBase):
    """Exercise the document parser returned by ``_get_doc_parser`` and the
    ``cui_`` / ``pref_name_`` features found on the tokens it produces.

    """
    def test_feature_parse(self):
        """Parse ``text_1`` and check the ``cui_`` / ``pref_name_`` features
        of every token: tokens 4 and 5 are expected to carry ``C0035078``
        (``Kidney Failure``) and all other tokens ``FeatureToken.NONE``.

        """
        # flip to True to dump the tokens and their features while debugging
        DEBUG: bool = False
        keeps = {'cui_', 'pref_name_'}
        parser: FeatureDocumentParser = self._get_doc_parser()
        self.assertIsInstance(parser, FeatureDocumentParser)
        doc: FeatureDocument = parser(self.text_1)
        # one dict per token, narrowed to the features under test
        med_toks: List[Dict[str, str]] = []
        for tok in doc.token_iter():
            fd = tok.asdict()
            med_toks.append({k: fd[k] for k in fd.keys() & keeps})
        if DEBUG:
            print()
            for tok in doc.token_iter():
                print(tok, tok.cui_, tok.pref_name_)
            print()
            for tok in med_toks:
                print(tok)
        none = FeatureToken.NONE
        for i, mtok in enumerate(med_toks):
            if DEBUG:
                print(i, mtok)
            if 4 <= i <= 5:
                self.assertEqual(
                    {'cui_': 'C0035078', 'pref_name_': 'Kidney Failure'}, mtok)
            else:
                self.assertEqual({'cui_': none, 'pref_name_': none}, mtok)

    def test_doc_parse(self):
        """Parse ``text_1`` with ``parser.parse`` and check the types of the
        document, its first sentence and first token, the sentence length
        and the concept features of token 4.

        """
        parser: FeatureDocumentParser = self._get_doc_parser()
        self.assertIsInstance(parser, FeatureDocumentParser)
        doc: FeatureDocument = parser.parse(self.text_1)
        self.assertIsInstance(doc, FeatureDocument)
        sent: FeatureSentence = doc[0]
        self.assertIsInstance(sent, FeatureSentence)
        self.assertEqual(10, len(sent))
        self.assertIsInstance(sent[0], FeatureToken)
        # NOTE(review): ``assertTrue(expected, actual)`` treats its second
        # argument as the failure message, so the two checks on ``sent[2]``
        # can never fail.  They are left as is because ``test_feature_parse``
        # expects token 2 to carry no concept; confirm the intended values
        # and then convert them to ``assertEqual``.
        self.assertTrue('C0011900', sent[2].cui_)
        self.assertTrue('Diagnosis', sent[2].pref_name_)
        # same expectation ``test_feature_parse`` has for token 4
        self.assertEqual('C0035078', sent[4].cui_)
        self.assertEqual('Kidney Failure', sent[4].pref_name_)

    def test_multi_entity(self):
        """Parse ``text_2``, serialize the document to JSON and compare it
        with the stored copy in ``test-resources/doc-features.json``.

        """
        # enable to re-write `should` test data for API changes; the
        # `context_similarity` entries are removed before writing
        WRITE: bool = False
        parser: FeatureDocumentParser = self._get_doc_parser()
        doc: FeatureDocument = parser.parse(self.text_2)
        path = Path('test-resources/doc-features.json')
        json_str = doc.asjson(indent=4)
        obj = json.loads(json_str)
        # `context_similarity` is excluded from the comparison (and from the
        # stored file)
        for s in obj['sentences']:
            for t in s['tokens']:
                del t['context_similarity']
        if WRITE:
            with open(path, 'w', encoding='utf-8') as f:
                json.dump(obj, f, indent=4)
        with open(path, encoding='utf-8') as f:
            should = json.load(f)
        self.assertEqual(should, obj)