a b/test/python/test_parse.py
1
from typing import Dict, List
import json
from pathlib import Path
from zensols.nlp import (
    FeatureToken, FeatureSentence, FeatureDocument, FeatureDocumentParser
)
from util import TestBase
8
9
10
class TestParse(TestBase):
    """Exercise the feature document parser on the fixture texts.

    The parser factory (``_get_doc_parser``) and the input texts (``text_1``
    and ``text_2``) are not defined in this class; presumably ``TestBase``
    supplies them.

    """
    def test_feature_parse(self):
        """Check the ``cui_`` and ``pref_name_`` features of every token
        parsed from ``text_1``.

        Tokens at index 4 and 5 are expected to be linked to the *Kidney
        Failure* concept; every other token is expected to carry
        :obj:`FeatureToken.NONE` for both features.

        """
        DEBUG: bool = False
        keeps = {'cui_', 'pref_name_'}
        parser: FeatureDocumentParser = self._get_doc_parser()
        self.assertIsInstance(parser, FeatureDocumentParser)
        doc: FeatureDocument = parser(self.text_1)
        # project each token's feature dictionary down to the kept features
        med_toks: List[Dict[str, str]] = [
            {name: val for name, val in tok.asdict().items() if name in keeps}
            for tok in doc.token_iter()]
        if DEBUG:
            print()
            for tok in doc.token_iter():
                print(tok, tok.cui_, tok.pref_name_)
            print()
            for mtok in med_toks:
                print(mtok)
        none = FeatureToken.NONE
        linked = {'cui_': 'C0035078', 'pref_name_': 'Kidney Failure'}
        unlinked = {'cui_': none, 'pref_name_': none}
        for i, mtok in enumerate(med_toks):
            if DEBUG:
                print(i, mtok)
            should = linked if 4 <= i <= 5 else unlinked
            self.assertEqual(should, mtok)

    def test_doc_parse(self):
        """Check the container types produced by ``parse`` on ``text_1`` and
        the concept linked to the token at index 4 of the first sentence.

        """
        parser: FeatureDocumentParser = self._get_doc_parser()
        self.assertIsInstance(parser, FeatureDocumentParser)
        doc: FeatureDocument = parser.parse(self.text_1)
        self.assertIsInstance(doc, FeatureDocument)
        sent: FeatureSentence = doc[0]
        self.assertIsInstance(sent, FeatureSentence)
        self.assertEqual(10, len(sent))
        self.assertIsInstance(sent[0], FeatureToken)
        # NOTE(review): the two checks below pass the token feature as the
        # ``msg`` argument of ``assertTrue``, so they only test that the
        # literal string is truthy and never inspect token 2.  They are left
        # unchanged because ``test_feature_parse`` expects token 2 of the same
        # text to carry no concept; confirm the intended expectation before
        # tightening them.
        self.assertTrue('C0011900', sent[2].cui_)
        self.assertTrue('Diagnosis', sent[2].pref_name_)
        # these were vacuous ``assertTrue(expected, actual)`` calls as well;
        # the values agree with what ``test_feature_parse`` asserts for the
        # token at index 4
        self.assertEqual('C0035078', sent[4].cui_)
        self.assertEqual('Kidney Failure', sent[4].pref_name_)

    def test_multi_entity(self):
        """Compare the JSON form of the document parsed from ``text_2`` with
        the stored expectation in ``test-resources/doc-features.json``.

        """
        WRITE: bool = False
        parser: FeatureDocumentParser = self._get_doc_parser()
        doc: FeatureDocument = parser.parse(self.text_2)
        path = Path('test-resources/doc-features.json')
        obj = json.loads(doc.asjson(indent=4))
        # ``context_similarity`` is dropped from every token before the
        # comparison (presumably because its values are not stable across
        # runs -- TODO confirm)
        for sent in obj['sentences']:
            for tok in sent['tokens']:
                del tok['context_similarity']
        # enable to re-write the `should` test data after API changes; the
        # `context_similarity` entries have already been removed above, so
        # the written file never contains them
        if WRITE:
            with open(path, 'w') as f:
                json.dump(obj, f, indent=4)
        with open(path) as f:
            should = json.load(f)
        self.assertEqual(should, obj)