Diff of /test/python/test_parse.py [000000] .. [ca4dac]

Switch to side-by-side view

--- a
+++ b/test/python/test_parse.py
@@ -0,0 +1,69 @@
+from typing import Dict
+import json
+from pathlib import Path
+from zensols.nlp import (
+    FeatureToken, FeatureSentence, FeatureDocument, FeatureDocumentParser
+)
+from util import TestBase
+
+
+class TestParse(TestBase):
+    def test_feature_parse(self):
+        DEBUG: bool = False
+        keeps = set('cui_ pref_name_'.split())
+        parser: FeatureDocumentParser = self._get_doc_parser()
+        self.assertTrue(isinstance(parser, FeatureDocumentParser))
+        med_toks: Dict[str, str] = []
+        doc: FeatureDocument = parser(self.text_1)
+        for tok in doc.token_iter():
+            fd = tok.asdict()
+            med_toks.append({k: fd[k] for k in fd.keys() & keeps})
+        if DEBUG:
+            print()
+            for tok in doc.token_iter():
+                print(tok, tok.cui_, tok.pref_name_)
+            print()
+            for tok in med_toks:
+                print(tok)
+        none = FeatureToken.NONE
+        for i, mtok in enumerate(med_toks):
+            if DEBUG:
+                print(i, mtok)
+            if i >= 4 and i <= 5:
+                self.assertEqual(
+                    {'cui_': 'C0035078', 'pref_name_': 'Kidney Failure'}, mtok)
+            else:
+                self.assertEqual({'cui_': none, 'pref_name_': none}, mtok)
+
+    def test_doc_parse(self):
+        parser: FeatureDocumentParser = self._get_doc_parser()
+        self.assertTrue(isinstance(parser, FeatureDocumentParser))
+        doc: FeatureDocument = parser.parse(self.text_1)
+        self.assertTrue(isinstance(doc, FeatureDocument))
+        sent: FeatureSentence = doc[0]
+        self.assertTrue(isinstance(sent, FeatureSentence))
+        self.assertEqual(10, len(sent))
+        self.assertTrue(isinstance(sent[0], FeatureToken))
+        self.assertTrue('C0011900', sent[2].cui_)
+        self.assertTrue('Diagnosis', sent[2].pref_name_)
+        self.assertTrue('C0035078', sent[4].cui_)
+        self.assertTrue('Kidney Failure', sent[4].pref_name_)
+
+    def test_multi_entity(self):
+        WRITE: bool = False
+        parser: FeatureDocumentParser = self._get_doc_parser()
+        doc: FeatureDocument = parser.parse(self.text_2)
+        path = Path('test-resources/doc-features.json')
+        json_str = doc.asjson(indent=4)
+        obj = json.loads(json_str)
+        for s in obj['sentences']:
+            for t in s['tokens']:
+                del t['context_similarity']
+        # enable to re-write `should` test data for API changes; but have to
+        # remove all `context_simirity` entries
+        if WRITE:
+            with open(path, 'w') as f:
+                json.dump(obj, f, indent=4)
+        with open(path) as f:
+            should = json.load(f)
+        self.assertEqual(should, obj)