--- a +++ b/test-resources/integration/parse.txt @@ -0,0 +1,445 @@ +John Smith was diagnosed with liver disease while in Chicago. + John: + children=[] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=7037928807040764755 (int) + dep_=compound (str) + detected_name_=-<N>- + ent=380 (int) + ent_=PERSON (str) + ent_iob=3 (int) + ent_iob_=B (str) + i=0 (int) + i_sent=0 (int) + idx=0 (int) + is_concept=False + is_contraction=False (bool) + is_ent=True (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=John (str) + lexspan=(0, 4) (object) + norm=John (str) + norm_len=4 (int) + pos_=PROPN (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=10887629174180191697 (int) + shape_=Xxxx (str) + sub_names=[] + tag=15794550382381185553 (int) + tag_=NNP (str) + tui_descs_= + tuis=frozenset() + tuis_= + Smith: + children=[0] (list) + context_similarity=1 + cui=86418 + cui_=C0086418 + definition_=-<N>- + dep=430 (int) + dep_=nsubjpass (str) + detected_name_=smith + ent=380 (int) + ent_=PERSON (str) + ent_iob=1 (int) + ent_iob_=I (str) + i=1 (int) + i_sent=1 (int) + idx=5 (int) + is_concept=True + is_contraction=False (bool) + is_ent=True (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=Smith (str) + lexspan=(5, 10) (object) + norm=Smith (str) + norm_len=5 (int) + pos_=PROPN (str) + pref_name_=Homo sapiens + sent_i=0 (int) + shape=16072095006890171862 (int) + shape_=Xxxxx (str) + sub_names=('allen~james~wilcox', 'allen~j~.~wilcox', 'amar~angela~f', 'baity', 'bandoh', 'bostroem', 'carlos~arvelo', 'cotard', 'danlos', 'dr~parkes~weber', 'ehlers', 'foucauldian', 'francisco~herrera~luque', 'george~bernard~shaw', 'giuseppe~paravicini', 'hahn', 'harris', 'hilsenroth', 'homo~sapien', 'homo~sapiens', 'human', 'humanized~version', 'humans', 'human~being', 'human~beings', 'human~origin', 'human~population', 'jose~luis~vethencourt', 'jose~solane', 'jose~solanes', 'jules~cotard', 'knowles', 'lisandro~alvarado', 'max~clara', 'mmy', 'mmy~s', 'parkes~weber', 'pinel~s', 'schwartz', 'sekula~l~kathleen', 'seligman~and~maier', 'shimokawa', 'single~human~host', 'single~human~hosts', 'smith', 'stiles', 'tchernabogov', 'trump', 'trump~s') + tag=15794550382381185553 (int) + tag_=NNP (str) + tui_descs_=Human + tuis=('T016',) + tuis_=T016 + was: + children=[] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=406 (int) + dep_=auxpass (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=2 (int) + i_sent=2 (int) + idx=11 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=True (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=be (str) + lexspan=(11, 14) (object) + norm=was (str) + norm_len=3 (int) + pos_=AUX (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=4088098365541558500 (int) + shape_=xxx (str) + sub_names=[] + tag=17109001835818727656 (int) + tag_=VBD (str) + tui_descs_= + tuis=frozenset() + tuis_= + diagnosed: + children=[1, 2, 4, 7, 10] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=8206900633647566924 (int) + dep_=ROOT (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=3 (int) + i_sent=3 (int) + idx=15 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=diagnose (str) + lexspan=(15, 24) (object) + norm=diagnosed (str) + norm_len=9 (int) + pos_=VERB (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=13110060611322374290 (int) + shape_=xxxx (str) + sub_names=[] + tag=3822385049556375858 (int) + tag_=VBN (str) + tui_descs_= + tuis=frozenset() + tuis_= + with: + children=[6] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=443 (int) + dep_=prep (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=4 (int) + i_sent=4 (int) + idx=25 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=True (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=with (str) + lexspan=(25, 29) (object) + norm=with (str) + norm_len=4 (int) + pos_=ADP (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=13110060611322374290 (int) + shape_=xxxx (str) + sub_names=[] + tag=1292078113972184607 (int) + tag_=IN (str) + tui_descs_= + tuis=frozenset() + tuis_= + liver: + children=[] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=7037928807040764755 (int) + dep_=compound (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=5 (int) + i_sent=5 (int) + idx=30 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=liver (str) + lexspan=(30, 35) (object) + norm=liver (str) + norm_len=5 (int) + pos_=NOUN (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=13110060611322374290 (int) + shape_=xxxx (str) + sub_names=[] + tag=15308085513773655218 (int) + tag_=NN (str) + tui_descs_= + tuis=frozenset() + tuis_= + disease: + children=[5] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=439 (int) + dep_=pobj (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=6 (int) + i_sent=6 (int) + idx=36 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=disease (str) + lexspan=(36, 43) (object) + norm=disease (str) + norm_len=7 (int) + pos_=NOUN (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=13110060611322374290 (int) + shape_=xxxx (str) + sub_names=[] + tag=15308085513773655218 (int) + tag_=NN (str) + tui_descs_= + tuis=frozenset() + tuis_= + while: + children=[8] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=443 (int) + dep_=prep (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=7 (int) + i_sent=7 (int) + idx=44 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=True (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=while (str) + lexspan=(44, 49) (object) + norm=while (str) + norm_len=5 (int) + pos_=SCONJ (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=13110060611322374290 (int) + shape_=xxxx (str) + sub_names=[] + tag=1292078113972184607 (int) + tag_=IN (str) + tui_descs_= + tuis=frozenset() + tuis_= + in: + children=[9] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=443 (int) + dep_=prep (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=8 (int) + i_sent=8 (int) + idx=50 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=True (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=in (str) + lexspan=(50, 52) (object) + norm=in (str) + norm_len=2 (int) + pos_=ADP (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=4370460163704169311 (int) + shape_=xx (str) + sub_names=[] + tag=1292078113972184607 (int) + tag_=IN (str) + tui_descs_= + tuis=frozenset() + tuis_= + Chicago: + children=[] (list) + context_similarity=1 + cui=8044 + cui_=C0008044 + definition_=-<N>- + dep=439 (int) + dep_=pobj (str) + detected_name_=chicago + ent=384 (int) + ent_=GPE (str) + ent_iob=3 (int) + ent_iob_=B (str) + i=9 (int) + i_sent=9 (int) + idx=53 (int) + is_concept=True + is_contraction=False (bool) + is_ent=True (bool) + is_pronoun=False (bool) + is_punctuation=False (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=Chicago (str) + lexspan=(53, 60) (object) + norm=Chicago (str) + norm_len=7 (int) + pos_=PROPN (str) + pref_name_=chicago + sent_i=0 (int) + shape=16072095006890171862 (int) + shape_=Xxxxx (str) + sub_names=('chicago',) + tag=15794550382381185553 (int) + tag_=NNP (str) + tui_descs_=Geographic Area + tuis=('T083',) + tuis_=T083 + .: + children=[] (list) + context_similarity=-1 + cui=-1 + cui_=-<N>- + definition_=-<N>- + dep=445 (int) + dep_=punct (str) + detected_name_=-<N>- + ent=0 (int) + ent_=-<N>- (str) + ent_iob=0 (int) + ent_iob_=O (str) + i=10 (int) + i_sent=10 (int) + idx=60 (int) + is_concept=False + is_contraction=False (bool) + is_ent=False (bool) + is_pronoun=False (bool) + is_punctuation=True (int) + is_space=False (bool) + is_stop=False (bool) + is_superlative=False (bool) + is_wh=False (bool) + lemma_=. (str) + lexspan=(60, 61) (object) + norm=. (str) + norm_len=1 (int) + pos_=PUNCT (str) + pref_name_=-<N>- + sent_i=0 (int) + shape=12646065887601541794 (int) + shape_=. (str) + sub_names=[] + tag=12646065887601541794 (int) + tag_=. (str) + tui_descs_= + tuis=frozenset() + tuis_= +entities: + John Smith + Smith + Chicago