--- a +++ b/test-resources/doc-features.json @@ -0,0 +1,865 @@ +{ + "text": "He loved to smoke but Marlboro cigarettes gave John Smith lung cancer while he was in Chicago.", + "sentences": [ + { + "text": "He loved to smoke but Marlboro cigarettes gave John Smith lung cancer while he was in Chicago.", + "tokens": [ + { + "sent_i": 0, + "norm_len": 2, + "i_sent": 0, + "lexspan": { + "end": 2, + "begin": 0 + }, + "is_wh": false, + "i": 0, + "is_superlative": false, + "shape": 12204527652707022206, + "is_stop": true, + "dep": 429, + "ent": 0, + "norm": "He", + "pos_": "PRON", + "lemma_": "he", + "idx": 0, + "tag": 13656873538139661788, + "dep_": "nsubj", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "PRP", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": "Xx", + "is_pronoun": true, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 5, + "i_sent": 1, + "lexspan": { + "end": 8, + "begin": 3 + }, + "is_wh": false, + "i": 1, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": false, + "dep": 8206900633647566924, + "ent": 0, + "norm": "loved", + "pos_": "VERB", + "lemma_": "love", + "idx": 3, + "tag": 17109001835818727656, + "dep_": "ROOT", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "VBD", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 0, + 3, + 4, + 7 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 2, + "i_sent": 2, + "lexspan": { + "end": 11, + "begin": 9 + }, + "is_wh": false, + "i": 2, + "is_superlative": false, + "shape": 4370460163704169311, + "is_stop": true, + "dep": 405, + "ent": 0, + "norm": "to", + "pos_": "PART", + "lemma_": "to", + "idx": 9, + "tag": 5595707737748328492, + "dep_": "aux", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "TO", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 5, + "i_sent": 3, + "lexspan": { + "end": 17, + "begin": 12 + }, + "is_wh": false, + "i": 3, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": false, + "dep": 450, + "ent": 0, + "norm": "smoke", + "pos_": "VERB", + "lemma_": "smoke", + "idx": 12, + "tag": 14200088355797579614, + "dep_": "xcomp", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "VB", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 2 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 3, + "i_sent": 4, + "lexspan": { + "end": 21, + "begin": 18 + }, + "is_wh": false, + "i": 4, + "is_superlative": false, + "shape": 4088098365541558500, + "is_stop": true, + "dep": 407, + "ent": 0, + "norm": "but", + "pos_": "CCONJ", + "lemma_": "but", + "idx": 18, + "tag": 17571114184892886314, + "dep_": "cc", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "CC", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 8, + "i_sent": 5, + "lexspan": { + "end": 30, + "begin": 22 + }, + "is_wh": false, + "i": 5, + "is_superlative": false, + "shape": 16072095006890171862, + "is_stop": false, + "dep": 7037928807040764755, + "ent": 383, + "norm": "Marlboro", + "pos_": "PROPN", + "lemma_": "Marlboro", + "idx": 22, + "tag": 15794550382381185553, + "dep_": "compound", + "is_punctuation": false, + "ent_iob": 3, + "tag_": "NNP", + "is_ent": true, + "ent_iob_": "B", + "children": [], + "is_space": false, + "ent_": "ORG", + "shape_": "Xxxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 10, + "i_sent": 6, + "lexspan": { + "end": 41, + "begin": 31 + }, + "is_wh": false, + "i": 6, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": false, + "dep": 429, + "ent": 0, + "norm": "cigarettes", + "pos_": "NOUN", + "lemma_": "cigarette", + "idx": 31, + "tag": 783433942507015291, + "dep_": "nsubj", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "NNS", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 5 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 4, + "i_sent": 7, + "lexspan": { + "end": 46, + "begin": 42 + }, + "is_wh": false, + "i": 7, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": false, + "dep": 410, + "ent": 0, + "norm": "gave", + "pos_": "VERB", + "lemma_": "give", + "idx": 42, + "tag": 17109001835818727656, + "dep_": "conj", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "VBD", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 6, + 11, + 14, + 17 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 4, + "i_sent": 8, + "lexspan": { + "end": 51, + "begin": 47 + }, + "is_wh": false, + "i": 8, + "is_superlative": false, + "shape": 10887629174180191697, + "is_stop": false, + "dep": 7037928807040764755, + "ent": 380, + "norm": "John", + "pos_": "PROPN", + "lemma_": "John", + "idx": 47, + "tag": 15794550382381185553, + "dep_": "compound", + "is_punctuation": false, + "ent_iob": 3, + "tag_": "NNP", + "is_ent": true, + "ent_iob_": "B", + "children": [], + "is_space": false, + "ent_": "PERSON", + "shape_": "Xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 5, + "i_sent": 9, + "lexspan": { + "end": 57, + "begin": 52 + }, + "is_wh": false, + "i": 9, + "is_superlative": false, + "shape": 16072095006890171862, + "is_stop": false, + "dep": 7037928807040764755, + "ent": 380, + "norm": "Smith", + "pos_": "PROPN", + "lemma_": "Smith", + "idx": 52, + "tag": 15794550382381185553, + "dep_": "compound", + "is_punctuation": false, + "ent_iob": 1, + "tag_": "NNP", + "is_ent": true, + "ent_iob_": "I", + "children": [ + 8 + ], + "is_space": false, + "ent_": "PERSON", + "shape_": "Xxxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": 86418, + "definition_": "-<N>-", + "tuis": [ + "T016" + ], + "detected_name_": "smith", + "sub_names": [ + "allen~james~wilcox", + "allen~j~.~wilcox", + "amar~angela~f", + "baity", + "bandoh", + "bostroem", + "carlos~arvelo", + "cotard", + "danlos", + "dr~parkes~weber", + "ehlers", + "foucauldian", + "francisco~herrera~luque", + "george~bernard~shaw", + "giuseppe~paravicini", + "hahn", + "harris", + "hilsenroth", + "homo~sapien", + "homo~sapiens", + "human", + "humanized~version", + "humans", + "human~being", + "human~beings", + "human~origin", + "human~population", + "jose~luis~vethencourt", + "jose~solane", + "jose~solanes", + "jules~cotard", + "knowles", + "lisandro~alvarado", + "max~clara", + "mmy", + "mmy~s", + "parkes~weber", + "pinel~s", + "schwartz", + "sekula~l~kathleen", + "seligman~and~maier", + "shimokawa", + "single~human~host", + "single~human~hosts", + "smith", + "stiles", + "tchernabogov", + "trump", + "trump~s" + ], + "cui_": "C0086418", + "tui_descs_": "Human", + "pref_name_": "Homo sapiens", + "is_concept": true, + "tuis_": "T016" + }, + { + "sent_i": 0, + "norm_len": 4, + "i_sent": 10, + "lexspan": { + "end": 62, + "begin": 58 + }, + "is_wh": false, + "i": 10, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": false, + "dep": 7037928807040764755, + "ent": 0, + "norm": "lung", + "pos_": "NOUN", + "lemma_": "lung", + "idx": 58, + "tag": 15308085513773655218, + "dep_": "compound", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "NN", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 6, + "i_sent": 11, + "lexspan": { + "end": 69, + "begin": 63 + }, + "is_wh": false, + "i": 11, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": false, + "dep": 416, + "ent": 0, + "norm": "cancer", + "pos_": "NOUN", + "lemma_": "cancer", + "idx": 63, + "tag": 15308085513773655218, + "dep_": "dobj", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "NN", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 9, + 10 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 5, + "i_sent": 12, + "lexspan": { + "end": 75, + "begin": 70 + }, + "is_wh": false, + "i": 12, + "is_superlative": false, + "shape": 13110060611322374290, + "is_stop": true, + "dep": 423, + "ent": 0, + "norm": "while", + "pos_": "SCONJ", + "lemma_": "while", + "idx": 70, + "tag": 1292078113972184607, + "dep_": "mark", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "IN", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 2, + "i_sent": 13, + "lexspan": { + "end": 78, + "begin": 76 + }, + "is_wh": false, + "i": 13, + "is_superlative": false, + "shape": 4370460163704169311, + "is_stop": true, + "dep": 429, + "ent": 0, + "norm": "he", + "pos_": "PRON", + "lemma_": "he", + "idx": 76, + "tag": 13656873538139661788, + "dep_": "nsubj", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "PRP", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xx", + "is_pronoun": true, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 3, + "i_sent": 14, + "lexspan": { + "end": 82, + "begin": 79 + }, + "is_wh": false, + "i": 14, + "is_superlative": false, + "shape": 4088098365541558500, + "is_stop": true, + "dep": 399, + "ent": 0, + "norm": "was", + "pos_": "AUX", + "lemma_": "be", + "idx": 79, + "tag": 17109001835818727656, + "dep_": "advcl", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "VBD", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 12, + 13, + 15 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xxx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 2, + "i_sent": 15, + "lexspan": { + "end": 85, + "begin": 83 + }, + "is_wh": false, + "i": 15, + "is_superlative": false, + "shape": 4370460163704169311, + "is_stop": true, + "dep": 443, + "ent": 0, + "norm": "in", + "pos_": "ADP", + "lemma_": "in", + "idx": 83, + "tag": 1292078113972184607, + "dep_": "prep", + "is_punctuation": false, + "ent_iob": 0, + "tag_": "IN", + "is_ent": false, + "ent_iob_": "O", + "children": [ + 16 + ], + "is_space": false, + "ent_": "-<N>-", + "shape_": "xx", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + }, + { + "sent_i": 0, + "norm_len": 7, + "i_sent": 16, + "lexspan": { + "end": 93, + "begin": 86 + }, + "is_wh": false, + "i": 16, + "is_superlative": false, + "shape": 16072095006890171862, + "is_stop": false, + "dep": 439, + "ent": 384, + "norm": "Chicago", + "pos_": "PROPN", + "lemma_": "Chicago", + "idx": 86, + "tag": 15794550382381185553, + "dep_": "pobj", + "is_punctuation": false, + "ent_iob": 3, + "tag_": "NNP", + "is_ent": true, + "ent_iob_": "B", + "children": [], + "is_space": false, + "ent_": "GPE", + "shape_": "Xxxxx", + "is_pronoun": false, + "is_contraction": false, + "cui": 8044, + "definition_": "-<N>-", + "tuis": [ + "T083" + ], + "detected_name_": "chicago", + "sub_names": [ + "chicago" + ], + "cui_": "C0008044", + "tui_descs_": "Geographic Area", + "pref_name_": "chicago", + "is_concept": true, + "tuis_": "T083" + }, + { + "sent_i": 0, + "norm_len": 1, + "i_sent": 17, + "lexspan": { + "end": 94, + "begin": 93 + }, + "is_wh": false, + "i": 17, + "is_superlative": false, + "shape": 12646065887601541794, + "is_stop": false, + "dep": 445, + "ent": 0, + "norm": ".", + "pos_": "PUNCT", + "lemma_": ".", + "idx": 93, + "tag": 12646065887601541794, + "dep_": "punct", + "is_punctuation": true, + "ent_iob": 0, + "tag_": ".", + "is_ent": false, + "ent_iob_": "O", + "children": [], + "is_space": false, + "ent_": "-<N>-", + "shape_": ".", + "is_pronoun": false, + "is_contraction": false, + "cui": -1, + "definition_": "-<N>-", + "tuis": "frozenset()", + "detected_name_": "-<N>-", + "sub_names": [], + "cui_": "-<N>-", + "tui_descs_": "", + "pref_name_": "-<N>-", + "is_concept": false, + "tuis_": "" + } + ] + } + ] +} \ No newline at end of file