--- a +++ b/test-resources/should/doc_parser-1.json @@ -0,0 +1,621 @@ +[ + { + "children": [], + "dep": 429, + "dep_": "nsubj", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 0, + "i_sent": 0, + "idx": 0, + "is_contraction": false, + "is_ent": false, + "is_pronoun": true, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "he", + "lexspan": { + "begin": 0, + "end": 2 + }, + "norm": "He", + "norm_len": 2, + "pos_": "PRON", + "sent_i": 0, + "shape": 12204527652707022206, + "shape_": "Xx", + "tag": 13656873538139661788, + "tag_": "PRP" + }, + { + "children": [ + 0, + 3, + 4, + 7 + ], + "dep": 8206900633647566924, + "dep_": "ROOT", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 1, + "i_sent": 1, + "idx": 3, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "love", + "lexspan": { + "begin": 3, + "end": 8 + }, + "norm": "loved", + "norm_len": 5, + "pos_": "VERB", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 17109001835818727656, + "tag_": "VBD" + }, + { + "children": [], + "dep": 405, + "dep_": "aux", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 2, + "i_sent": 2, + "idx": 9, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "to", + "lexspan": { + "begin": 9, + "end": 11 + }, + "norm": "to", + "norm_len": 2, + "pos_": "PART", + "sent_i": 0, + "shape": 4370460163704169311, + "shape_": "xx", + "tag": 5595707737748328492, + "tag_": "TO" + }, + { + "children": [ + 2 + ], + "dep": 450, + "dep_": "xcomp", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 3, + "i_sent": 3, + "idx": 12, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "smoke", + "lexspan": { + "begin": 12, + "end": 17 + }, + "norm": "smoke", + "norm_len": 5, + "pos_": "VERB", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 14200088355797579614, + "tag_": "VB" + }, + { + "children": [], + "dep": 407, + "dep_": "cc", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 4, + "i_sent": 4, + "idx": 18, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "but", + "lexspan": { + "begin": 18, + "end": 21 + }, + "norm": "but", + "norm_len": 3, + "pos_": "CCONJ", + "sent_i": 0, + "shape": 4088098365541558500, + "shape_": "xxx", + "tag": 17571114184892886314, + "tag_": "CC" + }, + { + "children": [], + "dep": 7037928807040764755, + "dep_": "compound", + "ent": 383, + "ent_": "ORG", + "ent_iob": 3, + "ent_iob_": "B", + "i": 5, + "i_sent": 5, + "idx": 22, + "is_contraction": false, + "is_ent": true, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "Marlboro", + "lexspan": { + "begin": 22, + "end": 30 + }, + "norm": "Marlboro", + "norm_len": 8, + "pos_": "PROPN", + "sent_i": 0, + "shape": 16072095006890171862, + "shape_": "Xxxxx", + "tag": 15794550382381185553, + "tag_": "NNP" + }, + { + "children": [ + 5 + ], + "dep": 429, + "dep_": "nsubj", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 6, + "i_sent": 6, + "idx": 31, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "cigarette", + "lexspan": { + "begin": 31, + "end": 41 + }, + "norm": "cigarettes", + "norm_len": 10, + "pos_": "NOUN", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 783433942507015291, + "tag_": "NNS" + }, + { + "children": [ + 6, + 11, + 14, + 17 + ], + "dep": 410, + "dep_": "conj", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 7, + "i_sent": 7, + "idx": 42, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "give", + "lexspan": { + "begin": 42, + "end": 46 + }, + "norm": "gave", + "norm_len": 4, + "pos_": "VERB", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 17109001835818727656, + "tag_": "VBD" + }, + { + "children": [], + "dep": 7037928807040764755, + "dep_": "compound", + "ent": 380, + "ent_": "PERSON", + "ent_iob": 3, + "ent_iob_": "B", + "i": 8, + "i_sent": 8, + "idx": 47, + "is_contraction": false, + "is_ent": true, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "John", + "lexspan": { + "begin": 47, + "end": 51 + }, + "norm": "John", + "norm_len": 4, + "pos_": "PROPN", + "sent_i": 0, + "shape": 10887629174180191697, + "shape_": "Xxxx", + "tag": 15794550382381185553, + "tag_": "NNP" + }, + { + "children": [ + 8 + ], + "dep": 7037928807040764755, + "dep_": "compound", + "ent": 380, + "ent_": "PERSON", + "ent_iob": 1, + "ent_iob_": "I", + "i": 9, + "i_sent": 9, + "idx": 52, + "is_contraction": false, + "is_ent": true, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "Smith", + "lexspan": { + "begin": 52, + "end": 57 + }, + "norm": "Smith", + "norm_len": 5, + "pos_": "PROPN", + "sent_i": 0, + "shape": 16072095006890171862, + "shape_": "Xxxxx", + "tag": 15794550382381185553, + "tag_": "NNP" + }, + { + "children": [], + "dep": 7037928807040764755, + "dep_": "compound", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 10, + "i_sent": 10, + "idx": 58, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "lung", + "lexspan": { + "begin": 58, + "end": 62 + }, + "norm": "lung", + "norm_len": 4, + "pos_": "NOUN", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 15308085513773655218, + "tag_": "NN" + }, + { + "children": [ + 9, + 10 + ], + "dep": 416, + "dep_": "dobj", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 11, + "i_sent": 11, + "idx": 63, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "cancer", + "lexspan": { + "begin": 63, + "end": 69 + }, + "norm": "cancer", + "norm_len": 6, + "pos_": "NOUN", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 15308085513773655218, + "tag_": "NN" + }, + { + "children": [], + "dep": 423, + "dep_": "mark", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 12, + "i_sent": 12, + "idx": 70, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "while", + "lexspan": { + "begin": 70, + "end": 75 + }, + "norm": "while", + "norm_len": 5, + "pos_": "SCONJ", + "sent_i": 0, + "shape": 13110060611322374290, + "shape_": "xxxx", + "tag": 1292078113972184607, + "tag_": "IN" + }, + { + "children": [], + "dep": 429, + "dep_": "nsubj", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 13, + "i_sent": 13, + "idx": 76, + "is_contraction": false, + "is_ent": false, + "is_pronoun": true, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "he", + "lexspan": { + "begin": 76, + "end": 78 + }, + "norm": "he", + "norm_len": 2, + "pos_": "PRON", + "sent_i": 0, + "shape": 4370460163704169311, + "shape_": "xx", + "tag": 13656873538139661788, + "tag_": "PRP" + }, + { + "children": [ + 12, + 13, + 15 + ], + "dep": 399, + "dep_": "advcl", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 14, + "i_sent": 14, + "idx": 79, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "be", + "lexspan": { + "begin": 79, + "end": 82 + }, + "norm": "was", + "norm_len": 3, + "pos_": "AUX", + "sent_i": 0, + "shape": 4088098365541558500, + "shape_": "xxx", + "tag": 17109001835818727656, + "tag_": "VBD" + }, + { + "children": [ + 16 + ], + "dep": 443, + "dep_": "prep", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 15, + "i_sent": 15, + "idx": 83, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": true, + "is_superlative": false, + "is_wh": false, + "lemma_": "in", + "lexspan": { + "begin": 83, + "end": 85 + }, + "norm": "in", + "norm_len": 2, + "pos_": "ADP", + "sent_i": 0, + "shape": 4370460163704169311, + "shape_": "xx", + "tag": 1292078113972184607, + "tag_": "IN" + }, + { + "children": [], + "dep": 439, + "dep_": "pobj", + "ent": 384, + "ent_": "GPE", + "ent_iob": 3, + "ent_iob_": "B", + "i": 16, + "i_sent": 16, + "idx": 86, + "is_contraction": false, + "is_ent": true, + "is_pronoun": false, + "is_punctuation": false, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": "Chicago", + "lexspan": { + "begin": 86, + "end": 93 + }, + "norm": "Chicago", + "norm_len": 7, + "pos_": "PROPN", + "sent_i": 0, + "shape": 16072095006890171862, + "shape_": "Xxxxx", + "tag": 15794550382381185553, + "tag_": "NNP" + }, + { + "children": [], + "dep": 445, + "dep_": "punct", + "ent": 0, + "ent_": "-<N>-", + "ent_iob": 0, + "ent_iob_": "O", + "i": 17, + "i_sent": 17, + "idx": 93, + "is_contraction": false, + "is_ent": false, + "is_pronoun": false, + "is_punctuation": true, + "is_space": false, + "is_stop": false, + "is_superlative": false, + "is_wh": false, + "lemma_": ".", + "lexspan": { + "begin": 93, + "end": 94 + }, + "norm": ".", + "norm_len": 1, + "pos_": "PUNCT", + "sent_i": 0, + "shape": 12646065887601541794, + "shape_": ".", + "tag": 12646065887601541794, + "tag_": "." + } +] \ No newline at end of file