a b/test-resources/should/doc_parser-1.json
1
[
2
    {
3
        "children": [],
4
        "dep": 429,
5
        "dep_": "nsubj",
6
        "ent": 0,
7
        "ent_": "-<N>-",
8
        "ent_iob": 0,
9
        "ent_iob_": "O",
10
        "i": 0,
11
        "i_sent": 0,
12
        "idx": 0,
13
        "is_contraction": false,
14
        "is_ent": false,
15
        "is_pronoun": true,
16
        "is_punctuation": false,
17
        "is_space": false,
18
        "is_stop": true,
19
        "is_superlative": false,
20
        "is_wh": false,
21
        "lemma_": "he",
22
        "lexspan": {
23
            "begin": 0,
24
            "end": 2
25
        },
26
        "norm": "He",
27
        "norm_len": 2,
28
        "pos_": "PRON",
29
        "sent_i": 0,
30
        "shape": 12204527652707022206,
31
        "shape_": "Xx",
32
        "tag": 13656873538139661788,
33
        "tag_": "PRP"
34
    },
35
    {
36
        "children": [
37
            0,
38
            3,
39
            4,
40
            7
41
        ],
42
        "dep": 8206900633647566924,
43
        "dep_": "ROOT",
44
        "ent": 0,
45
        "ent_": "-<N>-",
46
        "ent_iob": 0,
47
        "ent_iob_": "O",
48
        "i": 1,
49
        "i_sent": 1,
50
        "idx": 3,
51
        "is_contraction": false,
52
        "is_ent": false,
53
        "is_pronoun": false,
54
        "is_punctuation": false,
55
        "is_space": false,
56
        "is_stop": false,
57
        "is_superlative": false,
58
        "is_wh": false,
59
        "lemma_": "love",
60
        "lexspan": {
61
            "begin": 3,
62
            "end": 8
63
        },
64
        "norm": "loved",
65
        "norm_len": 5,
66
        "pos_": "VERB",
67
        "sent_i": 0,
68
        "shape": 13110060611322374290,
69
        "shape_": "xxxx",
70
        "tag": 17109001835818727656,
71
        "tag_": "VBD"
72
    },
73
    {
74
        "children": [],
75
        "dep": 405,
76
        "dep_": "aux",
77
        "ent": 0,
78
        "ent_": "-<N>-",
79
        "ent_iob": 0,
80
        "ent_iob_": "O",
81
        "i": 2,
82
        "i_sent": 2,
83
        "idx": 9,
84
        "is_contraction": false,
85
        "is_ent": false,
86
        "is_pronoun": false,
87
        "is_punctuation": false,
88
        "is_space": false,
89
        "is_stop": true,
90
        "is_superlative": false,
91
        "is_wh": false,
92
        "lemma_": "to",
93
        "lexspan": {
94
            "begin": 9,
95
            "end": 11
96
        },
97
        "norm": "to",
98
        "norm_len": 2,
99
        "pos_": "PART",
100
        "sent_i": 0,
101
        "shape": 4370460163704169311,
102
        "shape_": "xx",
103
        "tag": 5595707737748328492,
104
        "tag_": "TO"
105
    },
106
    {
107
        "children": [
108
            2
109
        ],
110
        "dep": 450,
111
        "dep_": "xcomp",
112
        "ent": 0,
113
        "ent_": "-<N>-",
114
        "ent_iob": 0,
115
        "ent_iob_": "O",
116
        "i": 3,
117
        "i_sent": 3,
118
        "idx": 12,
119
        "is_contraction": false,
120
        "is_ent": false,
121
        "is_pronoun": false,
122
        "is_punctuation": false,
123
        "is_space": false,
124
        "is_stop": false,
125
        "is_superlative": false,
126
        "is_wh": false,
127
        "lemma_": "smoke",
128
        "lexspan": {
129
            "begin": 12,
130
            "end": 17
131
        },
132
        "norm": "smoke",
133
        "norm_len": 5,
134
        "pos_": "VERB",
135
        "sent_i": 0,
136
        "shape": 13110060611322374290,
137
        "shape_": "xxxx",
138
        "tag": 14200088355797579614,
139
        "tag_": "VB"
140
    },
141
    {
142
        "children": [],
143
        "dep": 407,
144
        "dep_": "cc",
145
        "ent": 0,
146
        "ent_": "-<N>-",
147
        "ent_iob": 0,
148
        "ent_iob_": "O",
149
        "i": 4,
150
        "i_sent": 4,
151
        "idx": 18,
152
        "is_contraction": false,
153
        "is_ent": false,
154
        "is_pronoun": false,
155
        "is_punctuation": false,
156
        "is_space": false,
157
        "is_stop": true,
158
        "is_superlative": false,
159
        "is_wh": false,
160
        "lemma_": "but",
161
        "lexspan": {
162
            "begin": 18,
163
            "end": 21
164
        },
165
        "norm": "but",
166
        "norm_len": 3,
167
        "pos_": "CCONJ",
168
        "sent_i": 0,
169
        "shape": 4088098365541558500,
170
        "shape_": "xxx",
171
        "tag": 17571114184892886314,
172
        "tag_": "CC"
173
    },
174
    {
175
        "children": [],
176
        "dep": 7037928807040764755,
177
        "dep_": "compound",
178
        "ent": 383,
179
        "ent_": "ORG",
180
        "ent_iob": 3,
181
        "ent_iob_": "B",
182
        "i": 5,
183
        "i_sent": 5,
184
        "idx": 22,
185
        "is_contraction": false,
186
        "is_ent": true,
187
        "is_pronoun": false,
188
        "is_punctuation": false,
189
        "is_space": false,
190
        "is_stop": false,
191
        "is_superlative": false,
192
        "is_wh": false,
193
        "lemma_": "Marlboro",
194
        "lexspan": {
195
            "begin": 22,
196
            "end": 30
197
        },
198
        "norm": "Marlboro",
199
        "norm_len": 8,
200
        "pos_": "PROPN",
201
        "sent_i": 0,
202
        "shape": 16072095006890171862,
203
        "shape_": "Xxxxx",
204
        "tag": 15794550382381185553,
205
        "tag_": "NNP"
206
    },
207
    {
208
        "children": [
209
            5
210
        ],
211
        "dep": 429,
212
        "dep_": "nsubj",
213
        "ent": 0,
214
        "ent_": "-<N>-",
215
        "ent_iob": 0,
216
        "ent_iob_": "O",
217
        "i": 6,
218
        "i_sent": 6,
219
        "idx": 31,
220
        "is_contraction": false,
221
        "is_ent": false,
222
        "is_pronoun": false,
223
        "is_punctuation": false,
224
        "is_space": false,
225
        "is_stop": false,
226
        "is_superlative": false,
227
        "is_wh": false,
228
        "lemma_": "cigarette",
229
        "lexspan": {
230
            "begin": 31,
231
            "end": 41
232
        },
233
        "norm": "cigarettes",
234
        "norm_len": 10,
235
        "pos_": "NOUN",
236
        "sent_i": 0,
237
        "shape": 13110060611322374290,
238
        "shape_": "xxxx",
239
        "tag": 783433942507015291,
240
        "tag_": "NNS"
241
    },
242
    {
243
        "children": [
244
            6,
245
            11,
246
            14,
247
            17
248
        ],
249
        "dep": 410,
250
        "dep_": "conj",
251
        "ent": 0,
252
        "ent_": "-<N>-",
253
        "ent_iob": 0,
254
        "ent_iob_": "O",
255
        "i": 7,
256
        "i_sent": 7,
257
        "idx": 42,
258
        "is_contraction": false,
259
        "is_ent": false,
260
        "is_pronoun": false,
261
        "is_punctuation": false,
262
        "is_space": false,
263
        "is_stop": false,
264
        "is_superlative": false,
265
        "is_wh": false,
266
        "lemma_": "give",
267
        "lexspan": {
268
            "begin": 42,
269
            "end": 46
270
        },
271
        "norm": "gave",
272
        "norm_len": 4,
273
        "pos_": "VERB",
274
        "sent_i": 0,
275
        "shape": 13110060611322374290,
276
        "shape_": "xxxx",
277
        "tag": 17109001835818727656,
278
        "tag_": "VBD"
279
    },
280
    {
281
        "children": [],
282
        "dep": 7037928807040764755,
283
        "dep_": "compound",
284
        "ent": 380,
285
        "ent_": "PERSON",
286
        "ent_iob": 3,
287
        "ent_iob_": "B",
288
        "i": 8,
289
        "i_sent": 8,
290
        "idx": 47,
291
        "is_contraction": false,
292
        "is_ent": true,
293
        "is_pronoun": false,
294
        "is_punctuation": false,
295
        "is_space": false,
296
        "is_stop": false,
297
        "is_superlative": false,
298
        "is_wh": false,
299
        "lemma_": "John",
300
        "lexspan": {
301
            "begin": 47,
302
            "end": 51
303
        },
304
        "norm": "John",
305
        "norm_len": 4,
306
        "pos_": "PROPN",
307
        "sent_i": 0,
308
        "shape": 10887629174180191697,
309
        "shape_": "Xxxx",
310
        "tag": 15794550382381185553,
311
        "tag_": "NNP"
312
    },
313
    {
314
        "children": [
315
            8
316
        ],
317
        "dep": 7037928807040764755,
318
        "dep_": "compound",
319
        "ent": 380,
320
        "ent_": "PERSON",
321
        "ent_iob": 1,
322
        "ent_iob_": "I",
323
        "i": 9,
324
        "i_sent": 9,
325
        "idx": 52,
326
        "is_contraction": false,
327
        "is_ent": true,
328
        "is_pronoun": false,
329
        "is_punctuation": false,
330
        "is_space": false,
331
        "is_stop": false,
332
        "is_superlative": false,
333
        "is_wh": false,
334
        "lemma_": "Smith",
335
        "lexspan": {
336
            "begin": 52,
337
            "end": 57
338
        },
339
        "norm": "Smith",
340
        "norm_len": 5,
341
        "pos_": "PROPN",
342
        "sent_i": 0,
343
        "shape": 16072095006890171862,
344
        "shape_": "Xxxxx",
345
        "tag": 15794550382381185553,
346
        "tag_": "NNP"
347
    },
348
    {
349
        "children": [],
350
        "dep": 7037928807040764755,
351
        "dep_": "compound",
352
        "ent": 0,
353
        "ent_": "-<N>-",
354
        "ent_iob": 0,
355
        "ent_iob_": "O",
356
        "i": 10,
357
        "i_sent": 10,
358
        "idx": 58,
359
        "is_contraction": false,
360
        "is_ent": false,
361
        "is_pronoun": false,
362
        "is_punctuation": false,
363
        "is_space": false,
364
        "is_stop": false,
365
        "is_superlative": false,
366
        "is_wh": false,
367
        "lemma_": "lung",
368
        "lexspan": {
369
            "begin": 58,
370
            "end": 62
371
        },
372
        "norm": "lung",
373
        "norm_len": 4,
374
        "pos_": "NOUN",
375
        "sent_i": 0,
376
        "shape": 13110060611322374290,
377
        "shape_": "xxxx",
378
        "tag": 15308085513773655218,
379
        "tag_": "NN"
380
    },
381
    {
382
        "children": [
383
            9,
384
            10
385
        ],
386
        "dep": 416,
387
        "dep_": "dobj",
388
        "ent": 0,
389
        "ent_": "-<N>-",
390
        "ent_iob": 0,
391
        "ent_iob_": "O",
392
        "i": 11,
393
        "i_sent": 11,
394
        "idx": 63,
395
        "is_contraction": false,
396
        "is_ent": false,
397
        "is_pronoun": false,
398
        "is_punctuation": false,
399
        "is_space": false,
400
        "is_stop": false,
401
        "is_superlative": false,
402
        "is_wh": false,
403
        "lemma_": "cancer",
404
        "lexspan": {
405
            "begin": 63,
406
            "end": 69
407
        },
408
        "norm": "cancer",
409
        "norm_len": 6,
410
        "pos_": "NOUN",
411
        "sent_i": 0,
412
        "shape": 13110060611322374290,
413
        "shape_": "xxxx",
414
        "tag": 15308085513773655218,
415
        "tag_": "NN"
416
    },
417
    {
418
        "children": [],
419
        "dep": 423,
420
        "dep_": "mark",
421
        "ent": 0,
422
        "ent_": "-<N>-",
423
        "ent_iob": 0,
424
        "ent_iob_": "O",
425
        "i": 12,
426
        "i_sent": 12,
427
        "idx": 70,
428
        "is_contraction": false,
429
        "is_ent": false,
430
        "is_pronoun": false,
431
        "is_punctuation": false,
432
        "is_space": false,
433
        "is_stop": true,
434
        "is_superlative": false,
435
        "is_wh": false,
436
        "lemma_": "while",
437
        "lexspan": {
438
            "begin": 70,
439
            "end": 75
440
        },
441
        "norm": "while",
442
        "norm_len": 5,
443
        "pos_": "SCONJ",
444
        "sent_i": 0,
445
        "shape": 13110060611322374290,
446
        "shape_": "xxxx",
447
        "tag": 1292078113972184607,
448
        "tag_": "IN"
449
    },
450
    {
451
        "children": [],
452
        "dep": 429,
453
        "dep_": "nsubj",
454
        "ent": 0,
455
        "ent_": "-<N>-",
456
        "ent_iob": 0,
457
        "ent_iob_": "O",
458
        "i": 13,
459
        "i_sent": 13,
460
        "idx": 76,
461
        "is_contraction": false,
462
        "is_ent": false,
463
        "is_pronoun": true,
464
        "is_punctuation": false,
465
        "is_space": false,
466
        "is_stop": true,
467
        "is_superlative": false,
468
        "is_wh": false,
469
        "lemma_": "he",
470
        "lexspan": {
471
            "begin": 76,
472
            "end": 78
473
        },
474
        "norm": "he",
475
        "norm_len": 2,
476
        "pos_": "PRON",
477
        "sent_i": 0,
478
        "shape": 4370460163704169311,
479
        "shape_": "xx",
480
        "tag": 13656873538139661788,
481
        "tag_": "PRP"
482
    },
483
    {
484
        "children": [
485
            12,
486
            13,
487
            15
488
        ],
489
        "dep": 399,
490
        "dep_": "advcl",
491
        "ent": 0,
492
        "ent_": "-<N>-",
493
        "ent_iob": 0,
494
        "ent_iob_": "O",
495
        "i": 14,
496
        "i_sent": 14,
497
        "idx": 79,
498
        "is_contraction": false,
499
        "is_ent": false,
500
        "is_pronoun": false,
501
        "is_punctuation": false,
502
        "is_space": false,
503
        "is_stop": true,
504
        "is_superlative": false,
505
        "is_wh": false,
506
        "lemma_": "be",
507
        "lexspan": {
508
            "begin": 79,
509
            "end": 82
510
        },
511
        "norm": "was",
512
        "norm_len": 3,
513
        "pos_": "AUX",
514
        "sent_i": 0,
515
        "shape": 4088098365541558500,
516
        "shape_": "xxx",
517
        "tag": 17109001835818727656,
518
        "tag_": "VBD"
519
    },
520
    {
521
        "children": [
522
            16
523
        ],
524
        "dep": 443,
525
        "dep_": "prep",
526
        "ent": 0,
527
        "ent_": "-<N>-",
528
        "ent_iob": 0,
529
        "ent_iob_": "O",
530
        "i": 15,
531
        "i_sent": 15,
532
        "idx": 83,
533
        "is_contraction": false,
534
        "is_ent": false,
535
        "is_pronoun": false,
536
        "is_punctuation": false,
537
        "is_space": false,
538
        "is_stop": true,
539
        "is_superlative": false,
540
        "is_wh": false,
541
        "lemma_": "in",
542
        "lexspan": {
543
            "begin": 83,
544
            "end": 85
545
        },
546
        "norm": "in",
547
        "norm_len": 2,
548
        "pos_": "ADP",
549
        "sent_i": 0,
550
        "shape": 4370460163704169311,
551
        "shape_": "xx",
552
        "tag": 1292078113972184607,
553
        "tag_": "IN"
554
    },
555
    {
556
        "children": [],
557
        "dep": 439,
558
        "dep_": "pobj",
559
        "ent": 384,
560
        "ent_": "GPE",
561
        "ent_iob": 3,
562
        "ent_iob_": "B",
563
        "i": 16,
564
        "i_sent": 16,
565
        "idx": 86,
566
        "is_contraction": false,
567
        "is_ent": true,
568
        "is_pronoun": false,
569
        "is_punctuation": false,
570
        "is_space": false,
571
        "is_stop": false,
572
        "is_superlative": false,
573
        "is_wh": false,
574
        "lemma_": "Chicago",
575
        "lexspan": {
576
            "begin": 86,
577
            "end": 93
578
        },
579
        "norm": "Chicago",
580
        "norm_len": 7,
581
        "pos_": "PROPN",
582
        "sent_i": 0,
583
        "shape": 16072095006890171862,
584
        "shape_": "Xxxxx",
585
        "tag": 15794550382381185553,
586
        "tag_": "NNP"
587
    },
588
    {
589
        "children": [],
590
        "dep": 445,
591
        "dep_": "punct",
592
        "ent": 0,
593
        "ent_": "-<N>-",
594
        "ent_iob": 0,
595
        "ent_iob_": "O",
596
        "i": 17,
597
        "i_sent": 17,
598
        "idx": 93,
599
        "is_contraction": false,
600
        "is_ent": false,
601
        "is_pronoun": false,
602
        "is_punctuation": true,
603
        "is_space": false,
604
        "is_stop": false,
605
        "is_superlative": false,
606
        "is_wh": false,
607
        "lemma_": ".",
608
        "lexspan": {
609
            "begin": 93,
610
            "end": 94
611
        },
612
        "norm": ".",
613
        "norm_len": 1,
614
        "pos_": "PUNCT",
615
        "sent_i": 0,
616
        "shape": 12646065887601541794,
617
        "shape_": ".",
618
        "tag": 12646065887601541794,
619
        "tag_": "."
620
    }
621
]