diff --git a/docs/source/example_wsj0001.json b/docs/source/example_wsj0001.json new file mode 100644 index 000000000..25d1cf5c7 --- /dev/null +++ b/docs/source/example_wsj0001.json @@ -0,0 +1,337 @@ +{ + "id": "wsj_0001", + "paragraphs": [ + { + "raw": "Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov. 29. Mr. Vinken is chairman of Elsevier N.V., the Dutch publishing group.", + + "segmented": "Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov. 29.Mr. Vinken is chairman of Elsevier N.V., the Dutch publishing group.", + + "sents": [ + 0, + 85 + ], + + "tokens": [ + { + "dep": "NMOD", + "start": 0, + "head": 7, + "tag": "NNP", + "orth": "Pierre" + }, + { + "dep": "SUB", + "start": 7, + "head": 29, + "tag": "NNP", + "orth": "Vinken" + }, + { + "dep": "P", + "start": 13, + "head": 7, + "tag": ",", + "orth": "," + }, + { + "dep": "NMOD", + "start": 15, + "head": 18, + "tag": "CD", + "orth": "61" + }, + { + "dep": "AMOD", + "start": 18, + "head": 24, + "tag": "NNS", + "orth": "years" + }, + { + "dep": "NMOD", + "start": 24, + "head": 7, + "tag": "JJ", + "orth": "old" + }, + { + "dep": "P", + "start": 27, + "head": 7, + "tag": ",", + "orth": "," + }, + { + "dep": "ROOT", + "start": 29, + "head": -1, + "tag": "MD", + "orth": "will" + }, + { + "dep": "VC", + "start": 34, + "head": 29, + "tag": "VB", + "orth": "join" + }, + { + "dep": "NMOD", + "start": 39, + "head": 43, + "tag": "DT", + "orth": "the" + }, + { + "dep": "OBJ", + "start": 43, + "head": 34, + "tag": "NN", + "orth": "board" + }, + { + "dep": "VMOD", + "start": 49, + "head": 34, + "tag": "IN", + "orth": "as" + }, + { + "dep": "NMOD", + "start": 52, + "head": 67, + "tag": "DT", + "orth": "a" + }, + { + "dep": "NMOD", + "start": 54, + "head": 67, + "tag": "JJ", + "orth": "nonexecutive" + }, + { + "dep": "PMOD", + "start": 67, + "head": 49, + "tag": "NN", + "orth": "director" + }, + { + "dep": "VMOD", + "start": 76, + "head": 34, + "tag": "NNP", + "orth": "Nov." + }, + { + "dep": "NMOD", + "start": 81, + "head": 76, + "tag": "CD", + "orth": "29" + }, + { + "dep": "P", + "start": 83, + "head": 29, + "tag": ".", + "orth": "." + }, + { + "dep": "NMOD", + "start": 85, + "head": 89, + "tag": "NNP", + "orth": "Mr." + }, + { + "dep": "SUB", + "start": 89, + "head": 96, + "tag": "NNP", + "orth": "Vinken" + }, + { + "dep": "ROOT", + "start": 96, + "head": -1, + "tag": "VBZ", + "orth": "is" + }, + { + "dep": "PRD", + "start": 99, + "head": 96, + "tag": "NN", + "orth": "chairman" + }, + { + "dep": "NMOD", + "start": 108, + "head": 99, + "tag": "IN", + "orth": "of" + }, + { + "dep": "NMOD", + "start": 111, + "head": 120, + "tag": "NNP", + "orth": "Elsevier" + }, + { + "dep": "NMOD", + "start": 120, + "head": 147, + "tag": "NNP", + "orth": "N.V." + }, + { + "dep": "P", + "start": 124, + "head": 147, + "tag": ",", + "orth": "," + }, + { + "dep": "NMOD", + "start": 126, + "head": 147, + "tag": "DT", + "orth": "the" + }, + { + "dep": "NMOD", + "start": 130, + "head": 147, + "tag": "NNP", + "orth": "Dutch" + }, + { + "dep": "NMOD", + "start": 136, + "head": 147, + "tag": "VBG", + "orth": "publishing" + }, + { + "dep": "PMOD", + "start": 147, + "head": 108, + "tag": "NN", + "orth": "group" + }, + { + "dep": "P", + "start": 152, + "head": 96, + "tag": ".", + "orth": "." + } + ], + "brackets": [ + { + "start": 0, + "end": 7, + "label": "NP" + }, + { + "start": 15, + "end": 18, + "label": "NP" + }, + { + "start": 15, + "end": 24, + "label": "ADJP" + }, + { + "start": 0, + "end": 27, + "label": "NP-SBJ" + }, + { + "start": 39, + "end": 43, + "label": "NP" + }, + { + "start": 52, + "end": 67, + "label": "NP" + }, + { + "start": 49, + "end": 67, + "label": "PP-CLR" + }, + { + "start": 76, + "end": 81, + "label": "NP-TMP" + }, + { + "start": 34, + "end": 81, + "label": "VP" + }, + { + "start": 29, + "end": 81, + "label": "VP" + }, + { + "start": 0, + "end": 83, + "label": "S" + }, + { + "start": 85, + "end": 89, + "label": "NP-SBJ" + }, + { + "start": 99, + "end": 99, + "label": "NP" + }, + { + "start": 111, + "end": 120, + "label": "NP" + }, + { + "start": 126, + "end": 147, + "label": "NP" + }, + { + "start": 111, + "end": 147, + "label": "NP" + }, + { + "start": 108, + "end": 147, + "label": "PP" + }, + { + "start": 99, + "end": 147, + "label": "NP-PRD" + }, + { + "start": 96, + "end": 147, + "label": "VP" + }, + { + "start": 85, + "end": 152, + "label": "S" + } + ] + } + ] +}