spaCy/examples/training/training-data.json
2017-10-24 12:22:17 +02:00

1104 lines
26 KiB
JSON

[
{
"id": 0,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "prep",
"head": 44,
"tag": "IN",
"orth": "In"
},
{
"dep": "det",
"head": 3,
"tag": "DT",
"orth": "an"
},
{
"dep": "nn",
"head": 2,
"tag": "NNP",
"orth": "Oct."
},
{
"dep": "num",
"head": 1,
"tag": "CD",
"orth": "19"
},
{
"dep": "pobj",
"head": -4,
"tag": "NN",
"orth": "review"
},
{
"dep": "prep",
"head": -1,
"tag": "IN",
"orth": "of"
},
{
"dep": "punct",
"head": 2,
"tag": "``",
"orth": "``"
},
{
"dep": "det",
"head": 1,
"tag": "DT",
"orth": "The"
},
{
"dep": "pobj",
"head": -3,
"tag": "NN",
"orth": "Misanthrope"
},
{
"dep": "punct",
"head": -1,
"tag": "''",
"orth": "''"
},
{
"dep": "prep",
"head": -2,
"tag": "IN",
"orth": "at"
},
{
"dep": "poss",
"head": 3,
"tag": "NNP",
"orth": "Chicago"
},
{
"dep": "possessive",
"head": -1,
"tag": "POS",
"orth": "'s"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Goodman"
},
{
"dep": "pobj",
"head": -4,
"tag": "NNP",
"orth": "Theatre"
},
{
"dep": "punct",
"head": 4,
"tag": "-LRB-",
"orth": "-LRB-"
},
{
"dep": "punct",
"head": 3,
"tag": "``",
"orth": "``"
},
{
"dep": "amod",
"head": 1,
"tag": "VBN",
"orth": "Revitalized"
},
{
"dep": "nsubj",
"head": 1,
"tag": "NNS",
"orth": "Classics"
},
{
"dep": "dep",
"head": -15,
"tag": "VBP",
"orth": "Take"
},
{
"dep": "det",
"head": 1,
"tag": "DT",
"orth": "the"
},
{
"dep": "dobj",
"head": -2,
"tag": "NN",
"orth": "Stage"
},
{
"dep": "prep",
"head": -3,
"tag": "IN",
"orth": "in"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Windy"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNP",
"orth": "City"
},
{
"dep": "punct",
"head": -6,
"tag": ",",
"orth": ","
},
{
"dep": "punct",
"head": -7,
"tag": "''",
"orth": "''"
},
{
"dep": "dep",
"head": -8,
"tag": "NN",
"orth": "Leisure"
},
{
"dep": "cc",
"head": -1,
"tag": "CC",
"orth": "&"
},
{
"dep": "conj",
"head": -2,
"tag": "NNS",
"orth": "Arts"
},
{
"dep": "punct",
"head": -11,
"tag": "-RRB-",
"orth": "-RRB-"
},
{
"dep": "punct",
"head": 13,
"tag": ",",
"orth": ","
},
{
"dep": "det",
"head": 1,
"tag": "DT",
"orth": "the"
},
{
"dep": "nsubjpass",
"head": 11,
"tag": "NN",
"orth": "role"
},
{
"dep": "prep",
"head": -1,
"tag": "IN",
"orth": "of"
},
{
"dep": "pobj",
"head": -1,
"tag": "NNP",
"orth": "Celimene"
},
{
"dep": "punct",
"head": -3,
"tag": ",",
"orth": ","
},
{
"dep": "partmod",
"head": -4,
"tag": "VBN",
"orth": "played"
},
{
"dep": "prep",
"head": -1,
"tag": "IN",
"orth": "by"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Kim"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNP",
"orth": "Cattrall"
},
{
"dep": "punct",
"head": -8,
"tag": ",",
"orth": ","
},
{
"dep": "auxpass",
"head": 2,
"tag": "VBD",
"orth": "was"
},
{
"dep": "advmod",
"head": 1,
"tag": "RB",
"orth": "mistakenly"
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBN",
"orth": "attributed"
},
{
"dep": "prep",
"head": -1,
"tag": "TO",
"orth": "to"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Christina"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNP",
"orth": "Haag"
},
{
"dep": "punct",
"head": -4,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 1,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Ms."
},
{
"dep": "nsubj",
"head": 1,
"tag": "NNP",
"orth": "Haag"
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBZ",
"orth": "plays"
},
{
"dep": "dobj",
"head": -1,
"tag": "NNP",
"orth": "Elianti"
},
{
"dep": "punct",
"head": -2,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 2,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "nn",
"head": 3,
"tag": "NNP",
"orth": "Rolls-Royce"
},
{
"dep": "nn",
"head": 2,
"tag": "NNP",
"orth": "Motor"
},
{
"dep": "nn",
"head": 1,
"tag": "NNPS",
"orth": "Cars"
},
{
"dep": "nsubj",
"head": 1,
"tag": "NNP",
"orth": "Inc."
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBD",
"orth": "said"
},
{
"dep": "nsubj",
"head": 1,
"tag": "PRP",
"orth": "it"
},
{
"dep": "ccomp",
"head": -2,
"tag": "VBZ",
"orth": "expects"
},
{
"dep": "poss",
"head": 2,
"tag": "PRP$",
"orth": "its"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "U.S."
},
{
"dep": "nsubj",
"head": 3,
"tag": "NNS",
"orth": "sales"
},
{
"dep": "aux",
"head": 2,
"tag": "TO",
"orth": "to"
},
{
"dep": "cop",
"head": 1,
"tag": "VB",
"orth": "remain"
},
{
"dep": "xcomp",
"head": -6,
"tag": "JJ",
"orth": "steady"
},
{
"dep": "prep",
"head": -1,
"tag": "IN",
"orth": "at"
},
{
"dep": "quantmod",
"head": 1,
"tag": "IN",
"orth": "about"
},
{
"dep": "num",
"head": 1,
"tag": "CD",
"orth": "1,200"
},
{
"dep": "pobj",
"head": -3,
"tag": "NNS",
"orth": "cars"
},
{
"dep": "prep",
"head": -5,
"tag": "IN",
"orth": "in"
},
{
"dep": "pobj",
"head": -1,
"tag": "CD",
"orth": "1990"
},
{
"dep": "punct",
"head": -15,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 3,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "det",
"head": 3,
"tag": "DT",
"orth": "The"
},
{
"dep": "nn",
"head": 2,
"tag": "NN",
"orth": "luxury"
},
{
"dep": "nn",
"head": 1,
"tag": "NN",
"orth": "auto"
},
{
"dep": "nsubj",
"head": 3,
"tag": "NN",
"orth": "maker"
},
{
"dep": "amod",
"head": 1,
"tag": "JJ",
"orth": "last"
},
{
"dep": "tmod",
"head": 1,
"tag": "NN",
"orth": "year"
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBD",
"orth": "sold"
},
{
"dep": "num",
"head": 1,
"tag": "CD",
"orth": "1,214"
},
{
"dep": "dobj",
"head": -2,
"tag": "NNS",
"orth": "cars"
},
{
"dep": "prep",
"head": -3,
"tag": "IN",
"orth": "in"
},
{
"dep": "det",
"head": 1,
"tag": "DT",
"orth": "the"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNP",
"orth": "U.S."
}
]
}
]
}
]
},
{
"id": 4,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Howard"
},
{
"dep": "nsubj",
"head": 8,
"tag": "NNP",
"orth": "Mosher"
},
{
"dep": "punct",
"head": -1,
"tag": ",",
"orth": ","
},
{
"dep": "appos",
"head": -2,
"tag": "NN",
"orth": "president"
},
{
"dep": "cc",
"head": -1,
"tag": "CC",
"orth": "and"
},
{
"dep": "amod",
"head": 2,
"tag": "JJ",
"orth": "chief"
},
{
"dep": "nn",
"head": 1,
"tag": "NN",
"orth": "executive"
},
{
"dep": "conj",
"head": -4,
"tag": "NN",
"orth": "officer"
},
{
"dep": "punct",
"head": -7,
"tag": ",",
"orth": ","
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBD",
"orth": "said"
},
{
"dep": "nsubj",
"head": 1,
"tag": "PRP",
"orth": "he"
},
{
"dep": "ccomp",
"head": -2,
"tag": "VBZ",
"orth": "anticipates"
},
{
"dep": "dobj",
"head": -1,
"tag": "NN",
"orth": "growth"
},
{
"dep": "prep",
"head": -1,
"tag": "IN",
"orth": "for"
},
{
"dep": "det",
"head": 3,
"tag": "DT",
"orth": "the"
},
{
"dep": "nn",
"head": 2,
"tag": "NN",
"orth": "luxury"
},
{
"dep": "nn",
"head": 1,
"tag": "NN",
"orth": "auto"
},
{
"dep": "pobj",
"head": -4,
"tag": "NN",
"orth": "maker"
},
{
"dep": "prep",
"head": -6,
"tag": "IN",
"orth": "in"
},
{
"dep": "pobj",
"head": -1,
"tag": "NNP",
"orth": "Britain"
},
{
"dep": "cc",
"head": -1,
"tag": "CC",
"orth": "and"
},
{
"dep": "conj",
"head": -2,
"tag": "NNP",
"orth": "Europe"
},
{
"dep": "punct",
"head": -4,
"tag": ",",
"orth": ","
},
{
"dep": "cc",
"head": -5,
"tag": "CC",
"orth": "and"
},
{
"dep": "conj",
"head": -6,
"tag": "IN",
"orth": "in"
},
{
"dep": "amod",
"head": 1,
"tag": "JJ",
"orth": "Far"
},
{
"dep": "amod",
"head": 1,
"tag": "JJ",
"orth": "Eastern"
},
{
"dep": "pobj",
"head": -3,
"tag": "NNS",
"orth": "markets"
},
{
"dep": "punct",
"head": -19,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 5,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "nn",
"head": 2,
"tag": "NNP",
"orth": "BELL"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "INDUSTRIES"
},
{
"dep": "nsubj",
"head": 1,
"tag": "NNP",
"orth": "Inc."
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBD",
"orth": "increased"
},
{
"dep": "poss",
"head": 1,
"tag": "PRP$",
"orth": "its"
},
{
"dep": "dobj",
"head": -2,
"tag": "NN",
"orth": "quarterly"
},
{
"dep": "prep",
"head": -3,
"tag": "TO",
"orth": "to"
},
{
"dep": "num",
"head": 1,
"tag": "CD",
"orth": "10"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNS",
"orth": "cents"
},
{
"dep": "prep",
"head": -6,
"tag": "IN",
"orth": "from"
},
{
"dep": "num",
"head": 1,
"tag": "CD",
"orth": "seven"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNS",
"orth": "cents"
},
{
"dep": "det",
"head": 1,
"tag": "DT",
"orth": "a"
},
{
"dep": "npadvmod",
"head": -2,
"tag": "NN",
"orth": "share"
},
{
"dep": "punct",
"head": -11,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 6,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "det",
"head": 2,
"tag": "DT",
"orth": "The"
},
{
"dep": "amod",
"head": 1,
"tag": "JJ",
"orth": "new"
},
{
"dep": "nsubj",
"head": 3,
"tag": "NN",
"orth": "rate"
},
{
"dep": "aux",
"head": 2,
"tag": "MD",
"orth": "will"
},
{
"dep": "cop",
"head": 1,
"tag": "VB",
"orth": "be"
},
{
"dep": "ROOT",
"head": 0,
"tag": "JJ",
"orth": "payable"
},
{
"dep": "tmod",
"head": -1,
"tag": "NNP",
"orth": "Feb."
},
{
"dep": "num",
"head": -1,
"tag": "CD",
"orth": "15"
},
{
"dep": "punct",
"head": -3,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 7,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "det",
"head": 2,
"tag": "DT",
"orth": "A"
},
{
"dep": "nn",
"head": 1,
"tag": "NN",
"orth": "record"
},
{
"dep": "nsubjpass",
"head": 4,
"tag": "NN",
"orth": "date"
},
{
"dep": "aux",
"head": 3,
"tag": "VBZ",
"orth": "has"
},
{
"dep": "neg",
"head": 2,
"tag": "RB",
"orth": "n't"
},
{
"dep": "auxpass",
"head": 1,
"tag": "VBN",
"orth": "been"
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBN",
"orth": "set"
},
{
"dep": "punct",
"head": -1,
"tag": ".",
"orth": "."
}
]
}
]
}
]
},
{
"id": 8,
"paragraphs": [
{
"sentences": [
{
"tokens": [
{
"dep": "nsubj",
"head": 7,
"tag": "NNP",
"orth": "Bell"
},
{
"dep": "punct",
"head": -1,
"tag": ",",
"orth": ","
},
{
"dep": "partmod",
"head": -2,
"tag": "VBN",
"orth": "based"
},
{
"dep": "prep",
"head": -1,
"tag": "IN",
"orth": "in"
},
{
"dep": "nn",
"head": 1,
"tag": "NNP",
"orth": "Los"
},
{
"dep": "pobj",
"head": -2,
"tag": "NNP",
"orth": "Angeles"
},
{
"dep": "punct",
"head": -6,
"tag": ",",
"orth": ","
},
{
"dep": "ROOT",
"head": 0,
"tag": "VBZ",
"orth": "makes"
},
{
"dep": "cc",
"head": -1,
"tag": "CC",
"orth": "and"
},
{
"dep": "conj",
"head": -2,
"tag": "VBZ",
"orth": "distributes"
},
{
"dep": "amod",
"head": 5,
"tag": "JJ",
"orth": "electronic"
},
{
"dep": "punct",
"head": -1,
"tag": ",",
"orth": ","
},
{
"dep": "conj",
"head": -2,
"tag": "NN",
"orth": "computer"
},
{
"dep": "cc",
"head": -3,
"tag": "CC",
"orth": "and"
},
{
"dep": "conj",
"head": -4,
"tag": "NN",
"orth": "building"
},
{
"dep": "dobj",
"head": -8,
"tag": "NNS",
"orth": "products"
},
{
"dep": "punct",
"head": -9,
"tag": ".",
"orth": "."
}
]
}
]
}
]
}
]