spaCy/examples/training/training-data.json

1104 lines
25 KiB
JSON
Raw Normal View History

2017-10-24 13:05:10 +03:00
[
{
"id":0,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"prep",
"head":44,
"tag":"IN",
"orth":"In"
},
{
"dep":"det",
"head":3,
"tag":"DT",
"orth":"an"
},
{
"dep":"nn",
"head":2,
"tag":"NNP",
"orth":"Oct."
},
{
"dep":"num",
"head":1,
"tag":"CD",
"orth":"19"
},
{
"dep":"pobj",
"head":-4,
"tag":"NN",
"orth":"review"
},
{
"dep":"prep",
"head":-1,
"tag":"IN",
"orth":"of"
},
{
"dep":"punct",
"head":2,
"tag":"``",
"orth":"``"
},
{
"dep":"det",
"head":1,
"tag":"DT",
"orth":"The"
},
{
"dep":"pobj",
"head":-3,
"tag":"NN",
"orth":"Misanthrope"
},
{
"dep":"punct",
"head":-1,
"tag":"''",
"orth":"''"
},
{
"dep":"prep",
"head":-2,
"tag":"IN",
"orth":"at"
},
{
"dep":"poss",
"head":3,
"tag":"NNP",
"orth":"Chicago"
},
{
"dep":"possessive",
"head":-1,
"tag":"POS",
"orth":"'s"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Goodman"
},
{
"dep":"pobj",
"head":-4,
"tag":"NNP",
"orth":"Theatre"
},
{
"dep":"punct",
"head":4,
"tag":"-LRB-",
"orth":"-LRB-"
},
{
"dep":"punct",
"head":3,
"tag":"``",
"orth":"``"
},
{
"dep":"amod",
"head":1,
"tag":"VBN",
"orth":"Revitalized"
},
{
"dep":"nsubj",
"head":1,
"tag":"NNS",
"orth":"Classics"
},
{
"dep":"dep",
"head":-15,
"tag":"VBP",
"orth":"Take"
},
{
"dep":"det",
"head":1,
"tag":"DT",
"orth":"the"
},
{
"dep":"dobj",
"head":-2,
"tag":"NN",
"orth":"Stage"
},
{
"dep":"prep",
"head":-3,
"tag":"IN",
"orth":"in"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Windy"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNP",
"orth":"City"
},
{
"dep":"punct",
"head":-6,
"tag":",",
"orth":","
},
{
"dep":"punct",
"head":-7,
"tag":"''",
"orth":"''"
},
{
"dep":"dep",
"head":-8,
"tag":"NN",
"orth":"Leisure"
},
{
"dep":"cc",
"head":-1,
"tag":"CC",
"orth":"&"
},
{
"dep":"conj",
"head":-2,
"tag":"NNS",
"orth":"Arts"
},
{
"dep":"punct",
"head":-11,
"tag":"-RRB-",
"orth":"-RRB-"
},
{
"dep":"punct",
"head":13,
"tag":",",
"orth":","
},
{
"dep":"det",
"head":1,
"tag":"DT",
"orth":"the"
},
{
"dep":"nsubjpass",
"head":11,
"tag":"NN",
"orth":"role"
},
{
"dep":"prep",
"head":-1,
"tag":"IN",
"orth":"of"
},
{
"dep":"pobj",
"head":-1,
"tag":"NNP",
"orth":"Celimene"
},
{
"dep":"punct",
"head":-3,
"tag":",",
"orth":","
},
{
"dep":"partmod",
"head":-4,
"tag":"VBN",
"orth":"played"
},
{
"dep":"prep",
"head":-1,
"tag":"IN",
"orth":"by"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Kim"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNP",
"orth":"Cattrall"
},
{
"dep":"punct",
"head":-8,
"tag":",",
"orth":","
},
{
"dep":"auxpass",
"head":2,
"tag":"VBD",
"orth":"was"
},
{
"dep":"advmod",
"head":1,
"tag":"RB",
"orth":"mistakenly"
},
{
"dep":"ROOT",
"head":0,
"tag":"VBN",
"orth":"attributed"
},
{
"dep":"prep",
"head":-1,
"tag":"TO",
"orth":"to"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Christina"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNP",
"orth":"Haag"
},
{
"dep":"punct",
"head":-4,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":1,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Ms."
},
{
"dep":"nsubj",
"head":1,
"tag":"NNP",
"orth":"Haag"
},
{
"dep":"ROOT",
"head":0,
"tag":"VBZ",
"orth":"plays"
},
{
"dep":"dobj",
"head":-1,
"tag":"NNP",
"orth":"Elianti"
},
{
"dep":"punct",
"head":-2,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":2,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"nn",
"head":3,
"tag":"NNP",
"orth":"Rolls-Royce"
},
{
"dep":"nn",
"head":2,
"tag":"NNP",
"orth":"Motor"
},
{
"dep":"nn",
"head":1,
"tag":"NNPS",
"orth":"Cars"
},
{
"dep":"nsubj",
"head":1,
"tag":"NNP",
"orth":"Inc."
},
{
"dep":"ROOT",
"head":0,
"tag":"VBD",
"orth":"said"
},
{
"dep":"nsubj",
"head":1,
"tag":"PRP",
"orth":"it"
},
{
"dep":"ccomp",
"head":-2,
"tag":"VBZ",
"orth":"expects"
},
{
"dep":"poss",
"head":2,
"tag":"PRP$",
"orth":"its"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"U.S."
},
{
"dep":"nsubj",
"head":3,
"tag":"NNS",
"orth":"sales"
},
{
"dep":"aux",
"head":2,
"tag":"TO",
"orth":"to"
},
{
"dep":"cop",
"head":1,
"tag":"VB",
"orth":"remain"
},
{
"dep":"xcomp",
"head":-6,
"tag":"JJ",
"orth":"steady"
},
{
"dep":"prep",
"head":-1,
"tag":"IN",
"orth":"at"
},
{
"dep":"quantmod",
"head":1,
"tag":"IN",
"orth":"about"
},
{
"dep":"num",
"head":1,
"tag":"CD",
"orth":"1,200"
},
{
"dep":"pobj",
"head":-3,
"tag":"NNS",
"orth":"cars"
},
{
"dep":"prep",
"head":-5,
"tag":"IN",
"orth":"in"
},
{
"dep":"pobj",
"head":-1,
"tag":"CD",
"orth":"1990"
},
{
"dep":"punct",
"head":-15,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":3,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"det",
"head":3,
"tag":"DT",
"orth":"The"
},
{
"dep":"nn",
"head":2,
"tag":"NN",
"orth":"luxury"
},
{
"dep":"nn",
"head":1,
"tag":"NN",
"orth":"auto"
},
{
"dep":"nsubj",
"head":3,
"tag":"NN",
"orth":"maker"
},
{
"dep":"amod",
"head":1,
"tag":"JJ",
"orth":"last"
},
{
"dep":"tmod",
"head":1,
"tag":"NN",
"orth":"year"
},
{
"dep":"ROOT",
"head":0,
"tag":"VBD",
"orth":"sold"
},
{
"dep":"num",
"head":1,
"tag":"CD",
"orth":"1,214"
},
{
"dep":"dobj",
"head":-2,
"tag":"NNS",
"orth":"cars"
},
{
"dep":"prep",
"head":-3,
"tag":"IN",
"orth":"in"
},
{
"dep":"det",
"head":1,
"tag":"DT",
"orth":"the"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNP",
"orth":"U.S."
}
]
}
]
}
]
},
{
"id":4,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Howard"
},
{
"dep":"nsubj",
"head":8,
"tag":"NNP",
"orth":"Mosher"
},
{
"dep":"punct",
"head":-1,
"tag":",",
"orth":","
},
{
"dep":"appos",
"head":-2,
"tag":"NN",
"orth":"president"
},
{
"dep":"cc",
"head":-1,
"tag":"CC",
"orth":"and"
},
{
"dep":"amod",
"head":2,
"tag":"JJ",
"orth":"chief"
},
{
"dep":"nn",
"head":1,
"tag":"NN",
"orth":"executive"
},
{
"dep":"conj",
"head":-4,
"tag":"NN",
"orth":"officer"
},
{
"dep":"punct",
"head":-7,
"tag":",",
"orth":","
},
{
"dep":"ROOT",
"head":0,
"tag":"VBD",
"orth":"said"
},
{
"dep":"nsubj",
"head":1,
"tag":"PRP",
"orth":"he"
},
{
"dep":"ccomp",
"head":-2,
"tag":"VBZ",
"orth":"anticipates"
},
{
"dep":"dobj",
"head":-1,
"tag":"NN",
"orth":"growth"
},
{
"dep":"prep",
"head":-1,
"tag":"IN",
"orth":"for"
},
{
"dep":"det",
"head":3,
"tag":"DT",
"orth":"the"
},
{
"dep":"nn",
"head":2,
"tag":"NN",
"orth":"luxury"
},
{
"dep":"nn",
"head":1,
"tag":"NN",
"orth":"auto"
},
{
"dep":"pobj",
"head":-4,
"tag":"NN",
"orth":"maker"
},
{
"dep":"prep",
"head":-6,
"tag":"IN",
"orth":"in"
},
{
"dep":"pobj",
"head":-1,
"tag":"NNP",
"orth":"Britain"
},
{
"dep":"cc",
"head":-1,
"tag":"CC",
"orth":"and"
},
{
"dep":"conj",
"head":-2,
"tag":"NNP",
"orth":"Europe"
},
{
"dep":"punct",
"head":-4,
"tag":",",
"orth":","
},
{
"dep":"cc",
"head":-5,
"tag":"CC",
"orth":"and"
},
{
"dep":"conj",
"head":-6,
"tag":"IN",
"orth":"in"
},
{
"dep":"amod",
"head":1,
"tag":"JJ",
"orth":"Far"
},
{
"dep":"amod",
"head":1,
"tag":"JJ",
"orth":"Eastern"
},
{
"dep":"pobj",
"head":-3,
"tag":"NNS",
"orth":"markets"
},
{
"dep":"punct",
"head":-19,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":5,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"nn",
"head":2,
"tag":"NNP",
"orth":"BELL"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"INDUSTRIES"
},
{
"dep":"nsubj",
"head":1,
"tag":"NNP",
"orth":"Inc."
},
{
"dep":"ROOT",
"head":0,
"tag":"VBD",
"orth":"increased"
},
{
"dep":"poss",
"head":1,
"tag":"PRP$",
"orth":"its"
},
{
"dep":"dobj",
"head":-2,
"tag":"NN",
"orth":"quarterly"
},
{
"dep":"prep",
"head":-3,
"tag":"TO",
"orth":"to"
},
{
"dep":"num",
"head":1,
"tag":"CD",
"orth":"10"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNS",
"orth":"cents"
},
{
"dep":"prep",
"head":-6,
"tag":"IN",
"orth":"from"
},
{
"dep":"num",
"head":1,
"tag":"CD",
"orth":"seven"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNS",
"orth":"cents"
},
{
"dep":"det",
"head":1,
"tag":"DT",
"orth":"a"
},
{
"dep":"npadvmod",
"head":-2,
"tag":"NN",
"orth":"share"
},
{
"dep":"punct",
"head":-11,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":6,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"det",
"head":2,
"tag":"DT",
"orth":"The"
},
{
"dep":"amod",
"head":1,
"tag":"JJ",
"orth":"new"
},
{
"dep":"nsubj",
"head":3,
"tag":"NN",
"orth":"rate"
},
{
"dep":"aux",
"head":2,
"tag":"MD",
"orth":"will"
},
{
"dep":"cop",
"head":1,
"tag":"VB",
"orth":"be"
},
{
"dep":"ROOT",
"head":0,
"tag":"JJ",
"orth":"payable"
},
{
"dep":"tmod",
"head":-1,
"tag":"NNP",
"orth":"Feb."
},
{
"dep":"num",
"head":-1,
"tag":"CD",
"orth":"15"
},
{
"dep":"punct",
"head":-3,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":7,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"det",
"head":2,
"tag":"DT",
"orth":"A"
},
{
"dep":"nn",
"head":1,
"tag":"NN",
"orth":"record"
},
{
"dep":"nsubjpass",
"head":4,
"tag":"NN",
"orth":"date"
},
{
"dep":"aux",
"head":3,
"tag":"VBZ",
"orth":"has"
},
{
"dep":"neg",
"head":2,
"tag":"RB",
"orth":"n't"
},
{
"dep":"auxpass",
"head":1,
"tag":"VBN",
"orth":"been"
},
{
"dep":"ROOT",
"head":0,
"tag":"VBN",
"orth":"set"
},
{
"dep":"punct",
"head":-1,
"tag":".",
"orth":"."
}
]
}
]
}
]
},
{
"id":8,
"paragraphs":[
{
"sentences":[
{
"tokens":[
{
"dep":"nsubj",
"head":7,
"tag":"NNP",
"orth":"Bell"
},
{
"dep":"punct",
"head":-1,
"tag":",",
"orth":","
},
{
"dep":"partmod",
"head":-2,
"tag":"VBN",
"orth":"based"
},
{
"dep":"prep",
"head":-1,
"tag":"IN",
"orth":"in"
},
{
"dep":"nn",
"head":1,
"tag":"NNP",
"orth":"Los"
},
{
"dep":"pobj",
"head":-2,
"tag":"NNP",
"orth":"Angeles"
},
{
"dep":"punct",
"head":-6,
"tag":",",
"orth":","
},
{
"dep":"ROOT",
"head":0,
"tag":"VBZ",
"orth":"makes"
},
{
"dep":"cc",
"head":-1,
"tag":"CC",
"orth":"and"
},
{
"dep":"conj",
"head":-2,
"tag":"VBZ",
"orth":"distributes"
},
{
"dep":"amod",
"head":5,
"tag":"JJ",
"orth":"electronic"
},
{
"dep":"punct",
"head":-1,
"tag":",",
"orth":","
},
{
"dep":"conj",
"head":-2,
"tag":"NN",
"orth":"computer"
},
{
"dep":"cc",
"head":-3,
"tag":"CC",
"orth":"and"
},
{
"dep":"conj",
"head":-4,
"tag":"NN",
"orth":"building"
},
{
"dep":"dobj",
"head":-8,
"tag":"NNS",
"orth":"products"
},
{
"dep":"punct",
"head":-9,
"tag":".",
"orth":"."
}
]
}
]
}
]
}
]