mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
* Note broken sense data in prepare_treebank
This commit is contained in:
parent
96442d9c3e
commit
8f0fe1a4ea
|
@ -114,6 +114,9 @@ def format_sentence(deps, ner, brackets, senses):
|
||||||
def format_token(token_id, token, ner, senses):
|
def format_token(token_id, token, ner, senses):
|
||||||
assert token_id == token['id']
|
assert token_id == token['id']
|
||||||
head = (token['head'] - token_id) if token['head'] != -1 else 0
|
head = (token['head'] - token_id) if token['head'] != -1 else 0
|
||||||
|
# TODO: Sense data currently broken, due to alignment problems. Also should
|
||||||
|
# output OntoNotes groups, not WordNet supersenses. Don't print the information
|
||||||
|
# until this is fixed.
|
||||||
return {
|
return {
|
||||||
'id': token_id,
|
'id': token_id,
|
||||||
'orth': token['word'],
|
'orth': token['word'],
|
||||||
|
@ -121,7 +124,6 @@ def format_token(token_id, token, ner, senses):
|
||||||
'head': head,
|
'head': head,
|
||||||
'dep': token['dep'],
|
'dep': token['dep'],
|
||||||
'ner': ner,
|
'ner': ner,
|
||||||
'ssenses': senses[token_id]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user