* Note broken sense data in prepare_treebank

This commit is contained in:
Matthew Honnibal 2015-07-05 21:04:57 +02:00
parent 96442d9c3e
commit 8f0fe1a4ea

View File

@@ -114,6 +114,9 @@ def format_sentence(deps, ner, brackets, senses):
def format_token(token_id, token, ner, senses): def format_token(token_id, token, ner, senses):
assert token_id == token['id'] assert token_id == token['id']
head = (token['head'] - token_id) if token['head'] != -1 else 0 head = (token['head'] - token_id) if token['head'] != -1 else 0
# TODO: Sense data is currently broken due to alignment problems. It should also
# output OntoNotes groups, not WordNet supersenses. Don't print the sense
# information until this is fixed.
return { return {
'id': token_id, 'id': token_id,
'orth': token['word'], 'orth': token['word'],
@@ -121,7 +124,6 @@ def format_token(token_id, token, ner, senses):
'head': head, 'head': head,
'dep': token['dep'], 'dep': token['dep'],
'ner': ner, 'ner': ner,
'ssenses': senses[token_id]
} }