Merge pull request #357 from wbwseeker/german_ner

German ner
This commit is contained in:
Matthew Honnibal 2016-05-02 23:39:34 +10:00
commit 6e1f1c4b9e

View File

@@ -151,9 +151,9 @@ def read_json_file(loc, docs_filter=None):
for i, token in enumerate(sent['tokens']): for i, token in enumerate(sent['tokens']):
words.append(token['orth']) words.append(token['orth'])
ids.append(i) ids.append(i)
tags.append(token['tag']) tags.append(token.get('tag','-'))
heads.append(token['head'] + i) heads.append(token.get('head',0) + i)
labels.append(token['dep']) labels.append(token.get('dep',''))
# Ensure ROOT label is case-insensitive # Ensure ROOT label is case-insensitive
if labels[-1].lower() == 'root': if labels[-1].lower() == 'root':
labels[-1] = 'ROOT' labels[-1] = 'ROOT'