* When encoding POS/NER tags, accept '-' as a missing value

This commit is contained in:
Matthew Honnibal 2014-11-07 04:42:31 +11:00
parent 949a6245f9
commit 68d1cdad62

View File

@@ -36,6 +36,8 @@ def read_gold(file_, tag_list, col):
return golds return golds
def _encode_pos(tag, tag_ids, tag_list): def _encode_pos(tag, tag_ids, tag_list):
if tag == '-':
return 0
if tag not in tag_ids: if tag not in tag_ids:
tag_ids[tag] = len(tag_list) tag_ids[tag] = len(tag_list)
tag_list.append(tag) tag_list.append(tag)