Add option to control how missing entities are handled when getting NER tags

This commit is contained in:
Matthew Honnibal 2017-07-29 21:58:37 +02:00
parent aff325b7e0
commit ec63f4fe7b

View File

@@ -483,7 +483,7 @@ cdef class GoldParse:
return not nonproj.is_nonproj_tree(self.heads) return not nonproj.is_nonproj_tree(self.heads)
def biluo_tags_from_offsets(doc, entities): def biluo_tags_from_offsets(doc, entities, missing='O'):
"""Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out """Encode labelled spans into per-token tags, using the Begin/In/Last/Unit/Out
scheme (BILUO). scheme (BILUO).
@@ -535,7 +535,7 @@ def biluo_tags_from_offsets(doc, entities):
if i in entity_chars: if i in entity_chars:
break break
else: else:
biluo[token.i] = 'O' biluo[token.i] = missing
return biluo return biluo