Add biluo_tags_from_doc function

This commit is contained in:
Matthew Honnibal 2020-06-13 23:10:54 +02:00
parent caa7508725
commit 3a0bbcfb4c

View File

@@ -47,6 +47,14 @@ def _consume_ent(tags):
return [start] + middle + [end] return [start] + middle + [end]
def biluo_tags_from_doc(doc, missing="O"):
    """Encode the entities already set on a doc as per-token BILUO tags.

    Gathers a (start_char, end_char, label_) triple for every span in
    ``doc.ents`` and hands them to ``biluo_tags_from_offsets``.

    doc: Object exposing an ``ents`` iterable whose items have
        ``start_char``, ``end_char`` and ``label_`` attributes.
    missing: Forwarded unchanged to ``biluo_tags_from_offsets`` as its
        ``missing`` keyword argument. Defaults to "O".
    RETURNS: Whatever ``biluo_tags_from_offsets`` returns for these spans.
    """
    entity_offsets = []
    for span in doc.ents:
        entity_offsets.append((span.start_char, span.end_char, span.label_))
    return biluo_tags_from_offsets(doc, entity_offsets, missing=missing)
def biluo_tags_from_offsets(doc, entities, missing="O"): def biluo_tags_from_offsets(doc, entities, missing="O"):
"""Encode labelled spans into per-token tags, using the """Encode labelled spans into per-token tags, using the
Begin/In/Last/Unit/Out scheme (BILUO). Begin/In/Last/Unit/Out scheme (BILUO).