def biluo_tags_from_doc(doc, missing="O"):
    """Produce BILUO tags for a doc from the entities stored in `doc.ents`.

    Every entity in `doc.ents` is reduced to a
    `(start_char, end_char, label_)` triple, and the collected triples are
    handed to `biluo_tags_from_offsets`, which does the actual encoding.

    doc: Object exposing an `ents` iterable whose items provide
        `start_char`, `end_char` and `label_`. It is also passed through
        unchanged as the first argument of `biluo_tags_from_offsets`.
    missing: Forwarded as the `missing` keyword argument of
        `biluo_tags_from_offsets`. Defaults to "O".
    RETURNS: The result of `biluo_tags_from_offsets`, returned as-is.
    """
    # Character-offset view of the entities, in the order `doc.ents` yields them.
    entity_offsets = [
        (span.start_char, span.end_char, span.label_) for span in doc.ents
    ]
    return biluo_tags_from_offsets(doc, entity_offsets, missing=missing)