allow None as BILUO annotation

This commit is contained in:
svlandeg 2020-06-16 15:30:05 +02:00
parent 44a0f9c2c8
commit 43d41d6bb6
3 changed files with 15 additions and 7 deletions

View File

@@ -285,6 +285,10 @@ def _parse_ner_tags(biluo_or_offsets, vocab, words, spaces=None):
ent_iobs = [] ent_iobs = []
ent_types = [] ent_types = []
for iob_tag in biluo_to_iob(biluo): for iob_tag in biluo_to_iob(biluo):
if iob_tag is None:
ent_iobs.append("")
ent_types.append("")
else:
ent_iobs.append(iob_tag.split("-")[0]) ent_iobs.append(iob_tag.split("-")[0])
if iob_tag.startswith("I") or iob_tag.startswith("B"): if iob_tag.startswith("I") or iob_tag.startswith("B"):
ent_types.append(iob_tag.split("-", 1)[1]) ent_types.append(iob_tag.split("-", 1)[1])

View File

@@ -15,6 +15,9 @@ def iob_to_biluo(tags):
def biluo_to_iob(tags): def biluo_to_iob(tags):
out = [] out = []
for tag in tags: for tag in tags:
if tag is None:
out.append(tag)
else:
tag = tag.replace("U-", "B-", 1).replace("L-", "I-", 1) tag = tag.replace("U-", "B-", 1).replace("L-", "I-", 1)
out.append(tag) out.append(tag)
return out return out

View File

@@ -237,7 +237,8 @@ def test_biluo_spans(en_tokenizer):
def test_gold_ner_missing_tags(en_tokenizer): def test_gold_ner_missing_tags(en_tokenizer):
doc = en_tokenizer("I flew to Silicon Valley via London.") doc = en_tokenizer("I flew to Silicon Valley via London.")
biluo_tags = [None, "O", "O", "B-LOC", "L-LOC", "O", "U-GPE", "O"] biluo_tags = [None, "O", "O", "B-LOC", "L-LOC", "O", "U-GPE", "O"]
gold = GoldParse(doc, entities=biluo_tags) # noqa: F841 example = Example.from_dict(doc, {"entities": biluo_tags})
assert example.get_aligned("ENT_IOB") == [0, 2, 2, 3, 1, 2, 3, 2]
def test_iob_to_biluo(): def test_iob_to_biluo():