Add test for ent_iob during span merge

This commit is contained in:
Matthew Honnibal 2018-03-25 22:16:19 +02:00
parent 070b6c6495
commit cbd2794be0

View File

@ -2,6 +2,8 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from ..util import get_doc from ..util import get_doc
from ...vocab import Vocab
from ...tokens import Doc
import pytest import pytest
@ -95,6 +97,21 @@ def test_spans_entity_merge(en_tokenizer):
assert len(doc) == 15 assert len(doc) == 15
def test_spans_entity_merge_iob():
    """Check that entity IOB tags remain consistent after a span merge."""
    doc = Doc(Vocab(), words=["a", "b", "c", "d", "e"])
    strings = doc.vocab.strings
    # Register the labels in the string store, then tag tokens 0-2 as one
    # entity and token 3 as a second, adjacent entity.
    abc_label = strings.add('ent-abc')
    d_label = strings.add('ent-d')
    doc.ents = [(abc_label, 0, 3), (d_label, 3, 4)]

    tags_before = [doc[i].ent_iob_ for i in range(4)]
    assert tags_before == ["B", "I", "I", "B"]

    # Merge the span covering only the first token; the tokens that follow
    # must still continue the same entity.
    doc[0:1].merge()

    tags_after = [doc[i].ent_iob_ for i in range(2)]
    assert tags_after == ["B", "I"]
def test_spans_sentence_update_after_merge(en_tokenizer): def test_spans_sentence_update_after_merge(en_tokenizer):
text = "Stewart Lee is a stand up comedian. He lives in England and loves Joe Pasquale." text = "Stewart Lee is a stand up comedian. He lives in England and loves Joe Pasquale."
heads = [1, 1, 0, 1, 2, -1, -4, -5, 1, 0, -1, -1, -3, -4, 1, -2, -7] heads = [1, 1, 0, 1, 2, -1, -4, -5, 1, 0, -1, -1, -3, -4, 1, -2, -7]