mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Add test for ent_iob during span merge
This commit is contained in:
parent
070b6c6495
commit
cbd2794be0
|
@ -2,6 +2,8 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..util import get_doc
|
||||
from ...vocab import Vocab
|
||||
from ...tokens import Doc
|
||||
|
||||
import pytest
|
||||
|
||||
|
@ -95,6 +97,21 @@ def test_spans_entity_merge(en_tokenizer):
|
|||
assert len(doc) == 15
|
||||
|
||||
|
||||
def test_spans_entity_merge_iob():
    """Check that entity IOB tags stay consistent after merging a span.

    Builds a five-token doc with two entity spans ('ent-abc' over tokens
    0-2 and 'ent-d' over token 3), verifies the ent_iob_ tags, merges the
    span doc[0:1], and verifies the tags of the first two tokens again.
    """
    tokens = ["a", "b", "c", "d", "e"]
    doc = Doc(Vocab(), words=tokens)

    # Register the labels in the doc's own string store, in the same order
    # as the entity tuples that use them.
    label_abc = doc.vocab.strings.add('ent-abc')
    label_d = doc.vocab.strings.add('ent-d')
    doc.ents = [(label_abc, 0, 3), (label_d, 3, 4)]

    # Tags for the first four tokens before merging.
    for index, expected_iob in enumerate(["B", "I", "I", "B"]):
        assert doc[index].ent_iob_ == expected_iob

    doc[0:1].merge()

    # Tags for the first two tokens after merging.
    for index, expected_iob in enumerate(["B", "I"]):
        assert doc[index].ent_iob_ == expected_iob
|
||||
|
||||
|
||||
def test_spans_sentence_update_after_merge(en_tokenizer):
|
||||
text = "Stewart Lee is a stand up comedian. He lives in England and loves Joe Pasquale."
|
||||
heads = [1, 1, 0, 1, 2, -1, -4, -5, 1, 0, -1, -1, -3, -4, 1, -2, -7]
|
||||
|
|
Loading…
Reference in New Issue
Block a user