diff --git a/spacy/tests/regression/test_issue1547.py b/spacy/tests/regression/test_issue1547.py
new file mode 100644
index 000000000..aa7076dd3
--- /dev/null
+++ b/spacy/tests/regression/test_issue1547.py
@@ -0,0 +1,26 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+
+from ...vocab import Vocab
+from ...tokens import Doc, Span
+
+
+@pytest.mark.xfail
+def test_issue1547():
+    """Regression test for issue 1547: entities must survive a token merge.
+
+    An entity is set over tokens 6-7 ('Biosphere', '2'); tokens 5-6
+    ('-', 'Biosphere') are then merged, so the merged span overlaps the
+    start of the entity. The doc must still report at least one entity.
+    Marked xfail, i.e. currently expected to fail.
+    """
+    words = ['\n', 'worda', '.', '\n', 'wordb', '-', 'Biosphere', '2', '-', ' \n']
+    doc = Doc(Vocab(), words=words)
+    product_label = doc.vocab.strings['PRODUCT']
+    doc.ents = [Span(doc, 6, 8, label=product_label)]
+    # Merge a span that partially overlaps the entity assigned above.
+    doc[5:7].merge()
+    entity_texts = [ent.text for ent in doc.ents]
+    assert entity_texts