From ee97fd3cb4b3fe5d62a28e8cd936073d11678bbf Mon Sep 17 00:00:00 2001
From: ines
Date: Sat, 11 Nov 2017 00:14:03 +0100
Subject: [PATCH] Add regression test for #1547

---
Reviewer note (free-text area after the "---" separator, not part of the
commit message or the diff): the added test is decorated with
`@pytest.mark.xfail`, and its final assertion only checks that the list of
entity texts in `doc.ents` is non-empty after `doc[5:7].merge()`; it does
not compare entity labels.

 spacy/tests/regression/test_issue1547.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue1547.py

diff --git a/spacy/tests/regression/test_issue1547.py b/spacy/tests/regression/test_issue1547.py
new file mode 100644
index 000000000..aa7076dd3
--- /dev/null
+++ b/spacy/tests/regression/test_issue1547.py
@@ -0,0 +1,17 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+
+from ...vocab import Vocab
+from ...tokens import Doc, Span
+
+
+@pytest.mark.xfail
+def test_issue1547():
+    """Test that entity labels still match after merging tokens."""
+    words = ['\n', 'worda', '.', '\n', 'wordb', '-', 'Biosphere', '2', '-', ' \n']
+    doc = Doc(Vocab(), words=words)
+    doc.ents = [Span(doc, 6, 8, label=doc.vocab.strings['PRODUCT'])]
+    doc[5:7].merge()
+    assert [ent.text for ent in doc.ents]