Make test for #1945 more precise

This commit is contained in:
Matthew Honnibal 2018-02-07 02:06:11 +01:00
parent c087a14380
commit fd9fd275c5

View File

@@ -1,17 +1,19 @@
'''Test regression in PhraseMatcher introduced in v2.0.6.''' '''Test regression in Matcher introduced in v2.0.6.'''
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest import pytest
from ...lang.en import English from ...vocab import Vocab
from ...matcher import PhraseMatcher from ...tokens import Doc
from ...matcher import Matcher
@pytest.mark.xfail @pytest.mark.xfail
def test_issue1945(): def test_issue1945():
text = "deep machine learning" text = "a a a"
mw_list = ["machine learning", "deep blue", "planing machine"] matcher = Matcher(Vocab())
matcher.add('MWE', None, [{'orth': 'a'}, {'orth': 'a'}])
nlp = English() doc = Doc(matcher.vocab, words=['a', 'a', 'a'])
matcher = PhraseMatcher(nlp.vocab) matches = matcher(doc)
matcher.add("MWE", None, *[nlp.tokenizer(item) for item in mw_list]) # We should see two overlapping matches here
assert len(matches) == 2
assert len(matcher(nlp(text))) == 1 assert matches[0][1:] == (0, 2)
assert matches[1][1:] == (1, 3)