mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Make test for #1945 more precise
This commit is contained in:
parent
c087a14380
commit
fd9fd275c5
|
@@ -1,17 +1,19 @@
|
||||||
'''Test regression in PhraseMatcher introduced in v2.0.6.'''
|
'''Test regression in Matcher introduced in v2.0.6.'''
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from ...lang.en import English
|
from ...vocab import Vocab
|
||||||
from ...matcher import PhraseMatcher
|
from ...tokens import Doc
|
||||||
|
from ...matcher import Matcher
|
||||||
|
|
||||||
@pytest.mark.xfail
|
@pytest.mark.xfail
|
||||||
def test_issue1945():
|
def test_issue1945():
|
||||||
text = "deep machine learning"
|
text = "a a a"
|
||||||
mw_list = ["machine learning", "deep blue", "planing machine"]
|
matcher = Matcher(Vocab())
|
||||||
|
matcher.add('MWE', None, [{'orth': 'a'}, {'orth': 'a'}])
|
||||||
nlp = English()
|
doc = Doc(matcher.vocab, words=['a', 'a', 'a'])
|
||||||
matcher = PhraseMatcher(nlp.vocab)
|
matches = matcher(doc)
|
||||||
matcher.add("MWE", None, *[nlp.tokenizer(item) for item in mw_list])
|
# We should see two overlapping matches here
|
||||||
|
assert len(matches) == 2
|
||||||
assert len(matcher(nlp(text))) == 1
|
assert matches[0][1:] == (0, 2)
|
||||||
|
assert matches[1][1:] == (1, 3)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user