Update regression test for variable-length pattern problem in the matcher.

This commit is contained in:
Matthew Honnibal 2017-03-07 16:08:32 +01:00
parent 6d67213b80
commit 4e75e74247

View File

@ -2,6 +2,7 @@
Test Matcher matches with '*' operator and Boolean flag Test Matcher matches with '*' operator and Boolean flag
''' '''
from __future__ import unicode_literals from __future__ import unicode_literals
from __future__ import print_function
import pytest import pytest
from ...matcher import Matcher from ...matcher import Matcher
@ -10,9 +11,30 @@ from ...attrs import LOWER
from ...tokens import Doc from ...tokens import Doc
def test_basic_case():
    '''A '*' operator on a concrete LOWER value should absorb repeated tokens.

    The pattern is "bob", zero or more "and", then "frank". Run over the
    four-token doc ['bob', 'and', 'and', 'frank'] it is expected to produce
    exactly one match, starting at token 0 and ending at 4.
    '''
    vocab = Vocab(lex_attr_getters={LOWER: lambda string: string.lower()})
    matcher = Matcher(vocab)
    # Registers an always-true flag on the matcher's vocab. The pattern below
    # does not reference it; the call is kept so the vocab is set up the same
    # way as in test_issue850.
    any_token_flag = matcher.vocab.add_flag(lambda x: True)

    pattern = [
        {LOWER: "bob"},
        {'OP': '*', LOWER: 'and'},
        {LOWER: 'frank'},
    ]
    matcher.add_pattern("FarAway", pattern)

    words = ['bob', 'and', 'and', 'frank']
    doc = Doc(matcher.vocab, words=words)
    matches = matcher(doc)

    assert len(matches) == 1
    # Each match is a 4-tuple unpacked as (ent_id, label, start, end); only
    # the span boundaries are checked here.
    _, _, start, end = matches[0]
    assert start == 0
    assert end == 4
@pytest.mark.xfail @pytest.mark.xfail
def test_issue850(): def test_issue850():
matcher = Matcher(Vocab()) '''The problem here is that the variable-length pattern matches the
succeeding token. We then don't handle the ambiguity correctly.'''
matcher = Matcher(Vocab(
lex_attr_getters={LOWER: lambda string: string.lower()}))
IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True) IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
matcher.add_pattern( matcher.add_pattern(
"FarAway", "FarAway",
@ -21,9 +43,9 @@ def test_issue850():
{'OP': '*', IS_ANY_TOKEN: True}, {'OP': '*', IS_ANY_TOKEN: True},
{LOWER: 'frank'} {LOWER: 'frank'}
]) ])
doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'cat', 'frank']) doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
match = matcher(doc) match = matcher(doc)
assert len(match) == 1 assert len(match) == 1
start, end, label, ent_id = match ent_id, label, start, end = match[0]
assert start == 0 assert start == 0
assert end == 4 assert end == 4