mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Update regression test for variable-length pattern problem in the matcher.
This commit is contained in:
parent
6d67213b80
commit
4e75e74247
|
@ -2,6 +2,7 @@
|
|||
Test Matcher matches with '*' operator and Boolean flag
|
||||
'''
|
||||
from __future__ import unicode_literals
|
||||
from __future__ import print_function
|
||||
import pytest
|
||||
|
||||
from ...matcher import Matcher
|
||||
|
@ -10,9 +11,30 @@ from ...attrs import LOWER
|
|||
from ...tokens import Doc
|
||||
|
||||
|
||||
def test_basic_case():
    '''Match 'bob', any number of 'and' tokens, then 'frank'.

    Runs the pattern over the words ['bob', 'and', 'and', 'frank'] and
    expects exactly one match, reported with start 0 and end 4.
    '''
    def lowercase(string):
        return string.lower()

    matcher = Matcher(Vocab(lex_attr_getters={LOWER: lowercase}))
    # The flag is registered on the vocab but not referenced by the pattern
    # below; the registration call is kept so the vocab state is unchanged.
    any_token_flag = matcher.vocab.add_flag(lambda x: True)

    pattern = [
        {LOWER: "bob"},
        {'OP': '*', LOWER: 'and'},
        {LOWER: 'frank'},
    ]
    matcher.add_pattern("FarAway", pattern)

    words = ['bob', 'and', 'and', 'frank']
    doc = Doc(matcher.vocab, words=words)
    matches = matcher(doc)
    assert len(matches) == 1

    # Each match unpacks to four fields; only the span bounds are checked.
    _ent_id, _label, start, end = matches[0]
    assert start == 0
    assert end == 4
|
||||
|
||||
@pytest.mark.xfail
|
||||
def test_issue850():
|
||||
matcher = Matcher(Vocab())
|
||||
'''The problem here is that the variable-length pattern matches the
|
||||
succeeding token. We then don't handle the ambiguity correctly.'''
|
||||
matcher = Matcher(Vocab(
|
||||
lex_attr_getters={LOWER: lambda string: string.lower()}))
|
||||
IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
|
||||
matcher.add_pattern(
|
||||
"FarAway",
|
||||
|
@ -21,9 +43,9 @@ def test_issue850():
|
|||
{'OP': '*', IS_ANY_TOKEN: True},
|
||||
{LOWER: 'frank'}
|
||||
])
|
||||
doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'cat', 'frank'])
|
||||
doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
|
||||
match = matcher(doc)
|
||||
assert len(match) == 1
|
||||
start, end, label, ent_id = match
|
||||
ent_id, label, start, end = match[0]
|
||||
assert start == 0
|
||||
assert end == 4
|
||||
|
|
Loading…
Reference in New Issue
Block a user