Update regression test for variable-length pattern problem in the matcher.

2025-12-23 10:03:15 +03:00 · 2017-03-07 16:08:32 +01:00 · 2017-03-07 16:08:32 +01:00 · 4e75e74247
commit 4e75e74247
parent 6d67213b80
1 changed file with 25 additions and 3 deletions
--- a/spacy/tests/regression/test_issue850.py
+++ b/spacy/tests/regression/test_issue850.py
@@ -2,6 +2,7 @@
 Test Matcher matches with '*' operator and Boolean flag
 '''
 from __future__ import unicode_literals
 from __future__ import print_function
 import pytest
 from ...matcher import Matcher
@@ -10,9 +11,30 @@ from ...attrs import LOWER
 from ...tokens import Doc
 def test_basic_case():
    matcher = Matcher(Vocab(
                lex_attr_getters={LOWER: lambda string: string.lower()}))
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
    matcher.add_pattern(
        "FarAway",
        [
            {LOWER: "bob"},
            {'OP': '*', LOWER: 'and'},
            {LOWER: 'frank'}
        ])
    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    match = matcher(doc)
    assert len(match) == 1
    ent_id, label, start, end = match[0]
    assert start == 0
    assert end == 4
@pytest.mark.xfail
 def test_issue850():
-    matcher = Matcher(Vocab())
+    '''The problem here is that the variable-length pattern matches the
    succeeding token. We then don't handle the ambiguity correctly.'''
    matcher = Matcher(Vocab(
                lex_attr_getters={LOWER: lambda string: string.lower()}))
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
    matcher.add_pattern(
        "FarAway",
@@ -21,9 +43,9 @@ def test_issue850():
            {'OP': '*', IS_ANY_TOKEN: True},
            {LOWER: 'frank'}
        ])
-    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'cat', 'frank'])
+    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    match = matcher(doc)
    assert len(match) == 1
-    start, end, label, ent_id = match 
+    ent_id, label, start, end = match[0]
    assert start == 0
    assert end == 4