Update regression test for variable-length pattern problem in the matcher.

2025-11-08 03:47:39 +03:00 · 2017-03-07 16:08:32 +01:00 · 2017-03-07 16:08:32 +01:00 · 4e75e74247
commit 4e75e74247
parent 6d67213b80
1 changed file with 25 additions and 3 deletions
--- a/spacy/tests/regression/test_issue850.py
+++ b/spacy/tests/regression/test_issue850.py
@@ -2,6 +2,7 @@
 Test Matcher matches with '*' operator and Boolean flag
 '''
 from __future__ import unicode_literals
+from __future__ import print_function
 import pytest

 from ...matcher import Matcher
@@ -10,9 +11,30 @@ from ...attrs import LOWER
 from ...tokens import Doc


+def test_basic_case():
+    matcher = Matcher(Vocab(
+                lex_attr_getters={LOWER: lambda string: string.lower()}))
+    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
+    matcher.add_pattern(
+        "FarAway",
+        [
+            {LOWER: "bob"},
+            {'OP': '*', LOWER: 'and'},
+            {LOWER: 'frank'}
+        ])
+    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
+    match = matcher(doc)
+    assert len(match) == 1
+    ent_id, label, start, end = match[0]
+    assert start == 0
+    assert end == 4
+
@pytest.mark.xfail
 def test_issue850():
-    matcher = Matcher(Vocab())
+    '''The problem here is that the variable-length pattern matches the
+    succeeding token. We then don't handle the ambiguity correctly.'''
+    matcher = Matcher(Vocab(
+                lex_attr_getters={LOWER: lambda string: string.lower()}))
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)
    matcher.add_pattern(
        "FarAway",
@@ -21,9 +43,9 @@ def test_issue850():
            {'OP': '*', IS_ANY_TOKEN: True},
            {LOWER: 'frank'}
        ])
-    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'cat', 'frank'])
+    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    match = matcher(doc)
    assert len(match) == 1
-    start, end, label, ent_id = match 
+    ent_id, label, start, end = match[0]
    assert start == 0
    assert end == 4