Improve matcher tests re issue #3328

2025-07-16 03:02:41 +03:00 · 2019-02-27 10:25:56 +01:00 · 2019-02-27 10:25:56 +01:00 · 2d3ce89b78
commit 2d3ce89b78
parent 8d6954e0e7
1 changed file with 15 additions and 1 deletion
--- a/spacy/tests/matcher/test_matcher_logic.py
+++ b/spacy/tests/matcher/test_matcher_logic.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 import pytest
 import re
 from spacy.matcher import Matcher
-from spacy.tokens import Doc
+from spacy.tokens import Doc, Span


 pattern1 = [{"ORTH": "A", "OP": "1"}, {"ORTH": "A", "OP": "*"}]
@@ -129,3 +129,17 @@ def test_matcher_end_zero_plus(en_vocab):
    assert len(matcher(nlp("a b c"))) == 2
    assert len(matcher(nlp("a b b c"))) == 3
    assert len(matcher(nlp("a b b"))) == 3
+
+
+def test_matcher_sets_return_correct_tokens(en_vocab):
+    matcher = Matcher(en_vocab)
+    patterns = [
+        [{'LOWER': {'IN': ["zero"]}}],
+        [{'LOWER': {'IN': ["one"]}}],
+        [{'LOWER': {'IN': ["two"]}}],
+    ]
+    matcher.add('TEST', None, *patterns)
+    doc = Doc(en_vocab, words="zero one two three".split())
+    matches = matcher(doc)
+    texts = [Span(doc, s, e, label=L).text for L, s, e in matches]
+    assert texts == ['zero', 'one', 'two']