💫 Fix issue #3839: Incorrect entity IDs from Matcher with operators (#3949)

* Add regression test for issue #3541
* Add comment on bugfix
* Remove incorrect test
* Un-xfail test
2026-03-06 21:01:34 +03:00 · 2019-07-11 12:55:11 +02:00 · 2019-07-11 12:55:11 +02:00 · b40b4c2c31
commit b40b4c2c31
parent e19f4ee719
2 changed files with 4 additions and 5 deletions
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -262,13 +262,13 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc, extensions=None,


 cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
+    # There have been a few bugs here.
    # The code was originally designed to always have pattern[1].attrs.value
    # be the ent_id when we get to the end of a pattern. However, Issue #2671
    # showed this wasn't the case when we had a reject-and-continue before a
-    # match. I still don't really understand what's going on here, but this
-    # workaround does resolve the issue.
-    while pattern.attrs.attr != ID and \
-            (pattern.nr_attr > 0 or pattern.nr_extra_attr > 0 or pattern.nr_py > 0):
+    # match.
+    # The patch to #2671 was wrong though, which came up in #3839.
+    while pattern.attrs.attr != ID:
        pattern += 1
    return pattern.attrs.value

--- a/spacy/tests/regression/test_issue3839.py
+++ b/spacy/tests/regression/test_issue3839.py
@@ -6,7 +6,6 @@ from spacy.matcher import Matcher
 from spacy.tokens import Doc


-@pytest.mark.xfail
 def test_issue3839(en_vocab):
    """Test that match IDs returned by the matcher are correct, are in the string """
    doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])