💫 Fix issue #3839: Incorrect entity IDs from Matcher with operators (#3949)

* Add regression test for issue #3541

* Add comment on bugfix

* Remove incorrect test

* Un-xfail test
This commit is contained in:
Matthew Honnibal 2019-07-11 12:55:11 +02:00 committed by GitHub
parent e19f4ee719
commit b40b4c2c31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 5 deletions

View File

@ -262,13 +262,13 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc, extensions=None,
cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
# There have been a few bugs here.
# The code was originally designed to always have pattern[1].attrs.value
# be the ent_id when we get to the end of a pattern. However, Issue #2671
# showed this wasn't the case when we had a reject-and-continue before a
# match. I still don't really understand what's going on here, but this
# workaround does resolve the issue.
while pattern.attrs.attr != ID and \
(pattern.nr_attr > 0 or pattern.nr_extra_attr > 0 or pattern.nr_py > 0):
# match.
# The patch to #2671 was wrong though, which came up in #3839.
while pattern.attrs.attr != ID:
pattern += 1
return pattern.attrs.value

View File

@ -6,7 +6,6 @@ from spacy.matcher import Matcher
from spacy.tokens import Doc
@pytest.mark.xfail
def test_issue3839(en_vocab):
"""Test that match IDs returned by the matcher are correct, are in the string """
doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])