💫 Fix issue #3839: Incorrect entity IDs from Matcher with operators (#3949)

* Add regression test for issue #3541

* Add comment on bugfix

* Remove incorrect test

* Un-xfail test
This commit is contained in:
Matthew Honnibal 2019-07-11 12:55:11 +02:00 committed by GitHub
parent e19f4ee719
commit b40b4c2c31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 5 deletions

View File

@@ -262,13 +262,13 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc, extensions=None,
cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil: cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
# There have been a few bugs here.
# The code was originally designed to always have pattern[1].attrs.value # The code was originally designed to always have pattern[1].attrs.value
# be the ent_id when we get to the end of a pattern. However, Issue #2671 # be the ent_id when we get to the end of a pattern. However, Issue #2671
# showed this wasn't the case when we had a reject-and-continue before a # showed this wasn't the case when we had a reject-and-continue before a
# match. I still don't really understand what's going on here, but this # match.
# workaround does resolve the issue. # The patch to #2671 was wrong though, which came up in #3839.
while pattern.attrs.attr != ID and \ while pattern.attrs.attr != ID:
(pattern.nr_attr > 0 or pattern.nr_extra_attr > 0 or pattern.nr_py > 0):
pattern += 1 pattern += 1
return pattern.attrs.value return pattern.attrs.value

View File

@@ -6,7 +6,6 @@ from spacy.matcher import Matcher
from spacy.tokens import Doc from spacy.tokens import Doc
@pytest.mark.xfail
def test_issue3839(en_vocab): def test_issue3839(en_vocab):
"""Test that match IDs returned by the matcher are correct, are in the string """ """Test that match IDs returned by the matcher are correct, are in the string """
doc = Doc(en_vocab, words=["terrific", "group", "of", "people"]) doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])