mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 23:33:40 +03:00
Add bandaid solution to the 'shadowing' problem in #864
This commit is contained in:
parent
d8a2506023
commit
2534cd57d7
|
@ -71,6 +71,11 @@ cdef enum action_t:
|
||||||
ADVANCE_ZERO
|
ADVANCE_ZERO
|
||||||
PANIC
|
PANIC
|
||||||
|
|
||||||
|
# A "match expression" consists of one or more token patterns
|
||||||
|
# Each token pattern consists of a quantifier and 0+ (attr, value) pairs.
|
||||||
|
# A state is an (int, pattern pointer) pair, where the int is the start
|
||||||
|
# position, and the pattern pointer shows where we're up to
|
||||||
|
# in the pattern.
|
||||||
|
|
||||||
cdef struct AttrValueC:
|
cdef struct AttrValueC:
|
||||||
attr_id_t attr
|
attr_id_t attr
|
||||||
|
@ -130,7 +135,10 @@ cdef int get_action(const TokenPatternC* pattern, const TokenC* token) nogil:
|
||||||
elif pattern.quantifier in (ONE, ZERO_ONE):
|
elif pattern.quantifier in (ONE, ZERO_ONE):
|
||||||
return ACCEPT if (pattern+1).nr_attr == 0 else ADVANCE
|
return ACCEPT if (pattern+1).nr_attr == 0 else ADVANCE
|
||||||
elif pattern.quantifier == ZERO_PLUS:
|
elif pattern.quantifier == ZERO_PLUS:
|
||||||
return REPEAT
|
# This is a bandaid over the 'shadowing' problem described here:
|
||||||
|
# https://github.com/explosion/spaCy/issues/864
|
||||||
|
next_action = get_action(pattern+1, token)
|
||||||
|
return REPEAT if next_action is REJECT else next_action
|
||||||
else:
|
else:
|
||||||
return PANIC
|
return PANIC
|
||||||
|
|
||||||
|
|
|
@ -111,6 +111,16 @@ def test_matcher_empty_dict(en_vocab):
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
assert matches[0][1:] == (0, 2)
|
assert matches[0][1:] == (0, 2)
|
||||||
|
|
||||||
|
def test_matcher_operator_shadow(en_vocab):
|
||||||
|
matcher = Matcher(en_vocab)
|
||||||
|
abc = ["a", "b", "c"]
|
||||||
|
doc = get_doc(matcher.vocab, abc)
|
||||||
|
matcher.add('A.C', None, [{'ORTH': 'a'},
|
||||||
|
{"IS_ALPHA": True, "OP": "+"},
|
||||||
|
{'ORTH': 'c'}])
|
||||||
|
matches = matcher(doc)
|
||||||
|
assert len(matches) == 1
|
||||||
|
assert matches[0][1:] == (0, 3)
|
||||||
|
|
||||||
def test_matcher_phrase_matcher(en_vocab):
|
def test_matcher_phrase_matcher(en_vocab):
|
||||||
words = ["Google", "Now"]
|
words = ["Google", "Now"]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user