mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Add '1' operator to matcher, and make sure open patterns are closed at end of document. Closes Issue #766
This commit is contained in:
parent
f028f8ad28
commit
8f94897d07
|
@ -138,7 +138,7 @@ cdef int get_action(const TokenPatternC* pattern, const TokenC* token) nogil:
|
|||
def _convert_strings(token_specs, string_store):
|
||||
# Support 'syntactic sugar' operator '+', as combination of ONE, ZERO_PLUS
|
||||
operators = {'!': (ZERO,), '*': (ZERO_PLUS,), '+': (ONE, ZERO_PLUS),
|
||||
'?': (ZERO_ONE,)}
|
||||
'?': (ZERO_ONE,), '1': (ONE,)}
|
||||
tokens = []
|
||||
op = ONE
|
||||
for spec in token_specs:
|
||||
|
@ -150,7 +150,7 @@ def _convert_strings(token_specs, string_store):
|
|||
ops = operators[value]
|
||||
else:
|
||||
raise KeyError(
|
||||
"Unknown operator. Options: %s" % ', '.join(operators.keys()))
|
||||
"Unknown operator '%s'. Options: %s" % (value, ', '.join(operators.keys())))
|
||||
if isinstance(attr, basestring):
|
||||
attr = attrs.IDS.get(attr.upper())
|
||||
if isinstance(value, basestring):
|
||||
|
@ -418,6 +418,22 @@ cdef class Matcher:
|
|||
match = acceptor(doc, ent_id, label, start, end)
|
||||
if match:
|
||||
matches.append(match)
|
||||
# Look for open patterns that are actually satisfied
|
||||
for state in partials:
|
||||
while state.second.quantifier in (ZERO, ZERO_PLUS):
|
||||
state.second += 1
|
||||
if state.second.nr_attr == 0:
|
||||
start = state.first
|
||||
end = len(doc)
|
||||
ent_id = state.second.attrs[0].value
|
||||
label = state.second.attrs[0].value
|
||||
acceptor = self._acceptors.get(ent_id)
|
||||
if acceptor is None:
|
||||
matches.append((ent_id, label, start, end))
|
||||
else:
|
||||
match = acceptor(doc, ent_id, label, start, end)
|
||||
if match:
|
||||
matches.append(match)
|
||||
for i, (ent_id, label, start, end) in enumerate(matches):
|
||||
on_match = self._callbacks.get(ent_id)
|
||||
if on_match is not None:
|
||||
|
|
|
@ -105,3 +105,21 @@ def test_matcher_match_zero_plus(matcher):
|
|||
matcher.add('Quote', '', {}, [pattern])
|
||||
doc = get_doc(matcher.vocab, words)
|
||||
assert len(matcher(doc)) == 1
|
||||
|
||||
def test_matcher_match_one_plus(matcher):
    """Exercise the '1' and '+' operators on a document of repeated tokens.

    A bare single-token pattern on a fresh matcher is expected to match each
    of the two 'Philippe' tokens on its own (two matches). A pattern made of
    a '1' token followed by a '+' token, added to the fixture matcher, is
    expected to yield exactly one match on the same document.
    """
    words = ['Philippe', 'Philippe']

    # Control case: one-token pattern, no explicit operator.
    control = Matcher(matcher.vocab)
    basic_pattern = [{'ORTH': 'Philippe'}]
    control.add_pattern('BasicPhilippe', basic_pattern, label=321)

    doc = get_doc(control.vocab, words)

    control_matches = control(doc)
    assert len(control_matches) == 2

    # Operator case: exactly one 'Philippe', then one or more 'Philippe'.
    kleene_pattern = [
        {'ORTH': 'Philippe', 'OP': '1'},
        {'ORTH': 'Philippe', 'OP': '+'},
    ]
    matcher.add_pattern('KleenePhilippe', kleene_pattern, label=321)
    kleene_matches = matcher(doc)
    assert len(kleene_matches) == 1
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user