mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-03 19:08:06 +03:00
Have the matcher return character offsets instead of token indices, so that acceptors can adjust the boundaries of a match.
This commit is contained in:
parent
9b60186266
commit
049c937540
|
@ -291,8 +291,8 @@ cdef class Matcher:
|
||||||
elif action == ACCEPT:
|
elif action == ACCEPT:
|
||||||
# TODO: What to do about patterns starting with ZERO? Need to
|
# TODO: What to do about patterns starting with ZERO? Need to
|
||||||
# adjust the start position.
|
# adjust the start position.
|
||||||
start = state.first
|
start = doc.c[state.first].idx
|
||||||
end = token_i+1
|
end = doc.c[token_i].idx + doc.c[token_i].lex.length
|
||||||
ent_id = state.second[1].attrs[0].value
|
ent_id = state.second[1].attrs[0].value
|
||||||
label = state.second[1].attrs[1].value
|
label = state.second[1].attrs[1].value
|
||||||
acceptor = self._acceptors.get(ent_id)
|
acceptor = self._acceptors.get(ent_id)
|
||||||
|
@ -319,11 +319,15 @@ cdef class Matcher:
|
||||||
state.second = pattern + 1
|
state.second = pattern + 1
|
||||||
partials.push_back(state)
|
partials.push_back(state)
|
||||||
elif action == ACCEPT:
|
elif action == ACCEPT:
|
||||||
start = token_i
|
start = token.idx
|
||||||
end = token_i+1
|
end = token.idx + token.lex.length
|
||||||
ent_id = pattern[1].attrs[0].value
|
ent_id = pattern[1].attrs[0].value
|
||||||
label = pattern[1].attrs[1].value
|
label = pattern[1].attrs[1].value
|
||||||
if acceptor is None or acceptor(doc, ent_id, label, start, end):
|
acceptor = self._acceptors.get(ent_id)
|
||||||
|
if acceptor is not None:
|
||||||
|
match = acceptor(doc, ent_id, label, start, end)
|
||||||
|
if match:
|
||||||
|
ent_id, label, start, end = match
|
||||||
matches.append((ent_id, label, start, end))
|
matches.append((ent_id, label, start, end))
|
||||||
for i, (ent_id, label, start, end) in enumerate(matches):
|
for i, (ent_id, label, start, end) in enumerate(matches):
|
||||||
on_match = self._callbacks.get(ent_id)
|
on_match = self._callbacks.get(ent_id)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user