Revert "Have the matcher return character offsets, to handle the match better."

This reverts commit 049c937540.
This commit is contained in:
Matthew Honnibal 2016-10-17 16:49:51 +02:00
parent 7d446e5094
commit 9258db788a

View File

@@ -291,8 +291,8 @@ cdef class Matcher:
elif action == ACCEPT: elif action == ACCEPT:
# TODO: What to do about patterns starting with ZERO? Need to # TODO: What to do about patterns starting with ZERO? Need to
# adjust the start position. # adjust the start position.
start = doc.c[state.first].idx start = state.first
end = doc.c[token_i].idx + doc.c[token_i].lex.length end = token_i+1
ent_id = state.second[1].attrs[0].value ent_id = state.second[1].attrs[0].value
label = state.second[1].attrs[1].value label = state.second[1].attrs[1].value
acceptor = self._acceptors.get(ent_id) acceptor = self._acceptors.get(ent_id)
@@ -319,16 +319,12 @@ cdef class Matcher:
state.second = pattern + 1 state.second = pattern + 1
partials.push_back(state) partials.push_back(state)
elif action == ACCEPT: elif action == ACCEPT:
start = token.idx start = token_i
end = token.idx + token.lex.length end = token_i+1
ent_id = pattern[1].attrs[0].value ent_id = pattern[1].attrs[0].value
label = pattern[1].attrs[1].value label = pattern[1].attrs[1].value
acceptor = self._acceptors.get(ent_id) if acceptor is None or acceptor(doc, ent_id, label, start, end):
if acceptor is not None: matches.append((ent_id, label, start, end))
match = acceptor(doc, ent_id, label, start, end)
if match:
ent_id, label, start, end = match
matches.append((ent_id, label, start, end))
for i, (ent_id, label, start, end) in enumerate(matches): for i, (ent_id, label, start, end) in enumerate(matches):
on_match = self._callbacks.get(ent_id) on_match = self._callbacks.get(ent_id)
if on_match is not None: if on_match is not None: