Fix ZERO_PLUS operator

This commit is contained in:
Matthew Honnibal 2018-02-12 12:28:03 +01:00
parent 9115c3ba0a
commit 1b01685f47

View File

@ -68,13 +68,11 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc):
cache = PreshMap() cache = PreshMap()
nexts.clear() nexts.clear()
for j in range(curr_states.size()): for j in range(curr_states.size()):
action = get_action(curr_states[j], &doc.c[i], extra_attrs[i], cache)
transition(matches, nexts, transition(matches, nexts,
action, curr_states[j], i) curr_states[j], i, doc, extra_attrs, cache)
for j in range(init_states.size()): for j in range(init_states.size()):
action = get_action(init_states[j], &doc.c[i], extra_attrs[i], cache)
transition(matches, nexts, transition(matches, nexts,
action, init_states[j], i) init_states[j], i, doc, extra_attrs, cache)
nexts, curr_states = curr_states, nexts nexts, curr_states = curr_states, nexts
# Filter out matches that have a longer equivalent. # Filter out matches that have a longer equivalent.
longest_matches = {} longest_matches = {}
@ -89,19 +87,26 @@ cdef find_matches(TokenPatternC** patterns, int n, Doc doc):
cdef void transition(vector[MatchC]& matches, vector[PatternStateC]& nexts, cdef void transition(vector[MatchC]& matches, vector[PatternStateC]& nexts,
ActionC action, PatternStateC state, int token) except *: PatternStateC state, int token,
Doc doc, const attr_t* const* extra_attrs, PreshMap cache) except *:
action = get_action(state, &doc.c[token], extra_attrs[token], cache)
if state.start == -1: if state.start == -1:
state.start = token state.start = token
if action.is_match: if action.is_match:
ent_id = state.state[1].attrs.value ent_id = state.state[1].attrs.value
matches.push_back( matches.push_back(
MatchC(pattern_id=ent_id, start=state.start, end=token+1)) MatchC(pattern_id=ent_id, start=state.start, end=token+1))
if action.keep_state:
nexts.push_back(PatternStateC(start=state.start, state=state.state,
last_action=action))
if action.advance_state: if action.advance_state:
nexts.push_back(PatternStateC(start=state.start, nexts.push_back(PatternStateC(start=state.start,
state=state.state+1, last_action=action)) state=state.state+1, last_action=action))
cdef PatternStateC next_state
if action.keep_state and token < doc.length:
# Keeping the state needs to not consume a token, so we call transition
# with the next state
next_state = PatternStateC(start=state.start, state=state.state+1,
last_action=action)
transition(matches, nexts, next_state, token, doc, extra_attrs, cache)
cdef ActionC get_action(PatternStateC state, const TokenC* token, const attr_t* extra_attrs, cdef ActionC get_action(PatternStateC state, const TokenC* token, const attr_t* extra_attrs,