Minor formatting

This commit is contained in:
Adriane Boyd 2022-08-03 14:35:04 +02:00
parent ed889db5ee
commit 102fb8a8a1

View File

@ -438,7 +438,7 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
# 'MatchAlignmentC' maps 'original token index of current pattern' to 'current matching length'
if with_alignments != 0:
align_states[q].push_back(MatchAlignmentC(states[q].pattern.token_idx, states[q].length))
if action in [RETRY_EXTEND, RETRY_OR_EXTEND]:
if action in (RETRY_EXTEND, RETRY_OR_EXTEND):
# This handles the 'extend'
new_states.push_back(
PatternStateC(pattern=states[q].pattern, start=state.start,
@ -511,7 +511,7 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
elif action == MATCH_EXTEND:
matches.push_back(
MatchC(pattern_id=ent_id, start=state.start,
length=state.length))
length=state.length))
# `align_matches` always corresponds to `matches` 1:1
if with_alignments != 0:
align_matches.push_back(align_states[q])
@ -669,72 +669,73 @@ cdef action_t get_action(PatternStateC state,
is_match = not is_match
quantifier = ONE
if quantifier == ONE:
if is_match and is_final:
# Yes, final: 1000
return MATCH
elif is_non_greedy_plus(state) and has_star_tail(state) and is_match and not is_final:
# Yes, non-final: 1100
# Modification for +?:
# Having MATCH_ADVANCE handles the match at the 'ONE' part of the token instead of relying on MATCH_REJECT
# and other actions from other tokens to produce a match.
# is_non_greedy_plus() verifies that the current state's pattern is +?
# has_star_tail() verifies the remaining pattern tokens are either * or *?,
# so that it is valid for the current match to exist.
# TODO if this impacts the performance, "ONE_MINUS" could be created
return MATCH_ADVANCE
elif is_match and not is_final:
# Yes, non-final: 0100
return ADVANCE
elif not is_match and is_final:
# No, final: 0000
return REJECT
else:
return REJECT
if is_match and is_final:
# Yes, final: 1000
return MATCH
elif is_non_greedy_plus(state) and has_star_tail(state) and is_match and not is_final:
# Yes, non-final: 1100
# Modification for +?:
# Having MATCH_ADVANCE handles the match at the 'ONE' part of the token instead of relying on MATCH_REJECT
# and other actions from other tokens to produce a match.
# is_non_greedy_plus() verifies that the current state's pattern is +?
# has_star_tail() verifies the remaining pattern tokens are either * or *?,
# so that it is valid for the current match to exist.
# TODO if this impacts the performance, "ONE_MINUS" could be created
return MATCH_ADVANCE
elif is_match and not is_final:
# Yes, non-final: 0100
return ADVANCE
elif not is_match and is_final:
# No, final: 0000
return REJECT
else:
return REJECT
elif quantifier == ZERO_PLUS:
if is_match and is_final:
# Yes, final: 1001
return MATCH_EXTEND
elif is_match and not is_final:
# Yes, non-final: 0011
return RETRY_EXTEND
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
if is_match and is_final:
# Yes, final: 1001
return MATCH_EXTEND
elif is_match and not is_final:
# Yes, non-final: 0011
return RETRY_EXTEND
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
elif quantifier == ZERO_MINUS:
if is_final or has_non_greedy_tail(state):
# Yes/No, final: 2000 (note: Don't include last token!)
return MATCH_REJECT
elif is_match:
# Yes, non-final: 0022
# If there is a match, further extensions are skipped so that the behaviour is non-greedy
# pattern: b*?b string: b b
# We do not extend on first b to exhibit non-greedy behaviour
# such that "b" is matched but "b b" is not matched
return RETRY_OR_EXTEND
else:
# No, non-final 0010
return RETRY
if is_final or has_non_greedy_tail(state):
# Yes/No, final: 2000 (note: Don't include last token!)
return MATCH_REJECT
elif is_match:
# Yes, non-final: 0022
# If there is a match, further extensions are skipped so that the behaviour is non-greedy
# pattern: b*?b string: b b
# We do not extend on first b to exhibit non-greedy behaviour
# such that "b" is matched but "b b" is not matched
return RETRY_OR_EXTEND
else:
# No, non-final 0010
return RETRY
elif quantifier == ZERO_ONE:
if is_match and is_final:
# Yes, final: 3000
# To cater for a pattern ending in "?", we need to add
# a match both with and without the last token
return MATCH_DOUBLE
elif is_match and not is_final:
# Yes, non-final: 0110
# We need both branches here, consider a pair like:
# pattern: .?b string: b
# If we 'ADVANCE' on the .?, we miss the match.
return RETRY_ADVANCE
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
if is_match and is_final:
# Yes, final: 3000
# To cater for a pattern ending in "?", we need to add
# a match both with and without the last token
return MATCH_DOUBLE
elif is_match and not is_final:
# Yes, non-final: 0110
# We need both branches here, consider a pair like:
# pattern: .?b string: b
# If we 'ADVANCE' on the .?, we miss the match.
return RETRY_ADVANCE
elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!)
return MATCH_REJECT
else:
# No, non-final 0010
return RETRY
cdef int8_t get_is_match(PatternStateC state,
const TokenC* token, const attr_t* extra_attrs,
@ -785,7 +786,7 @@ cdef action_t cast_to_non_greedy_action(action_t action, action_t next_action, v
- MATCH_DOUBLE adds 2 matches, one with the last token and one without the token, casting the action to MATCH
- removes the match without the last token which is the match that ends with a '*?' pattern token.
- E.g. pattern = "a* b?" doc = "a b"
- MATCH_DOUBLE will add add the following 2 matches ['a' and 'a b']
- MATCH_DOUBLE will add the following 2 matches ['a' and 'a b']
- and casting MATCH_DOUBLE to MATCH removes 'a'.
"""
if action == RETRY_OR_EXTEND and next_action == MATCH: