Minor formatting

This commit is contained in:
Adriane Boyd 2022-08-03 14:35:04 +02:00
parent ed889db5ee
commit 102fb8a8a1

View File

@ -438,7 +438,7 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
# 'MatchAlignmentC' maps 'original token index of current pattern' to 'current matching length' # 'MatchAlignmentC' maps 'original token index of current pattern' to 'current matching length'
if with_alignments != 0: if with_alignments != 0:
align_states[q].push_back(MatchAlignmentC(states[q].pattern.token_idx, states[q].length)) align_states[q].push_back(MatchAlignmentC(states[q].pattern.token_idx, states[q].length))
if action in [RETRY_EXTEND, RETRY_OR_EXTEND]: if action in (RETRY_EXTEND, RETRY_OR_EXTEND):
# This handles the 'extend' # This handles the 'extend'
new_states.push_back( new_states.push_back(
PatternStateC(pattern=states[q].pattern, start=state.start, PatternStateC(pattern=states[q].pattern, start=state.start,
@ -511,7 +511,7 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
elif action == MATCH_EXTEND: elif action == MATCH_EXTEND:
matches.push_back( matches.push_back(
MatchC(pattern_id=ent_id, start=state.start, MatchC(pattern_id=ent_id, start=state.start,
length=state.length)) length=state.length))
# `align_matches` always corresponds to `matches` 1:1 # `align_matches` always corresponds to `matches` 1:1
if with_alignments != 0: if with_alignments != 0:
align_matches.push_back(align_states[q]) align_matches.push_back(align_states[q])
@ -669,72 +669,73 @@ cdef action_t get_action(PatternStateC state,
is_match = not is_match is_match = not is_match
quantifier = ONE quantifier = ONE
if quantifier == ONE: if quantifier == ONE:
if is_match and is_final: if is_match and is_final:
# Yes, final: 1000 # Yes, final: 1000
return MATCH return MATCH
elif is_non_greedy_plus(state) and has_star_tail(state) and is_match and not is_final: elif is_non_greedy_plus(state) and has_star_tail(state) and is_match and not is_final:
# Yes, non-final: 1100 # Yes, non-final: 1100
# Modification for +?: # Modification for +?:
# Having MATCH_ADVANCE handles the match at the 'ONE' part of the token instead of relying on MATCH_REJECT # Having MATCH_ADVANCE handles the match at the 'ONE' part of the token instead of relying on MATCH_REJECT
# and other actions from other tokens to produce a match. # and other actions from other tokens to produce a match.
# is_non_greedy_plus() verifies that the current state's pattern is +? # is_non_greedy_plus() verifies that the current state's pattern is +?
# has_star_tail() verifies the remaining pattern tokens are either * or *?, # has_star_tail() verifies the remaining pattern tokens are either * or *?,
# so that it is valid for the current match to exist. # so that it is valid for the current match to exist.
# TODO if this impacts the performance, "ONE_MINUS" could be created # TODO if this impacts the performance, "ONE_MINUS" could be created
return MATCH_ADVANCE return MATCH_ADVANCE
elif is_match and not is_final: elif is_match and not is_final:
# Yes, non-final: 0100 # Yes, non-final: 0100
return ADVANCE return ADVANCE
elif not is_match and is_final: elif not is_match and is_final:
# No, final: 0000 # No, final: 0000
return REJECT return REJECT
else: else:
return REJECT return REJECT
elif quantifier == ZERO_PLUS: elif quantifier == ZERO_PLUS:
if is_match and is_final: if is_match and is_final:
# Yes, final: 1001 # Yes, final: 1001
return MATCH_EXTEND return MATCH_EXTEND
elif is_match and not is_final: elif is_match and not is_final:
# Yes, non-final: 0011 # Yes, non-final: 0011
return RETRY_EXTEND return RETRY_EXTEND
elif not is_match and is_final: elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!) # No, final 2000 (note: Don't include last token!)
return MATCH_REJECT return MATCH_REJECT
else: else:
# No, non-final 0010 # No, non-final 0010
return RETRY return RETRY
elif quantifier == ZERO_MINUS: elif quantifier == ZERO_MINUS:
if is_final or has_non_greedy_tail(state): if is_final or has_non_greedy_tail(state):
# Yes/No, final: 2000 (note: Don't include last token!) # Yes/No, final: 2000 (note: Don't include last token!)
return MATCH_REJECT return MATCH_REJECT
elif is_match: elif is_match:
# Yes, non-final: 0022 # Yes, non-final: 0022
# If there is a match, further extensions are skipped so that the behaviour is non-greedy # If there is a match, further extensions are skipped so that the behaviour is non-greedy
# pattern: b*?b string: b b # pattern: b*?b string: b b
# We do not extend on first b to exhibit non-greedy behaviour # We do not extend on first b to exhibit non-greedy behaviour
# such that "b" is matched but "b b" is not matched # such that "b" is matched but "b b" is not matched
return RETRY_OR_EXTEND return RETRY_OR_EXTEND
else: else:
# No, non-final 0010 # No, non-final 0010
return RETRY return RETRY
elif quantifier == ZERO_ONE: elif quantifier == ZERO_ONE:
if is_match and is_final: if is_match and is_final:
# Yes, final: 3000 # Yes, final: 3000
# To cater for a pattern ending in "?", we need to add # To cater for a pattern ending in "?", we need to add
# a match both with and without the last token # a match both with and without the last token
return MATCH_DOUBLE return MATCH_DOUBLE
elif is_match and not is_final: elif is_match and not is_final:
# Yes, non-final: 0110 # Yes, non-final: 0110
# We need both branches here, consider a pair like: # We need both branches here, consider a pair like:
# pattern: .?b string: b # pattern: .?b string: b
# If we 'ADVANCE' on the .?, we miss the match. # If we 'ADVANCE' on the .?, we miss the match.
return RETRY_ADVANCE return RETRY_ADVANCE
elif not is_match and is_final: elif not is_match and is_final:
# No, final 2000 (note: Don't include last token!) # No, final 2000 (note: Don't include last token!)
return MATCH_REJECT return MATCH_REJECT
else: else:
# No, non-final 0010 # No, non-final 0010
return RETRY return RETRY
cdef int8_t get_is_match(PatternStateC state, cdef int8_t get_is_match(PatternStateC state,
const TokenC* token, const attr_t* extra_attrs, const TokenC* token, const attr_t* extra_attrs,
@ -785,7 +786,7 @@ cdef action_t cast_to_non_greedy_action(action_t action, action_t next_action, v
- MATCH_DOUBLE adds 2 matches, one with the last token and one without the token, casting the action to MATCH - MATCH_DOUBLE adds 2 matches, one with the last token and one without the token, casting the action to MATCH
- removes the match without the last token which is the match that ends with a '*?' pattern token. - removes the match without the last token which is the match that ends with a '*?' pattern token.
- E.g. pattern = "a* b?" doc = "a b" - E.g. pattern = "a* b?" doc = "a b"
- MATCH_DOUBLE will add add the following 2 matches ['a' and 'a b'] - MATCH_DOUBLE will add the following 2 matches ['a' and 'a b']
- and casting MATCH_DOUBLE to MATCH removes 'a'. - and casting MATCH_DOUBLE to MATCH removes 'a'.
""" """
if action == RETRY_OR_EXTEND and next_action == MATCH: if action == RETRY_OR_EXTEND and next_action == MATCH: