mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-07 13:44:55 +03:00
Minor formatting
This commit is contained in:
parent
ed889db5ee
commit
102fb8a8a1
|
@ -438,7 +438,7 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
|
||||||
# 'MatchAlignmentC' maps 'original token index of current pattern' to 'current matching length'
|
# 'MatchAlignmentC' maps 'original token index of current pattern' to 'current matching length'
|
||||||
if with_alignments != 0:
|
if with_alignments != 0:
|
||||||
align_states[q].push_back(MatchAlignmentC(states[q].pattern.token_idx, states[q].length))
|
align_states[q].push_back(MatchAlignmentC(states[q].pattern.token_idx, states[q].length))
|
||||||
if action in [RETRY_EXTEND, RETRY_OR_EXTEND]:
|
if action in (RETRY_EXTEND, RETRY_OR_EXTEND):
|
||||||
# This handles the 'extend'
|
# This handles the 'extend'
|
||||||
new_states.push_back(
|
new_states.push_back(
|
||||||
PatternStateC(pattern=states[q].pattern, start=state.start,
|
PatternStateC(pattern=states[q].pattern, start=state.start,
|
||||||
|
@ -511,7 +511,7 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
|
||||||
elif action == MATCH_EXTEND:
|
elif action == MATCH_EXTEND:
|
||||||
matches.push_back(
|
matches.push_back(
|
||||||
MatchC(pattern_id=ent_id, start=state.start,
|
MatchC(pattern_id=ent_id, start=state.start,
|
||||||
length=state.length))
|
length=state.length))
|
||||||
# `align_matches` always corresponds to `matches` 1:1
|
# `align_matches` always corresponds to `matches` 1:1
|
||||||
if with_alignments != 0:
|
if with_alignments != 0:
|
||||||
align_matches.push_back(align_states[q])
|
align_matches.push_back(align_states[q])
|
||||||
|
@ -669,72 +669,73 @@ cdef action_t get_action(PatternStateC state,
|
||||||
is_match = not is_match
|
is_match = not is_match
|
||||||
quantifier = ONE
|
quantifier = ONE
|
||||||
if quantifier == ONE:
|
if quantifier == ONE:
|
||||||
if is_match and is_final:
|
if is_match and is_final:
|
||||||
# Yes, final: 1000
|
# Yes, final: 1000
|
||||||
return MATCH
|
return MATCH
|
||||||
elif is_non_greedy_plus(state) and has_star_tail(state) and is_match and not is_final:
|
elif is_non_greedy_plus(state) and has_star_tail(state) and is_match and not is_final:
|
||||||
# Yes, non-final: 1100
|
# Yes, non-final: 1100
|
||||||
# Modification for +?:
|
# Modification for +?:
|
||||||
# Having MATCH_ADVANCE handles the match at the 'ONE' part of the token instead of relying on MATCH_REJECT
|
# Having MATCH_ADVANCE handles the match at the 'ONE' part of the token instead of relying on MATCH_REJECT
|
||||||
# and other actions from other tokens to produce a match.
|
# and other actions from other tokens to produce a match.
|
||||||
# is_non_greedy_plus() verifies that the current state's pattern is +?
|
# is_non_greedy_plus() verifies that the current state's pattern is +?
|
||||||
# has_star_tail() verifies the remaining pattern tokens are either * or *?,
|
# has_star_tail() verifies the remaining pattern tokens are either * or *?,
|
||||||
# so that it is valid for the current match to exist.
|
# so that it is valid for the current match to exist.
|
||||||
# TODO if this impacts the performance, "ONE_MINUS" could be created
|
# TODO if this impacts the performance, "ONE_MINUS" could be created
|
||||||
return MATCH_ADVANCE
|
return MATCH_ADVANCE
|
||||||
elif is_match and not is_final:
|
elif is_match and not is_final:
|
||||||
# Yes, non-final: 0100
|
# Yes, non-final: 0100
|
||||||
return ADVANCE
|
return ADVANCE
|
||||||
elif not is_match and is_final:
|
elif not is_match and is_final:
|
||||||
# No, final: 0000
|
# No, final: 0000
|
||||||
return REJECT
|
return REJECT
|
||||||
else:
|
else:
|
||||||
return REJECT
|
return REJECT
|
||||||
elif quantifier == ZERO_PLUS:
|
elif quantifier == ZERO_PLUS:
|
||||||
if is_match and is_final:
|
if is_match and is_final:
|
||||||
# Yes, final: 1001
|
# Yes, final: 1001
|
||||||
return MATCH_EXTEND
|
return MATCH_EXTEND
|
||||||
elif is_match and not is_final:
|
elif is_match and not is_final:
|
||||||
# Yes, non-final: 0011
|
# Yes, non-final: 0011
|
||||||
return RETRY_EXTEND
|
return RETRY_EXTEND
|
||||||
elif not is_match and is_final:
|
elif not is_match and is_final:
|
||||||
# No, final 2000 (note: Don't include last token!)
|
# No, final 2000 (note: Don't include last token!)
|
||||||
return MATCH_REJECT
|
return MATCH_REJECT
|
||||||
else:
|
else:
|
||||||
# No, non-final 0010
|
# No, non-final 0010
|
||||||
return RETRY
|
return RETRY
|
||||||
elif quantifier == ZERO_MINUS:
|
elif quantifier == ZERO_MINUS:
|
||||||
if is_final or has_non_greedy_tail(state):
|
if is_final or has_non_greedy_tail(state):
|
||||||
# Yes/No, final: 2000 (note: Don't include last token!)
|
# Yes/No, final: 2000 (note: Don't include last token!)
|
||||||
return MATCH_REJECT
|
return MATCH_REJECT
|
||||||
elif is_match:
|
elif is_match:
|
||||||
# Yes, non-final: 0022
|
# Yes, non-final: 0022
|
||||||
# If there is a match, further extensions are skipped so that the behaviour is non-greedy
|
# If there is a match, further extensions are skipped so that the behaviour is non-greedy
|
||||||
# pattern: b*?b string: b b
|
# pattern: b*?b string: b b
|
||||||
# We do not extend on first b to exhibit non-greedy behaviour
|
# We do not extend on first b to exhibit non-greedy behaviour
|
||||||
# such that "b" is matched but "b b" is not matched
|
# such that "b" is matched but "b b" is not matched
|
||||||
return RETRY_OR_EXTEND
|
return RETRY_OR_EXTEND
|
||||||
else:
|
else:
|
||||||
# No, non-final 0010
|
# No, non-final 0010
|
||||||
return RETRY
|
return RETRY
|
||||||
elif quantifier == ZERO_ONE:
|
elif quantifier == ZERO_ONE:
|
||||||
if is_match and is_final:
|
if is_match and is_final:
|
||||||
# Yes, final: 3000
|
# Yes, final: 3000
|
||||||
# To cater for a pattern ending in "?", we need to add
|
# To cater for a pattern ending in "?", we need to add
|
||||||
# a match both with and without the last token
|
# a match both with and without the last token
|
||||||
return MATCH_DOUBLE
|
return MATCH_DOUBLE
|
||||||
elif is_match and not is_final:
|
elif is_match and not is_final:
|
||||||
# Yes, non-final: 0110
|
# Yes, non-final: 0110
|
||||||
# We need both branches here, consider a pair like:
|
# We need both branches here, consider a pair like:
|
||||||
# pattern: .?b string: b
|
# pattern: .?b string: b
|
||||||
# If we 'ADVANCE' on the .?, we miss the match.
|
# If we 'ADVANCE' on the .?, we miss the match.
|
||||||
return RETRY_ADVANCE
|
return RETRY_ADVANCE
|
||||||
elif not is_match and is_final:
|
elif not is_match and is_final:
|
||||||
# No, final 2000 (note: Don't include last token!)
|
# No, final 2000 (note: Don't include last token!)
|
||||||
return MATCH_REJECT
|
return MATCH_REJECT
|
||||||
else:
|
else:
|
||||||
# No, non-final 0010
|
# No, non-final 0010
|
||||||
return RETRY
|
return RETRY
|
||||||
|
|
||||||
|
|
||||||
cdef int8_t get_is_match(PatternStateC state,
|
cdef int8_t get_is_match(PatternStateC state,
|
||||||
const TokenC* token, const attr_t* extra_attrs,
|
const TokenC* token, const attr_t* extra_attrs,
|
||||||
|
@ -785,7 +786,7 @@ cdef action_t cast_to_non_greedy_action(action_t action, action_t next_action, v
|
||||||
- MATCH_DOUBLE adds 2 matches, one with the last token and one without the token, casting the action to MATCH
|
- MATCH_DOUBLE adds 2 matches, one with the last token and one without the token, casting the action to MATCH
|
||||||
- removes the match without the last token which is the match that ends with a '*?' pattern token.
|
- removes the match without the last token which is the match that ends with a '*?' pattern token.
|
||||||
- E.g. pattern = "a* b?" doc = "a b"
|
- E.g. pattern = "a* b?" doc = "a b"
|
||||||
- MATCH_DOUBLE will add add the following 2 matches ['a' and 'a b']
|
- MATCH_DOUBLE will add the following 2 matches ['a' and 'a b']
|
||||||
- and casting MATCH_DOUBLE to MATCH removes 'a'.
|
- and casting MATCH_DOUBLE to MATCH removes 'a'.
|
||||||
"""
|
"""
|
||||||
if action == RETRY_OR_EXTEND and next_action == MATCH:
|
if action == RETRY_OR_EXTEND and next_action == MATCH:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user