Removal of formatting changes

2025-08-07 21:54:54 +03:00 · 2022-07-05 16:54:10 +08:00 · 2022-07-05 16:54:10 +08:00 · 1e1acf640a
commit 1e1acf640a
parent 57ec153587
1 changed file with 71 additions and 49 deletions
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -25,8 +25,10 @@ from ..errors import Errors, MatchPatternError, Warnings
 from ..strings import get_string_id
 from ..attrs import IDS

+
 DEF PADDING = 5

+
 cdef class Matcher:
    """Match sequences of tokens, based on pattern rules.

@@ -253,8 +255,7 @@ cdef class Matcher:
            matches = []
        else:
            matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length,
-                                   extensions=self._extensions, predicates=self._extra_predicates,
-                                   with_alignments=with_alignments)
+                                    extensions=self._extensions, predicates=self._extra_predicates, with_alignments=with_alignments)
        final_matches = []
        pairs_by_id = {}
        # For each key, either add all matches, or only the filtered,
@@ -326,6 +327,7 @@ cdef class Matcher:
        else:
            return key

+
 def unpickle_matcher(vocab, patterns, callbacks):
    matcher = Matcher(vocab)
    for key, pattern in patterns.items():
@@ -333,8 +335,7 @@ def unpickle_matcher(vocab, patterns, callbacks):
        matcher.add(key, pattern, on_match=callback)
    return matcher

-cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple(),
-                  bint with_alignments=0):
+cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple(), bint with_alignments=0):
    """Find matches in a doc, with a compiled array of patterns. Matches are
    returned as a list of (id, start, end) tuples or (id, start, end, alignments) tuples (if with_alignments != 0)

@@ -378,8 +379,8 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
            states.push_back(PatternStateC(patterns[j], i, 0))
        if with_alignments != 0:
            align_states.resize(states.size())
-        transition_states(states, matches, align_states, align_matches, predicate_cache, doclike[i], extra_attr_values,
-                          predicates, with_alignments)
+        transition_states(states, matches, align_states, align_matches, predicate_cache,
+                          doclike[i], extra_attr_values, predicates, with_alignments)
        extra_attr_values += nr_extra_attr
        predicate_cache += len(predicates)
    # Handle matches that end in 0-width patterns
@@ -404,19 +405,21 @@ cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, e
            seen.add(match)
    return output

+
 cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& matches,
-                            vector[vector[MatchAlignmentC]]& align_states,
-                            vector[vector[MatchAlignmentC]]& align_matches,
-                            int8_t * cached_py_predicates, Token token,
-                            const attr_t * extra_attrs, py_predicates, bint with_alignments) except *:
+                            vector[vector[MatchAlignmentC]]& align_states, vector[vector[MatchAlignmentC]]& align_matches,
+                            int8_t * cached_py_predicates,
+        Token token, const attr_t * extra_attrs, py_predicates, bint with_alignments) except *:
    cdef int q = 0
    cdef vector[PatternStateC] new_states
    cdef vector[vector[MatchAlignmentC]] align_new_states
    cdef int nr_predicate = len(py_predicates)
    for i in range(states.size()):
        if states[i].pattern.nr_py >= 1:
-            update_predicate_cache(cached_py_predicates, states[i].pattern, token, py_predicates)
-        action = get_action(states[i], token.c, extra_attrs, cached_py_predicates)
+            update_predicate_cache(cached_py_predicates,
+                states[i].pattern, token, py_predicates)
+        action = get_action(states[i], token.c, extra_attrs,
+                            cached_py_predicates)
        if action == REJECT:
            continue
        # Keep only a subset of states (the active ones). Index q is the
@@ -437,19 +440,23 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
            if action in [RETRY_EXTEND, RETRY_OR_EXTEND]:
                # This handles the 'extend'
                new_states.push_back(
-                    PatternStateC(pattern=states[q].pattern, start=state.start, length=state.length + 1))
+                    PatternStateC(pattern=states[q].pattern, start=state.start,
+                                  length=state.length + 1))
                if with_alignments != 0:
                    align_new_states.push_back(align_states[q])
            if action == RETRY_ADVANCE:
                # This handles the 'advance'
                new_states.push_back(
-                    PatternStateC(pattern=states[q].pattern + 1, start=state.start, length=state.length + 1))
+                    PatternStateC(pattern=states[q].pattern + 1, start=state.start,
+                                  length=state.length + 1))
                if with_alignments != 0:
                    align_new_states.push_back(align_states[q])
            states[q].pattern += 1
            if states[q].pattern.nr_py != 0:
-                update_predicate_cache(cached_py_predicates, states[q].pattern, token, py_predicates)
-            next_action = get_action(states[q], token.c, extra_attrs, cached_py_predicates)
+                update_predicate_cache(cached_py_predicates,
+                    states[q].pattern, token, py_predicates)
+            next_action = get_action(states[q], token.c, extra_attrs,
+                                     cached_py_predicates)
            # To account for *? and +?
            if get_quantifier(state) == ZERO_MINUS:
                next_action = cast_to_non_greedy_action(action, next_action, new_states, align_new_states,
@@ -470,37 +477,49 @@ cdef void transition_states(vector[PatternStateC]& states, vector[MatchC]& match
        else:
            ent_id = get_ent_id(state.pattern)
            if action == MATCH:
-                matches.push_back(MatchC(pattern_id=ent_id, start=state.start, length=state.length + 1))
+                matches.push_back(
+                    MatchC(pattern_id=ent_id, start=state.start,
+                            length=state.length + 1))
                # `align_matches` always corresponds to `matches` 1:1
                if with_alignments != 0:
                    align_matches.push_back(align_states[q])
            elif action == MATCH_DOUBLE:
                # push match without last token if length > 0
                if state.length > 0:
-                    matches.push_back(MatchC(pattern_id=ent_id, start=state.start, length=state.length))
+                    matches.push_back(
+                        MatchC(pattern_id=ent_id, start=state.start,
+                                length=state.length))
                    # MATCH_DOUBLE emits matches twice,
                    # add one more to align_matches in order to keep 1:1 relationship
                    if with_alignments != 0:
                        align_matches.push_back(align_states[q])
                # push match with last token
-                matches.push_back(MatchC(pattern_id=ent_id, start=state.start, length=state.length + 1))
+                matches.push_back(
+                    MatchC(pattern_id=ent_id, start=state.start,
+                            length=state.length + 1))
                # `align_matches` always corresponds to `matches` 1:1
                if with_alignments != 0:
                    align_matches.push_back(align_states[q])
            elif action == MATCH_REJECT:
-                matches.push_back(MatchC(pattern_id=ent_id, start=state.start, length=state.length))
+                matches.push_back(
+                    MatchC(pattern_id=ent_id, start=state.start,
+                            length=state.length))
                # `align_matches` always corresponds to `matches` 1:1
                if with_alignments != 0:
                    align_matches.push_back(align_states[q])
            elif action == MATCH_EXTEND:
-                matches.push_back(MatchC(pattern_id=ent_id, start=state.start, length=state.length))
+                matches.push_back(
+                    MatchC(pattern_id=ent_id, start=state.start,
+                            length=state.length))
                # `align_matches` always corresponds to `matches` 1:1
                if with_alignments != 0:
                    align_matches.push_back(align_states[q])
                states[q].length += 1
                q += 1
            elif action == MATCH_ADVANCE:
-                matches.push_back(MatchC(pattern_id=ent_id, start=state.start, length=state.length + 1))
+                matches.push_back(
+                    MatchC(pattern_id=ent_id, start=state.start,
+                            length=state.length + 1))
                # `align_matches` always corresponds to `matches` 1:1
                if with_alignments != 0:
                    align_matches.push_back(align_states[q])
@@ -535,6 +554,7 @@ cdef int update_predicate_cache(int8_t * cache,
            else:
                raise ValueError(Errors.E125.format(value=result))

+
 cdef void finish_states(vector[MatchC]& matches, vector[PatternStateC]& states,
                        vector[vector[MatchAlignmentC]]& align_matches,
                        vector[vector[MatchAlignmentC]]& align_states,
@@ -565,7 +585,8 @@ cdef void finish_states(vector[MatchC]& matches, vector[PatternStateC]& states,
            else:
                state.pattern += 1

-cdef action_t get_action(PatternStateC state, const TokenC * token, const attr_t * extra_attrs,
+cdef action_t get_action(PatternStateC state,
+        const TokenC* token, const attr_t* extra_attrs,
        const int8_t* predicate_matches) nogil:
    """We need to consider:
    a) Does the token match the specification? [Yes, No]
@@ -656,6 +677,7 @@ cdef action_t get_action(PatternStateC state, const TokenC * token, const attr_t
            # is_non_greedy_plus() verifies that the current state's pattern is +?
            # has_star_tail() verifies the remaining pattern tokens are either * or *?,
            # so that it is valid for the current match to exist.
+            # TODO if this impacts the performance, "ONE_MINUS" could be created
            return MATCH_ADVANCE
        elif is_match and not is_final:
            # Yes, non-final: 0100