Move fuzzy_match to a standalone function

2025-09-15 08:32:37 +03:00 · 2022-11-11 13:58:05 +01:00 · 2022-11-11 13:58:05 +01:00 · 6ae4c99365
commit 6ae4c99365
parent 7e25c7f10f
2 changed files with 15 additions and 16 deletions
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@ -205,17 +205,6 @@ cdef class Matcher:
                else:
                    yield doc

-    @staticmethod
-    def fuzzy_match(input_string: str, rule_string: str, fuzzy: int=-1) -> bool:
-        distance = min(len(input_string), len(rule_string))
-        distance -= 1 # don't allow completely different tokens
-        if fuzzy == -1: # FUZZY operator with unspecified fuzzy
-            fuzzy = 5 # default max fuzzy
-            distance -= 1 # be more restrictive
-        distance = min(fuzzy, distance if distance > 0 else 1)
-        return levenshtein(input_string, rule_string, distance) <= distance
-
-
    def __call__(self, object doclike, *, as_spans=False, allow_missing=False, with_alignments=False):
        """Find all token sequences matching the supplied pattern.

@ -862,7 +851,7 @@ class _FuzzyPredicate:
            value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
        if self.value == value:
            return True
-        return Matcher.fuzzy_match(value, self.value, self.fuzzy)
+        return fuzzy_match(value, self.value, self.fuzzy)


 class _RegexPredicate:
@ -945,7 +934,7 @@ class _SetPredicate:
                return True
            elif self.fuzzy is not None:
                value = self.vocab.strings[value]
-                return any(Matcher.fuzzy_match(value, self.vocab.strings[v], self.fuzzy)
+                return any(fuzzy_match(value, self.vocab.strings[v], self.fuzzy)
                           for v in self.value)
            else:
                return False
@ -957,7 +946,7 @@ class _SetPredicate:
                return False
            elif self.fuzzy is not None:
                value = self.vocab.strings[value]
-                return not any(Matcher.fuzzy_match(value, self.vocab.strings[v], self.fuzzy)
+                return not any(fuzzy_match(value, self.vocab.strings[v], self.fuzzy)
                               for v in self.value)
            else:
                return True
@ -1156,3 +1145,13 @@ def _get_extensions(spec, string_store, name2index):
            name2index[name] = len(name2index)
        attr_values.append((name2index[name], value))
    return attr_values
+
+
+def fuzzy_match(input_string: str, rule_string: str, fuzzy: int=-1) -> bool:
+    distance = min(len(input_string), len(rule_string))
+    distance -= 1 # don't allow completely different tokens
+    if fuzzy == -1: # FUZZY operator with unspecified fuzzy
+        fuzzy = 5 # default max fuzzy
+        distance -= 1 # be more restrictive
+    distance = min(fuzzy, distance if distance > 0 else 1)
+    return levenshtein(input_string, rule_string, distance) <= distance
--- a/spacy/tests/matcher/test_levenshtein.py
+++ b/spacy/tests/matcher/test_levenshtein.py
@ -1,6 +1,6 @@
 import pytest
 from spacy.matcher import levenshtein
-from spacy.matcher import Matcher
+from spacy.matcher.matcher import fuzzy_match


 # empty string plus 10 random ASCII, 10 random unicode, and 2 random long tests
@ -70,4 +70,4 @@ def test_levenshtein(dist, a, b):
    ],
 )
 def test_fuzzy_match(a, b, fuzzy, expected):
-    assert Matcher.fuzzy_match(a, b, fuzzy) == expected
+    assert fuzzy_match(a, b, fuzzy) == expected