Remove is_oov check from Matcher.fuzzy_match

2025-09-15 16:42:36 +03:00 · 2022-10-14 15:51:20 -07:00 · 2022-10-14 15:51:20 -07:00 · 9c83b804f1
commit 9c83b804f1
parent bf4b353ce5
1 changed file with 11 additions and 12 deletions
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@@ -206,15 +206,14 @@ cdef class Matcher:
                    yield doc

    @staticmethod
-    def fuzzy_match(s1: str, s2: str, distance: int, token: Token) -> bool:
-        if token.is_oov: # (TODO: param?)
-            threshold = min(len(s1), len(s2)) - 1 # max edit distance
-            if distance: # FUZZYn operators
-                threshold = min(distance, threshold)
-            else: # FUZZY operator
-                threshold = min(5, threshold - 1) # default fuzziness (TODO: param?)
-            if threshold > 0:
-                return levenshtein(s1, s2) <= threshold
+    def fuzzy_match(s1: str, s2: str, distance: int) -> bool:
+        min_length = min(len(s1), len(s2))
+        if distance: # FUZZYn operators with explicit distance
+            threshold = min(distance, min_length - 1)
+        else: # FUZZY operator with default distance
+            threshold = min(5, min_length - 2)
+        if threshold > 0:
+            return levenshtein(s1, s2) <= threshold
        return False

    def __call__(self, object doclike, *, as_spans=False, allow_missing=False, with_alignments=False):
@@ -863,7 +862,7 @@ class _FuzzyPredicate:
            value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
        if self.value == value:
            return True
-        return Matcher.fuzzy_match(value, self.value, self.fuzzy, token)
+        return Matcher.fuzzy_match(value, self.value, self.fuzzy)


 class _RegexPredicate:
@@ -946,7 +945,7 @@ class _SetPredicate:
                return True
            elif self.fuzzy is not None:
                value = self.vocab.strings[value]
-                return any(Matcher.fuzzy_match(value, self.vocab.strings[v], self.fuzzy, token)
+                return any(Matcher.fuzzy_match(value, self.vocab.strings[v], self.fuzzy)
                           for v in self.value)
            else:
                return False
@@ -958,7 +957,7 @@ class _SetPredicate:
                return False
            elif self.fuzzy is not None:
                value = self.vocab.strings[value]
-                return not any(Matcher.fuzzy_match(value, self.vocab.strings[v], self.fuzzy, token)
+                return not any(Matcher.fuzzy_match(value, self.vocab.strings[v], self.fuzzy)
                               for v in self.value)
            else:
                return True