From 4a677acf5d18abe0f5a99ff253aa9afc12431744 Mon Sep 17 00:00:00 2001 From: Kevin Humphreys Date: Thu, 15 Sep 2022 16:14:24 -0700 Subject: [PATCH] don't allow more edits than characters --- spacy/matcher/matcher.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 7d111ca7d..04ecfa546 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -850,7 +850,7 @@ class _FuzzyPredicate: if self.value == value: return True elif self.distance and token.is_oov and not token.is_space: - return bool(levenshtein(self.value, value) <= self.distance) + return bool(levenshtein(self.value, value) <= min(self.distance, len(token.text)-1)) return False @@ -927,7 +927,7 @@ class _SetPredicate: elif self.distance and token.is_oov and not token.is_space: for v in self.value: if levenshtein(self.vocab.strings[value], - self.vocab.strings[v]) <= self.distance: + self.vocab.strings[v]) <= min(self.distance, len(token.text)-1): return True return False elif self.predicate == "NOT_IN": @@ -936,7 +936,7 @@ class _SetPredicate: elif self.distance and token.is_oov and not token.is_space: for v in self.value: if levenshtein(self.vocab.strings[value], - self.vocab.strings[v]) <= self.distance: + self.vocab.strings[v]) <= min(self.distance, len(token.text)-1): return False return True elif self.predicate == "IS_SUBSET":