fix min distance

This commit is contained in:
Kevin Humphreys 2022-09-22 15:37:19 -07:00
parent 4a677acf5d
commit eab96f7c03

View File

@@ -849,8 +849,8 @@ class _FuzzyPredicate:
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)] value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
if self.value == value: if self.value == value:
return True return True
elif self.distance and token.is_oov and not token.is_space: elif self.distance and token.is_oov:
return bool(levenshtein(self.value, value) <= min(self.distance, len(token.text)-1)) return bool(levenshtein(value, self.value) <= min(self.distance, min(len(value), len(self.value))-1))
return False return False
@@ -924,19 +924,21 @@ class _SetPredicate:
if self.predicate == "IN": if self.predicate == "IN":
if value in self.value: if value in self.value:
return True return True
elif self.distance and token.is_oov and not token.is_space: elif self.distance and token.is_oov:
s1 = self.vocab.strings[value]
for v in self.value: for v in self.value:
if levenshtein(self.vocab.strings[value], s2 = self.vocab.strings[v]
self.vocab.strings[v]) <= min(self.distance, len(token.text)-1): if levenshtein(s1, s2) <= min(self.distance, min(len(s1), len(s2))-1):
return True return True
return False return False
elif self.predicate == "NOT_IN": elif self.predicate == "NOT_IN":
if value in self.value: if value in self.value:
return False return False
elif self.distance and token.is_oov and not token.is_space: elif self.distance and token.is_oov:
s1 = self.vocab.strings[value]
for v in self.value: for v in self.value:
if levenshtein(self.vocab.strings[value], s2 = self.vocab.strings[v]
self.vocab.strings[v]) <= min(self.distance, len(token.text)-1): if levenshtein(s1, s2) <= min(self.distance, min(len(s1), len(s2))-1):
return False return False
return True return True
elif self.predicate == "IS_SUBSET": elif self.predicate == "IS_SUBSET":