mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-06 05:10:21 +03:00
Fuzzy match only on out-of-vocabulary (OOV) tokens
This commit is contained in:
parent
a6d26a0195
commit
b7599dfb2f
|
@ -846,7 +846,11 @@ class _FuzzyPredicate:
|
||||||
value = token._.get(self.attr)
|
value = token._.get(self.attr)
|
||||||
else:
|
else:
|
||||||
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
|
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
|
||||||
return bool(levenshtein(self.value, value) <= self.distance)
|
if self.value == value:
|
||||||
|
return True
|
||||||
|
elif self.distance and token.is_oov:
|
||||||
|
return bool(levenshtein(self.value, value) <= self.distance)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
class _RegexPredicate:
|
class _RegexPredicate:
|
||||||
|
@ -912,7 +916,7 @@ class _SetPredicate:
|
||||||
if self.predicate == "IN":
|
if self.predicate == "IN":
|
||||||
if value in self.value:
|
if value in self.value:
|
||||||
return True
|
return True
|
||||||
elif self.distance:
|
elif self.distance and token.is_oov:
|
||||||
for v in self.value:
|
for v in self.value:
|
||||||
if levenshtein(self.vocab.strings[value],
|
if levenshtein(self.vocab.strings[value],
|
||||||
self.vocab.strings[v]) <= self.distance:
|
self.vocab.strings[v]) <= self.distance:
|
||||||
|
@ -921,7 +925,7 @@ class _SetPredicate:
|
||||||
elif self.predicate == "NOT_IN":
|
elif self.predicate == "NOT_IN":
|
||||||
if value in self.value:
|
if value in self.value:
|
||||||
return False
|
return False
|
||||||
elif self.distance:
|
elif self.distance and token.is_oov:
|
||||||
for v in self.value:
|
for v in self.value:
|
||||||
if levenshtein(self.vocab.strings[value],
|
if levenshtein(self.vocab.strings[value],
|
||||||
self.vocab.strings[v]) <= self.distance:
|
self.vocab.strings[v]) <= self.distance:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user