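Switch from rapidfuzz to polyleven

(polyleven is a Python package; its `levenshtein` function replaces rapidfuzz's `Levenshtein.distance`.)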
This commit is contained in:
Kevin Humphreys 2022-08-29 21:42:10 +02:00
parent a8a4d86bae
commit 59021f7d25
3 changed files with 8 additions and 8 deletions

View File

@@ -18,7 +18,7 @@ tqdm>=4.38.0,<5.0.0
pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0 pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
jinja2 jinja2
langcodes>=3.2.0,<4.0.0 langcodes>=3.2.0,<4.0.0
rapidfuzz>=2.4.0,<3.0.0 polyleven>=0.7,<1.0
# Official Python utilities # Official Python utilities
setuptools setuptools
packaging>=20.0 packaging>=20.0

View File

@@ -58,7 +58,7 @@ install_requires =
requests>=2.13.0,<3.0.0 requests>=2.13.0,<3.0.0
pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0 pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
jinja2 jinja2
rapidfuzz>=2.4.0,<3.0.0 polyleven>=0.7,<1.0
# Official Python utilities # Official Python utilities
setuptools setuptools
packaging>=20.0 packaging>=20.0

View File

@@ -10,7 +10,7 @@ from murmurhash.mrmr cimport hash64
import re import re
import srsly import srsly
import warnings import warnings
from rapidfuzz.distance import Levenshtein from polyleven import levenshtein
from ..typedefs cimport attr_t from ..typedefs cimport attr_t
from ..structs cimport TokenC from ..structs cimport TokenC
@@ -846,7 +846,7 @@ class _FuzzyPredicate:
value = token._.get(self.attr) value = token._.get(self.attr)
else: else:
value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)] value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)]
return bool(Levenshtein.distance(self.value, value) <= self.distance) return bool(levenshtein(self.value, value) <= self.distance)
class _RegexPredicate: class _RegexPredicate:
@@ -914,8 +914,8 @@ class _SetPredicate:
return True return True
elif self.distance: elif self.distance:
for v in self.value: for v in self.value:
if Levenshtein.distance(self.vocab.strings[value], if levenshtein(self.vocab.strings[value],
self.vocab.strings[v]) <= self.distance: self.vocab.strings[v]) <= self.distance:
return True return True
return False return False
elif self.predicate == "NOT_IN": elif self.predicate == "NOT_IN":
@@ -923,8 +923,8 @@ class _SetPredicate:
return False return False
elif self.distance: elif self.distance:
for v in self.value: for v in self.value:
if Levenshtein.distance(self.vocab.strings[value], if levenshtein(self.vocab.strings[value],
self.vocab.strings[v]) <= self.distance: self.vocab.strings[v]) <= self.distance:
return False return False
return True return True
elif self.predicate == "IS_SUBSET": elif self.predicate == "IS_SUBSET":