diff --git a/requirements.txt b/requirements.txt index 38b4cbf0d..070ffe7a4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ tqdm>=4.38.0,<5.0.0 pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0 jinja2 langcodes>=3.2.0,<4.0.0 -rapidfuzz>=2.4.0,<3.0.0 +polyleven>=0.7,<1.0 # Official Python utilities setuptools packaging>=20.0 diff --git a/setup.cfg b/setup.cfg index a149b1f7e..de58de3bc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -58,7 +58,7 @@ install_requires = requests>=2.13.0,<3.0.0 pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0 jinja2 - rapidfuzz>=2.4.0,<3.0.0 + polyleven>=0.7,<1.0 # Official Python utilities setuptools packaging>=20.0 diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 56fd11365..d27397f8b 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -10,7 +10,7 @@ from murmurhash.mrmr cimport hash64 import re import srsly import warnings -from rapidfuzz.distance import Levenshtein +from polyleven import levenshtein from ..typedefs cimport attr_t from ..structs cimport TokenC @@ -846,7 +846,7 @@ class _FuzzyPredicate: value = token._.get(self.attr) else: value = token.vocab.strings[get_token_attr_for_matcher(token.c, self.attr)] - return bool(Levenshtein.distance(self.value, value) <= self.distance) + return bool(levenshtein(self.value, value) <= self.distance) class _RegexPredicate: @@ -914,8 +914,8 @@ class _SetPredicate: return True elif self.distance: for v in self.value: - if Levenshtein.distance(self.vocab.strings[value], - self.vocab.strings[v]) <= self.distance: + if levenshtein(self.vocab.strings[value], + self.vocab.strings[v]) <= self.distance: return True return False elif self.predicate == "NOT_IN": @@ -923,8 +923,8 @@ class _SetPredicate: return False elif self.distance: for v in self.value: - if Levenshtein.distance(self.vocab.strings[value], - self.vocab.strings[v]) <= self.distance: + if levenshtein(self.vocab.strings[value], + self.vocab.strings[v]) <= self.distance: return False return True elif self.predicate == "IS_SUBSET":