mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 21:00:19 +03:00
Add types for fuzzy_match
- Refactor variable names
- Add test for symmetrical behavior
This commit is contained in:
parent
c81330d02f
commit
ae0bb75d70
|
@ -52,3 +52,5 @@ class Matcher:
|
||||||
with_alignments: bool = ...
|
with_alignments: bool = ...
|
||||||
) -> List[Span]: ...
|
) -> List[Span]: ...
|
||||||
def _normalize_key(self, key: Any) -> Any: ...
|
def _normalize_key(self, key: Any) -> Any: ...
|
||||||
|
|
||||||
|
def fuzzy_match(s1: str, s2: str, fuzzy: int=-1) -> bool: ...
|
||||||
|
|
|
@ -1147,11 +1147,11 @@ def _get_extensions(spec, string_store, name2index):
|
||||||
return attr_values
|
return attr_values
|
||||||
|
|
||||||
|
|
||||||
def fuzzy_match(input_string: str, rule_string: str, fuzzy: int=-1) -> bool:
|
def fuzzy_match(s1: str, s2: str, fuzzy: int=-1) -> bool:
|
||||||
distance = min(len(input_string), len(rule_string))
|
distance = min(len(s1), len(s2))
|
||||||
distance -= 1 # don't allow completely different tokens
|
distance -= 1 # don't allow completely different tokens
|
||||||
if fuzzy == -1: # FUZZY operator with unspecified fuzzy
|
if fuzzy == -1: # FUZZY operator with unspecified fuzzy
|
||||||
fuzzy = 5 # default max fuzzy
|
fuzzy = 5 # default max fuzzy
|
||||||
distance -= 1 # be more restrictive
|
distance -= 1 # be more restrictive
|
||||||
distance = min(fuzzy, distance if distance > 0 else 1)
|
distance = min(fuzzy, distance if distance > 0 else 1)
|
||||||
return levenshtein(input_string, rule_string, distance) <= distance
|
return levenshtein(s1, s2, distance) <= distance
|
||||||
|
|
|
@ -71,3 +71,4 @@ def test_levenshtein(dist, a, b):
|
||||||
)
|
)
|
||||||
def test_fuzzy_match(a, b, fuzzy, expected):
|
def test_fuzzy_match(a, b, fuzzy, expected):
|
||||||
assert fuzzy_match(a, b, fuzzy) == expected
|
assert fuzzy_match(a, b, fuzzy) == expected
|
||||||
|
assert fuzzy_match(b, a, fuzzy) == expected
|
||||||
|
|
Loading…
Reference in New Issue
Block a user