mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 04:40:20 +03:00
Add types for fuzzy_match
- Refactor variable names - Add test for symmetrical behavior
This commit is contained in:
parent
c81330d02f
commit
ae0bb75d70
|
@ -52,3 +52,5 @@ class Matcher:
|
|||
with_alignments: bool = ...
|
||||
) -> List[Span]: ...
|
||||
def _normalize_key(self, key: Any) -> Any: ...
|
||||
|
||||
# Stub for the module-level fuzzy comparison helper. `fuzzy=-1` is the
# sentinel for "no explicit maximum edit distance given".
def fuzzy_match(s1: str, s2: str, fuzzy: int = -1) -> bool: ...
|
||||
|
|
|
@ -1147,11 +1147,11 @@ def _get_extensions(spec, string_store, name2index):
|
|||
return attr_values
|
||||
|
||||
|
||||
def fuzzy_match(s1: str, s2: str, fuzzy: int = -1) -> bool:
    """Return whether `s1` and `s2` are within the permitted edit distance.

    The permitted distance is derived from the length of the shorter string
    and from `fuzzy`, so the limit itself does not depend on argument order.

    s1: First string to compare.
    s2: Second string to compare.
    fuzzy: Upper bound on the permitted distance. The sentinel -1 means the
        bound was not specified: a default maximum of 5 is used and the
        length-based limit is tightened by one more.
    Returns True when `levenshtein(s1, s2, limit)` does not exceed `limit`.
    """
    # Length-based limit: one less than the shorter string, so two completely
    # different tokens cannot match.
    distance = min(len(s1), len(s2)) - 1
    if fuzzy == -1:  # FUZZY operator with unspecified fuzzy
        fuzzy = 5  # default max fuzzy
        distance -= 1  # be more restrictive
    # A non-positive length-based limit is raised to 1; `fuzzy` still caps
    # the result, so fuzzy=0 yields a limit of 0.
    distance = min(fuzzy, distance if distance > 0 else 1)
    return levenshtein(s1, s2, distance) <= distance
|
||||
|
|
|
@ -71,3 +71,4 @@ def test_levenshtein(dist, a, b):
|
|||
)
|
||||
def test_fuzzy_match(a, b, fuzzy, expected):
|
||||
assert fuzzy_match(a, b, fuzzy) == expected
|
||||
assert fuzzy_match(b, a, fuzzy) == expected
|
||||
|
|
Loading…
Reference in New Issue
Block a user