Add types for fuzzy_match

- Refactor variable names
- Add test for symmetrical behavior
This commit is contained in:
Adriane Boyd 2022-11-14 08:37:39 +01:00
parent c81330d02f
commit ae0bb75d70
3 changed files with 6 additions and 3 deletions

View File

@ -52,3 +52,5 @@ class Matcher:
with_alignments: bool = ...
) -> List[Span]: ...
def _normalize_key(self, key: Any) -> Any: ...
def fuzzy_match(s1: str, s2: str, fuzzy: int=-1) -> bool: ...

View File

@ -1147,11 +1147,11 @@ def _get_extensions(spec, string_store, name2index):
return attr_values
def fuzzy_match(input_string: str, rule_string: str, fuzzy: int=-1) -> bool:
distance = min(len(input_string), len(rule_string))
def fuzzy_match(s1: str, s2: str, fuzzy: int=-1) -> bool:
distance = min(len(s1), len(s2))
distance -= 1 # don't allow completely different tokens
if fuzzy == -1: # FUZZY operator with unspecified fuzzy
fuzzy = 5 # default max fuzzy
distance -= 1 # be more restrictive
distance = min(fuzzy, distance if distance > 0 else 1)
return levenshtein(input_string, rule_string, distance) <= distance
return levenshtein(s1, s2, distance) <= distance

View File

@ -71,3 +71,4 @@ def test_levenshtein(dist, a, b):
)
def test_fuzzy_match(a, b, fuzzy, expected):
assert fuzzy_match(a, b, fuzzy) == expected
assert fuzzy_match(b, a, fuzzy) == expected