Increase minimum to 2 in fuzzy_compare to allow one transposition

This commit is contained in:
Adriane Boyd 2022-11-30 08:46:31 +01:00
parent 7a27fa7ac8
commit 8a749fccbc

View File

@@ -7,7 +7,6 @@ from libc.string cimport memset, memcmp
 from cymem.cymem cimport Pool
 from murmurhash.mrmr cimport hash64
-from math import ceil
 import re
 import srsly
 import warnings
@@ -37,8 +36,9 @@ cpdef bint fuzzy_compare(input_text: str, pattern_text: str, fuzzy: int = -1):
     if fuzzy >= 0:
         max_edits = fuzzy
     else:
-        # allow at least one edit and up to 20% of the pattern string length
-        max_edits = ceil(0.2 * len(pattern_text))
+        # allow at least two edits (to allow at least one transposition) and up
+        # to 20% of the pattern string length
+        max_edits = max(2, int(0.2 * len(pattern_text)))
     return levenshtein(input_text, pattern_text, max_edits) <= max_edits