mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 12:50:20 +03:00
Increase the minimum default edit allowance to 2 in fuzzy_compare so that one transposition (which counts as two edits) is allowed
This commit is contained in:
parent
7a27fa7ac8
commit
8a749fccbc
|
@ -7,7 +7,6 @@ from libc.string cimport memset, memcmp
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from murmurhash.mrmr cimport hash64
|
from murmurhash.mrmr cimport hash64
|
||||||
|
|
||||||
from math import ceil
|
|
||||||
import re
|
import re
|
||||||
import srsly
|
import srsly
|
||||||
import warnings
|
import warnings
|
||||||
|
@ -37,8 +36,9 @@ cpdef bint fuzzy_compare(input_text: str, pattern_text: str, fuzzy: int = -1):
|
||||||
if fuzzy >= 0:
|
if fuzzy >= 0:
|
||||||
max_edits = fuzzy
|
max_edits = fuzzy
|
||||||
else:
|
else:
|
||||||
# allow at least one edit and up to 20% of the pattern string length
|
# allow at least two edits (to allow at least one transposition) and up
|
||||||
max_edits = ceil(0.2 * len(pattern_text))
|
# to 20% of the pattern string length
|
||||||
|
max_edits = max(2, int(0.2 * len(pattern_text)))
|
||||||
return levenshtein(input_text, pattern_text, max_edits) <= max_edits
|
return levenshtein(input_text, pattern_text, max_edits) <= max_edits
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user