diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 9316d3e7c..bea7e67cd 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -38,7 +38,7 @@ cpdef bint fuzzy_compare(input_text: str, pattern_text: str, fuzzy: int = -1): else: # allow at least two edits (to allow at least one transposition) and up - # to 20% of the pattern string length - max_edits = max(2, int(0.2 * len(pattern_text))) + # to 30% of the pattern string length + max_edits = max(2, round(0.3 * len(pattern_text))) return levenshtein(input_text, pattern_text, max_edits) <= max_edits diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index 87b0f32e3..f04b06a34 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -145,8 +145,8 @@ def test_matcher_match_multi(matcher): # caching don't collide) ( { - "A": [[{"ORTH": {"FUZZY": "Javascript"}}]], - "B": [[{"ORTH": {"FUZZY5": "Javascript"}}]], + "A": [[{"ORTH": {"FUZZY": "Javascripts"}}]], + "B": [[{"ORTH": {"FUZZY5": "Javascripts"}}]], }, [(8, 9)], ), diff --git a/website/docs/api/matcher.md b/website/docs/api/matcher.md index 122fab243..bd5f6ac24 100644 --- a/website/docs/api/matcher.md +++ b/website/docs/api/matcher.md @@ -89,7 +89,7 @@ it compares to another value. | Attribute | Description | | -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `REGEX` | Attribute value matches the regular expression at any position in the string. ~~Any~~ | -| `FUZZY` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, -1)`. The default method allows a Levenshtein edit distance of at least 2 and up to 20% of the pattern string length. ~~Any~~ | +| `FUZZY` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, -1)`. 
The default method allows a maximum Levenshtein edit distance of 2, or of 30% of the pattern string length (rounded) if that is larger. ~~Any~~ | | `FUZZY1`, `FUZZY2`, ... `FUZZY9` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, N)`. The default method allows a Levenshtein edit distance of at most N (1-9). ~~Any~~ | | `IN` | Attribute value is member of a list. ~~Any~~ | | `NOT_IN` | Attribute value is _not_ member of a list. ~~Any~~ | diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index ce1f3672d..3e15fca36 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -378,7 +378,7 @@ pattern = [{"TEXT": {"FUZZY": "favorite"}}, The `FUZZY` attribute allows fuzzy matches for any attribute string value, including custom attributes. Just like `REGEX`, it always needs to be applied to an attribute like `TEXT` or `LOWER`. By default `FUZZY` allows a Levenshtein -edit distance of at least 2 and up to 20% of the pattern string length. Using +edit distance of up to 2, or up to 30% of the pattern string length (rounded) if that is larger. Using the more specific attributes `FUZZY1`..`FUZZY9` you can specify the maximum allowed edit distance directly.