mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 04:40:20 +03:00
Increase default fuzzy to rounded 30% of pattern length
This commit is contained in:
parent
b690c9120c
commit
e88c724c99
|
@ -38,7 +38,7 @@ cpdef bint fuzzy_compare(input_text: str, pattern_text: str, fuzzy: int = -1):
|
||||||
else:
|
else:
|
||||||
# allow at least two edits (to allow at least one transposition) and up
|
# allow at least two edits (to allow at least one transposition) and up
|
||||||
# to 20% of the pattern string length
|
# to 30% (rounded) of the pattern string length
|
||||||
max_edits = max(2, int(0.2 * len(pattern_text)))
|
max_edits = max(2, round(0.3 * len(pattern_text)))
|
||||||
return levenshtein(input_text, pattern_text, max_edits) <= max_edits
|
return levenshtein(input_text, pattern_text, max_edits) <= max_edits
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -145,8 +145,8 @@ def test_matcher_match_multi(matcher):
|
||||||
# caching don't collide)
|
# caching don't collide)
|
||||||
(
|
(
|
||||||
{
|
{
|
||||||
"A": [[{"ORTH": {"FUZZY": "Javascript"}}]],
|
"A": [[{"ORTH": {"FUZZY": "Javascripts"}}]],
|
||||||
"B": [[{"ORTH": {"FUZZY5": "Javascript"}}]],
|
"B": [[{"ORTH": {"FUZZY5": "Javascripts"}}]],
|
||||||
},
|
},
|
||||||
[(8, 9)],
|
[(8, 9)],
|
||||||
),
|
),
|
||||||
|
|
|
@ -89,7 +89,7 @@ it compares to another value.
|
||||||
| Attribute | Description |
|
| Attribute | Description |
|
||||||
| -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `REGEX` | Attribute value matches the regular expression at any position in the string. ~~Any~~ |
|
| `REGEX` | Attribute value matches the regular expression at any position in the string. ~~Any~~ |
|
||||||
| `FUZZY` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, -1)`. The default method allows a Levenshtein edit distance of at least 2 and up to 20% of the pattern string length. ~~Any~~ |
|
| `FUZZY` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, -1)`. The default method allows a Levenshtein edit distance of up to 2, or up to 30% of the pattern string length (rounded) if that is larger. ~~Any~~ |
|
||||||
| `FUZZY1`, `FUZZY2`, ... `FUZZY9` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, N)`. The default method allows a Levenshtein edit distance of at most N (1-9). ~~Any~~ |
|
| `FUZZY1`, `FUZZY2`, ... `FUZZY9` | Attribute value matches if the `fuzzy_compare` method matches for `(value, pattern, N)`. The default method allows a Levenshtein edit distance of at most N (1-9). ~~Any~~ |
|
||||||
| `IN` | Attribute value is member of a list. ~~Any~~ |
|
| `IN` | Attribute value is member of a list. ~~Any~~ |
|
||||||
| `NOT_IN` | Attribute value is _not_ member of a list. ~~Any~~ |
|
| `NOT_IN` | Attribute value is _not_ member of a list. ~~Any~~ |
|
||||||
|
|
|
@ -378,7 +378,7 @@ pattern = [{"TEXT": {"FUZZY": "favorite"}},
|
||||||
The `FUZZY` attribute allows fuzzy matches for any attribute string value,
|
The `FUZZY` attribute allows fuzzy matches for any attribute string value,
|
||||||
including custom attributes. Just like `REGEX`, it always needs to be applied to
|
including custom attributes. Just like `REGEX`, it always needs to be applied to
|
||||||
an attribute like `TEXT` or `LOWER`. By default `FUZZY` allows a Levenshtein
|
an attribute like `TEXT` or `LOWER`. By default `FUZZY` allows a Levenshtein
|
||||||
edit distance of at least 2 and up to 20% of the pattern string length. Using
|
edit distance of up to 2, or up to 30% of the pattern string length (rounded) if that is larger. Using
|
||||||
the more specific attributes `FUZZY1`..`FUZZY9` you can specify the maximum
|
the more specific attributes `FUZZY1`..`FUZZY9` you can specify the maximum
|
||||||
allowed edit distance directly.
|
allowed edit distance directly.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user