mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 20:30:24 +03:00
Add FUZZY6-9 operators and update tests
This commit is contained in:
parent
8722f857eb
commit
213fb85d12
|
@ -848,7 +848,8 @@ def _get_attr_values(spec, string_store):
|
|||
# extensions to the matcher introduced in #3173.
|
||||
|
||||
class _FuzzyPredicate:
|
||||
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5")
|
||||
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5",
|
||||
"FUZZY6", "FUZZY7", "FUZZY8", "FUZZY9")
|
||||
|
||||
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None,
|
||||
regex=False, fuzzy=None, fuzzy_compare=None):
|
||||
|
|
|
@ -198,15 +198,20 @@ def test_matcher_match_fuzzy_set_multiple(en_vocab):
|
|||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fuzzyn", range(1, 6))
|
||||
def test_matcher_match_fuzzyn(en_vocab, fuzzyn):
|
||||
@pytest.mark.parametrize("fuzzyn", range(1, 10))
|
||||
def test_matcher_match_fuzzyn_all_insertions(en_vocab, fuzzyn):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
|
||||
# words with increasing edit distance
|
||||
words = ["GoogleNow" + "a" * i for i in range(0, 6)]
|
||||
words = ["GoogleNow" + "a" * i for i in range(0, 10)]
|
||||
doc = Doc(en_vocab, words)
|
||||
assert len(matcher(doc)) == fuzzyn + 1
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fuzzyn", range(1, 6))
|
||||
def test_matcher_match_fuzzyn_various_edits(en_vocab, fuzzyn):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
|
||||
# words with increasing edit distance of different edit types
|
||||
words = [
|
||||
"GoogleNow",
|
||||
|
|
Loading…
Reference in New Issue
Block a user