Add FUZZY6-9 operators and update tests

This commit is contained in:
Adriane Boyd 2023-01-09 13:05:24 +01:00
parent 8722f857eb
commit 213fb85d12
2 changed files with 10 additions and 4 deletions

View File

@@ -848,7 +848,8 @@ def _get_attr_values(spec, string_store):
# extensions to the matcher introduced in #3173. # extensions to the matcher introduced in #3173.
class _FuzzyPredicate: class _FuzzyPredicate:
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5") operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5",
"FUZZY6", "FUZZY7", "FUZZY8", "FUZZY9")
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None, def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None,
regex=False, fuzzy=None, fuzzy_compare=None): regex=False, fuzzy=None, fuzzy_compare=None):

View File

@@ -198,15 +198,20 @@ def test_matcher_match_fuzzy_set_multiple(en_vocab):
] ]
@pytest.mark.parametrize("fuzzyn", range(1, 6)) @pytest.mark.parametrize("fuzzyn", range(1, 10))
def test_matcher_match_fuzzyn(en_vocab, fuzzyn): def test_matcher_match_fuzzyn_all_insertions(en_vocab, fuzzyn):
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]]) matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
# words with increasing edit distance # words with increasing edit distance
words = ["GoogleNow" + "a" * i for i in range(0, 6)] words = ["GoogleNow" + "a" * i for i in range(0, 10)]
doc = Doc(en_vocab, words) doc = Doc(en_vocab, words)
assert len(matcher(doc)) == fuzzyn + 1 assert len(matcher(doc)) == fuzzyn + 1
@pytest.mark.parametrize("fuzzyn", range(1, 6))
def test_matcher_match_fuzzyn_various_edits(en_vocab, fuzzyn):
matcher = Matcher(en_vocab)
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
# words with increasing edit distance of different edit types # words with increasing edit distance of different edit types
words = [ words = [
"GoogleNow", "GoogleNow",