Add FUZZY6-9 operators and update tests

This commit is contained in:
Adriane Boyd 2023-01-09 13:05:24 +01:00
parent 8722f857eb
commit 213fb85d12
2 changed files with 10 additions and 4 deletions

View File

@@ -848,7 +848,8 @@ def _get_attr_values(spec, string_store):
# extensions to the matcher introduced in #3173.
class _FuzzyPredicate:
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5")
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5",
"FUZZY6", "FUZZY7", "FUZZY8", "FUZZY9")
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None,
regex=False, fuzzy=None, fuzzy_compare=None):

View File

@@ -198,15 +198,20 @@ def test_matcher_match_fuzzy_set_multiple(en_vocab):
]
@pytest.mark.parametrize("fuzzyn", range(1, 6))
def test_matcher_match_fuzzyn(en_vocab, fuzzyn):
@pytest.mark.parametrize("fuzzyn", range(1, 10))
def test_matcher_match_fuzzyn_all_insertions(en_vocab, fuzzyn):
matcher = Matcher(en_vocab)
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
# words with increasing edit distance
words = ["GoogleNow" + "a" * i for i in range(0, 6)]
words = ["GoogleNow" + "a" * i for i in range(0, 10)]
doc = Doc(en_vocab, words)
assert len(matcher(doc)) == fuzzyn + 1
@pytest.mark.parametrize("fuzzyn", range(1, 6))
def test_matcher_match_fuzzyn_various_edits(en_vocab, fuzzyn):
matcher = Matcher(en_vocab)
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
# words with increasing edit distance of different edit types
words = [
"GoogleNow",