From 213fb85d128fe70e3635e2bbc2ad44208e849c61 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 9 Jan 2023 13:05:24 +0100 Subject: [PATCH] Add FUZZY6-9 operators and update tests --- spacy/matcher/matcher.pyx | 3 ++- spacy/tests/matcher/test_matcher_api.py | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 9e4789cbe..2ca8d6322 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -848,7 +848,8 @@ def _get_attr_values(spec, string_store): # extensions to the matcher introduced in #3173. class _FuzzyPredicate: - operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5") + operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5", + "FUZZY6", "FUZZY7", "FUZZY8", "FUZZY9") def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None, regex=False, fuzzy=None, fuzzy_compare=None): diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index f04b06a34..b44b0c60e 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -198,15 +198,20 @@ def test_matcher_match_fuzzy_set_multiple(en_vocab): ] -@pytest.mark.parametrize("fuzzyn", range(1, 6)) -def test_matcher_match_fuzzyn(en_vocab, fuzzyn): +@pytest.mark.parametrize("fuzzyn", range(1, 10)) +def test_matcher_match_fuzzyn_all_insertions(en_vocab, fuzzyn): matcher = Matcher(en_vocab) matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]]) # words with increasing edit distance - words = ["GoogleNow" + "a" * i for i in range(0, 6)] + words = ["GoogleNow" + "a" * i for i in range(0, 10)] doc = Doc(en_vocab, words) assert len(matcher(doc)) == fuzzyn + 1 + +@pytest.mark.parametrize("fuzzyn", range(1, 6)) +def test_matcher_match_fuzzyn_various_edits(en_vocab, fuzzyn): + matcher = Matcher(en_vocab) + matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]]) # words with increasing edit distance of different edit types words = [ "GoogleNow",