From 070cbf2be0fd54357dbd4a60eb837f1006f85b8d Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Fri, 11 Nov 2022 13:13:58 +0100
Subject: [PATCH] Parametrize fuzzyn tests

---
 spacy/tests/matcher/test_matcher_api.py | 65 +++++++------------------
 1 file changed, 17 insertions(+), 48 deletions(-)

diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index 838fd5515..f9fea559b 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -241,57 +241,26 @@ def test_matcher_match_fuzzy_set4(en_vocab):
     ]
 
 
-def test_matcher_match_fuzzyn1(en_vocab):
-    rules = {
-        "JS": [[{"ORTH": "JavaScript"}]],
-        "GoogleNow": [[{"ORTH": {"FUZZY1": "Google"}}, {"ORTH": "Now"}]],
-        "Java": [[{"LOWER": "java"}]],
-    }
+@pytest.mark.parametrize("fuzzyn", range(1, 6))
+def test_matcher_match_fuzzyn(en_vocab, fuzzyn):
     matcher = Matcher(en_vocab)
-    for key, patterns in rules.items():
-        matcher.add(key, patterns)
+    matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
+    # words with increasing edit distance
+    words = ["GoogleNow" + "a" * i for i in range(0, 6)]
+    doc = Doc(en_vocab, words)
+    assert len(matcher(doc)) == fuzzyn + 1
 
-    words = ["They", "like", "Goggle", "Now", "and", "Jav", "but", "not", "JvvaScrpt"]
-    doc = Doc(matcher.vocab, words=words)
-    assert matcher(doc) == [
-        (doc.vocab.strings["GoogleNow"], 2, 4),
-    ]
-
-
-def test_matcher_match_fuzzyn2(en_vocab):
-    rules = {
-        "JS": [[{"ORTH": "JavaScript"}]],
-        "GoogleNow": [[{"ORTH": "Google"}, {"ORTH": "Now"}]],
-        "Java": [[{"LOWER": {"FUZZY1": "java"}}]],
-    }
-    matcher = Matcher(en_vocab)
-    for key, patterns in rules.items():
-        matcher.add(key, patterns)
-
-    words = ["They", "like", "Goggle", "Now", "and", "Jav", "but", "not", "JvvaScrpt"]
-    doc = Doc(matcher.vocab, words=words)
-    assert matcher(doc) == [
-        (doc.vocab.strings["Java"], 5, 6),
-    ]
-
-
-def test_matcher_match_fuzzyn3(en_vocab):
-    rules = {
-        "JS": [[{"ORTH": {"FUZZY2": "JavaScript"}}]],
-        "GoogleNow": [[{"ORTH": {"FUZZY1": "Google"}}, {"ORTH": "Now"}]],
-        "Java": [[{"LOWER": {"FUZZY1": "java"}}]],
-    }
-    matcher = Matcher(en_vocab)
-    for key, patterns in rules.items():
-        matcher.add(key, patterns)
-
-    words = ["They", "like", "Goggle", "Now", "and", "Jav", "but", "not", "JvvaScrpt"]
-    doc = Doc(matcher.vocab, words=words)
-    assert matcher(doc) == [
-        (doc.vocab.strings["GoogleNow"], 2, 4),
-        (doc.vocab.strings["Java"], 5, 6),
-        (doc.vocab.strings["JS"], 8, 9),
+    # words with increasing edit distance of different edit types
+    words = [
+        "GoogleNow",
+        "GoogleNuw",
+        "GoogleNuew",
+        "GoogleNoweee",
+        "GiggleNuw3",
+        "gouggle5New",
     ]
+    doc = Doc(en_vocab, words)
+    assert len(matcher(doc)) == fuzzyn + 1
 
 
 def test_matcher_match_fuzzyn_set1(en_vocab):
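
For reference, a minimal standalone sketch (not part of the patch) of what the parametrized test exercises: a FUZZY{n} token pattern matches tokens within n edits of the target string, so a doc whose tokens sit at edit distances 0..5 from "GoogleNow" should produce fuzzyn + 1 matches. It assumes a spaCy build that includes the fuzzy-matching Matcher support these tests target; the blank pipeline and the fuzzyn value of 2 are illustrative choices, not taken from the patch.

# Minimal sketch, assuming a spaCy version with Matcher fuzzy matching
# (the feature this patch's tests cover). Not part of the patch itself.
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Doc

nlp = spacy.blank("en")  # illustrative blank English pipeline
fuzzyn = 2               # illustrative value from the parametrized range(1, 6)

matcher = Matcher(nlp.vocab)
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])

# Tokens at edit distance 0..5 from "GoogleNow"; only distances <= fuzzyn match.
words = ["GoogleNow" + "a" * i for i in range(0, 6)]
doc = Doc(nlp.vocab, words=words)
print(len(matcher(doc)))  # expected: fuzzyn + 1, i.e. 3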