mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 12:50:20 +03:00
Add FUZZY6-9 operators and update tests
This commit is contained in:
parent
8722f857eb
commit
213fb85d12
|
@@ -848,7 +848,8 @@ def _get_attr_values(spec, string_store):
|
||||||
# extensions to the matcher introduced in #3173.
|
# extensions to the matcher introduced in #3173.
|
||||||
|
|
||||||
class _FuzzyPredicate:
|
class _FuzzyPredicate:
|
||||||
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5")
|
operators = ("FUZZY", "FUZZY1", "FUZZY2", "FUZZY3", "FUZZY4", "FUZZY5",
|
||||||
|
"FUZZY6", "FUZZY7", "FUZZY8", "FUZZY9")
|
||||||
|
|
||||||
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None,
|
def __init__(self, i, attr, value, predicate, is_extension=False, vocab=None,
|
||||||
regex=False, fuzzy=None, fuzzy_compare=None):
|
regex=False, fuzzy=None, fuzzy_compare=None):
|
||||||
|
|
|
@@ -198,15 +198,20 @@ def test_matcher_match_fuzzy_set_multiple(en_vocab):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("fuzzyn", range(1, 6))
|
@pytest.mark.parametrize("fuzzyn", range(1, 10))
|
||||||
def test_matcher_match_fuzzyn(en_vocab, fuzzyn):
|
def test_matcher_match_fuzzyn_all_insertions(en_vocab, fuzzyn):
|
||||||
matcher = Matcher(en_vocab)
|
matcher = Matcher(en_vocab)
|
||||||
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
|
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
|
||||||
# words with increasing edit distance
|
# words with increasing edit distance
|
||||||
words = ["GoogleNow" + "a" * i for i in range(0, 6)]
|
words = ["GoogleNow" + "a" * i for i in range(0, 10)]
|
||||||
doc = Doc(en_vocab, words)
|
doc = Doc(en_vocab, words)
|
||||||
assert len(matcher(doc)) == fuzzyn + 1
|
assert len(matcher(doc)) == fuzzyn + 1
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("fuzzyn", range(1, 6))
|
||||||
|
def test_matcher_match_fuzzyn_various_edits(en_vocab, fuzzyn):
|
||||||
|
matcher = Matcher(en_vocab)
|
||||||
|
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
|
||||||
# words with increasing edit distance of different edit types
|
# words with increasing edit distance of different edit types
|
||||||
words = [
|
words = [
|
||||||
"GoogleNow",
|
"GoogleNow",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user