mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 21:00:19 +03:00
Parametrize fuzzyn tests
This commit is contained in:
parent
feb068a369
commit
070cbf2be0
|
@ -241,57 +241,26 @@ def test_matcher_match_fuzzy_set4(en_vocab):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_matcher_match_fuzzyn1(en_vocab):
|
@pytest.mark.parametrize("fuzzyn", range(1, 6))
|
||||||
rules = {
|
def test_matcher_match_fuzzyn(en_vocab, fuzzyn):
|
||||||
"JS": [[{"ORTH": "JavaScript"}]],
|
|
||||||
"GoogleNow": [[{"ORTH": {"FUZZY1": "Google"}}, {"ORTH": "Now"}]],
|
|
||||||
"Java": [[{"LOWER": "java"}]],
|
|
||||||
}
|
|
||||||
matcher = Matcher(en_vocab)
|
matcher = Matcher(en_vocab)
|
||||||
for key, patterns in rules.items():
|
matcher.add("GoogleNow", [[{"ORTH": {f"FUZZY{fuzzyn}": "GoogleNow"}}]])
|
||||||
matcher.add(key, patterns)
|
# words with increasing edit distance
|
||||||
|
words = ["GoogleNow" + "a" * i for i in range(0, 6)]
|
||||||
|
doc = Doc(en_vocab, words)
|
||||||
|
assert len(matcher(doc)) == fuzzyn + 1
|
||||||
|
|
||||||
words = ["They", "like", "Goggle", "Now", "and", "Jav", "but", "not", "JvvaScrpt"]
|
# words with increasing edit distance of different edit types
|
||||||
doc = Doc(matcher.vocab, words=words)
|
words = [
|
||||||
assert matcher(doc) == [
|
"GoogleNow",
|
||||||
(doc.vocab.strings["GoogleNow"], 2, 4),
|
"GoogleNuw",
|
||||||
]
|
"GoogleNuew",
|
||||||
|
"GoogleNoweee",
|
||||||
|
"GiggleNuw3",
|
||||||
def test_matcher_match_fuzzyn2(en_vocab):
|
"gouggle5New",
|
||||||
rules = {
|
|
||||||
"JS": [[{"ORTH": "JavaScript"}]],
|
|
||||||
"GoogleNow": [[{"ORTH": "Google"}, {"ORTH": "Now"}]],
|
|
||||||
"Java": [[{"LOWER": {"FUZZY1": "java"}}]],
|
|
||||||
}
|
|
||||||
matcher = Matcher(en_vocab)
|
|
||||||
for key, patterns in rules.items():
|
|
||||||
matcher.add(key, patterns)
|
|
||||||
|
|
||||||
words = ["They", "like", "Goggle", "Now", "and", "Jav", "but", "not", "JvvaScrpt"]
|
|
||||||
doc = Doc(matcher.vocab, words=words)
|
|
||||||
assert matcher(doc) == [
|
|
||||||
(doc.vocab.strings["Java"], 5, 6),
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def test_matcher_match_fuzzyn3(en_vocab):
|
|
||||||
rules = {
|
|
||||||
"JS": [[{"ORTH": {"FUZZY2": "JavaScript"}}]],
|
|
||||||
"GoogleNow": [[{"ORTH": {"FUZZY1": "Google"}}, {"ORTH": "Now"}]],
|
|
||||||
"Java": [[{"LOWER": {"FUZZY1": "java"}}]],
|
|
||||||
}
|
|
||||||
matcher = Matcher(en_vocab)
|
|
||||||
for key, patterns in rules.items():
|
|
||||||
matcher.add(key, patterns)
|
|
||||||
|
|
||||||
words = ["They", "like", "Goggle", "Now", "and", "Jav", "but", "not", "JvvaScrpt"]
|
|
||||||
doc = Doc(matcher.vocab, words=words)
|
|
||||||
assert matcher(doc) == [
|
|
||||||
(doc.vocab.strings["GoogleNow"], 2, 4),
|
|
||||||
(doc.vocab.strings["Java"], 5, 6),
|
|
||||||
(doc.vocab.strings["JS"], 8, 9),
|
|
||||||
]
|
]
|
||||||
|
doc = Doc(en_vocab, words)
|
||||||
|
assert len(matcher(doc)) == fuzzyn + 1
|
||||||
|
|
||||||
|
|
||||||
def test_matcher_match_fuzzyn_set1(en_vocab):
|
def test_matcher_match_fuzzyn_set1(en_vocab):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user