Format tests

Adriane Boyd 2022-11-11 13:05:07 +01:00
parent 49e93170bb
commit feb068a369
2 changed files with 39 additions and 19 deletions

@@ -56,18 +56,18 @@ def test_levenshtein(dist, a, b):
("a", "ab", -1, True),
("ab", "ac", 1, True),
("ab", "ac", -1, True),
("abc", "cde", 4, False), # 4 reduced because of token length
("abc", "cde", 4, False), # 4 reduced because of token length
("abc", "cde", -1, False),
("abcdef", "cdefgh", 4, True), # 4 not reduced because of token length
("abcdef", "cdefgh", 4, True), # 4 not reduced because of token length
("abcdef", "cdefgh", 3, False),
("abcdef", "cdefgh", -1, True), # default equivalent to 4
("abcdef", "cdefgh", -1, True), # default equivalent to 4
("abcdefgh", "cdefghijk", 5, True),
("abcdefgh", "cdefghijk", 4, False),
("abcdefgh", "cdefghijk", -1, True), # default equivalent to 5
("abcdefgh", "cdefghijk", -1, True), # default equivalent to 5
("abcdefgh", "cdefghijkl", 6, True),
("abcdefgh", "cdefghijkl", 5, False),
("abcdefgh", "cdefghijkl", -1, False), # default equivalent to 5 (max)
]
("abcdefgh", "cdefghijkl", -1, False), # default equivalent to 5 (max)
],
)
def test_fuzzy_match(a, b, fuzzy, expected):
assert Matcher.fuzzy_match(a, b, fuzzy) == expected

@@ -120,6 +120,7 @@ def test_matcher_match_multi(matcher):
# fuzzy matches on specific tokens
def test_matcher_match_fuzzy1(en_vocab):
rules = {
"JS": [[{"ORTH": "JavaScript"}]],
@@ -136,6 +137,7 @@ def test_matcher_match_fuzzy1(en_vocab):
(doc.vocab.strings["GoogleNow"], 2, 4),
]
def test_matcher_match_fuzzy2(en_vocab):
rules = {
"JS": [[{"ORTH": "JavaScript"}]],
@@ -152,6 +154,7 @@ def test_matcher_match_fuzzy2(en_vocab):
(doc.vocab.strings["Java"], 5, 6),
]
def test_matcher_match_fuzzy3(en_vocab):
rules = {
"JS": [[{"ORTH": {"FUZZY": "JavaScript"}}]],
@@ -170,10 +173,9 @@ def test_matcher_match_fuzzy3(en_vocab):
(doc.vocab.strings["JS"], 8, 9),
]
def test_matcher_match_fuzzy_set1(en_vocab):
- rules = {
- "GoogleNow": [[{"ORTH": {"FUZZY": {"IN": ["Google", "Now"]}}, "OP": "+"}]]
- }
+ rules = {"GoogleNow": [[{"ORTH": {"FUZZY": {"IN": ["Google", "Now"]}}, "OP": "+"}]]}
matcher = Matcher(en_vocab)
for key, patterns in rules.items():
matcher.add(key, patterns, greedy="LONGEST")
@@ -184,6 +186,7 @@ def test_matcher_match_fuzzy_set1(en_vocab):
(doc.vocab.strings["GoogleNow"], 2, 4),
]
def test_matcher_match_fuzzy_set2(en_vocab):
rules = {
"GoogleNow": [[{"ORTH": {"FUZZY": {"NOT_IN": ["Google", "Now"]}}, "OP": "+"}]],
@@ -198,11 +201,17 @@ def test_matcher_match_fuzzy_set2(en_vocab):
(doc.vocab.strings["GoogleNow"], 0, 2),
]
def test_matcher_match_fuzzy_set3(en_vocab):
rules = {
"GoogleNow": [[{"ORTH": {"FUZZY": {"IN": ["Google", "Now"]},
"NOT_IN": ["Goggle"]},
"OP": "+"}]]
"GoogleNow": [
[
{
"ORTH": {"FUZZY": {"IN": ["Google", "Now"]}, "NOT_IN": ["Goggle"]},
"OP": "+",
}
]
]
}
matcher = Matcher(en_vocab)
for key, patterns in rules.items():
@@ -214,11 +223,12 @@ def test_matcher_match_fuzzy_set3(en_vocab):
(doc.vocab.strings["GoogleNow"], 3, 4),
]
def test_matcher_match_fuzzy_set4(en_vocab):
rules = {
"QUESTION": [[{"ORTH": {"FUZZY": {"IN": ["what"]},
"NOT_IN": ["that"]}},
{"ORTH": "do"}]]
"QUESTION": [
[{"ORTH": {"FUZZY": {"IN": ["what"]}, "NOT_IN": ["that"]}}, {"ORTH": "do"}]
]
}
matcher = Matcher(en_vocab)
for key, patterns in rules.items():
@@ -247,6 +257,7 @@ def test_matcher_match_fuzzyn1(en_vocab):
(doc.vocab.strings["GoogleNow"], 2, 4),
]
def test_matcher_match_fuzzyn2(en_vocab):
rules = {
"JS": [[{"ORTH": "JavaScript"}]],
@@ -263,6 +274,7 @@ def test_matcher_match_fuzzyn2(en_vocab):
(doc.vocab.strings["Java"], 5, 6),
]
def test_matcher_match_fuzzyn3(en_vocab):
rules = {
"JS": [[{"ORTH": {"FUZZY2": "JavaScript"}}]],
@@ -281,6 +293,7 @@ def test_matcher_match_fuzzyn3(en_vocab):
(doc.vocab.strings["JS"], 8, 9),
]
def test_matcher_match_fuzzyn_set1(en_vocab):
rules = {
"GoogleNow": [[{"ORTH": {"FUZZY2": {"IN": ["Google", "Now"]}}, "OP": "+"}]]
@@ -295,6 +308,7 @@ def test_matcher_match_fuzzyn_set1(en_vocab):
(doc.vocab.strings["GoogleNow"], 2, 4),
]
def test_matcher_match_fuzzyn_set2(en_vocab):
rules = {
"GoogleNow": [[{"ORTH": {"FUZZY2": {"NOT_IN": ["Google", "Now"]}}, "OP": "+"}]],
@@ -309,11 +323,17 @@ def test_matcher_match_fuzzyn_set2(en_vocab):
(doc.vocab.strings["GoogleNow"], 0, 2),
]
def test_matcher_match_fuzzyn_set3(en_vocab):
rules = {
"GoogleNow": [[{"ORTH": {"FUZZY1": {"IN": ["Google", "Now"]},
"NOT_IN": ["Goggle"]},
"OP": "+"}]]
"GoogleNow": [
[
{
"ORTH": {"FUZZY1": {"IN": ["Google", "Now"]}, "NOT_IN": ["Goggle"]},
"OP": "+",
}
]
]
}
matcher = Matcher(en_vocab)
for key, patterns in rules.items():
@@ -324,7 +344,7 @@ def test_matcher_match_fuzzyn_set3(en_vocab):
assert matcher(doc) == [
(doc.vocab.strings["GoogleNow"], 3, 4),
]
def test_matcher_empty_dict(en_vocab):
"""Test matcher allows empty token specs, meaning match on any token."""