mirror of https://github.com/explosion/spaCy.git
Format tests
This commit is contained in:
parent 49e93170bb
commit feb068a369
@@ -56,18 +56,18 @@ def test_levenshtein(dist, a, b):
         ("a", "ab", -1, True),
         ("ab", "ac", 1, True),
         ("ab", "ac", -1, True),
-        ("abc", "cde", 4, False), # 4 reduced because of token length
+        ("abc", "cde", 4, False),  # 4 reduced because of token length
         ("abc", "cde", -1, False),
-        ("abcdef", "cdefgh", 4, True), # 4 not reduced because of token length
+        ("abcdef", "cdefgh", 4, True),  # 4 not reduced because of token length
         ("abcdef", "cdefgh", 3, False),
-        ("abcdef", "cdefgh", -1, True), # default equivalent to 4
+        ("abcdef", "cdefgh", -1, True),  # default equivalent to 4
         ("abcdefgh", "cdefghijk", 5, True),
         ("abcdefgh", "cdefghijk", 4, False),
-        ("abcdefgh", "cdefghijk", -1, True), # default equivalent to 5
+        ("abcdefgh", "cdefghijk", -1, True),  # default equivalent to 5
         ("abcdefgh", "cdefghijkl", 6, True),
         ("abcdefgh", "cdefghijkl", 5, False),
-        ("abcdefgh", "cdefghijkl", -1, False), # default equivalent to 5 (max)
-    ]
+        ("abcdefgh", "cdefghijkl", -1, False),  # default equivalent to 5 (max)
+    ],
 )
 def test_fuzzy_match(a, b, fuzzy, expected):
     assert Matcher.fuzzy_match(a, b, fuzzy) == expected
@@ -120,6 +120,7 @@ def test_matcher_match_multi(matcher):
 
 # fuzzy matches on specific tokens
 
+
 def test_matcher_match_fuzzy1(en_vocab):
     rules = {
         "JS": [[{"ORTH": "JavaScript"}]],
@@ -136,6 +137,7 @@ def test_matcher_match_fuzzy1(en_vocab):
         (doc.vocab.strings["GoogleNow"], 2, 4),
     ]
 
+
 def test_matcher_match_fuzzy2(en_vocab):
     rules = {
         "JS": [[{"ORTH": "JavaScript"}]],
@@ -152,6 +154,7 @@ def test_matcher_match_fuzzy2(en_vocab):
         (doc.vocab.strings["Java"], 5, 6),
     ]
 
+
 def test_matcher_match_fuzzy3(en_vocab):
     rules = {
         "JS": [[{"ORTH": {"FUZZY": "JavaScript"}}]],
@@ -170,10 +173,9 @@ def test_matcher_match_fuzzy3(en_vocab):
         (doc.vocab.strings["JS"], 8, 9),
     ]
 
+
 def test_matcher_match_fuzzy_set1(en_vocab):
-    rules = {
-        "GoogleNow": [[{"ORTH": {"FUZZY": {"IN": ["Google", "Now"]}}, "OP": "+"}]]
-    }
+    rules = {"GoogleNow": [[{"ORTH": {"FUZZY": {"IN": ["Google", "Now"]}}, "OP": "+"}]]}
     matcher = Matcher(en_vocab)
     for key, patterns in rules.items():
         matcher.add(key, patterns, greedy="LONGEST")
@@ -184,6 +186,7 @@ def test_matcher_match_fuzzy_set1(en_vocab):
         (doc.vocab.strings["GoogleNow"], 2, 4),
     ]
 
+
 def test_matcher_match_fuzzy_set2(en_vocab):
     rules = {
         "GoogleNow": [[{"ORTH": {"FUZZY": {"NOT_IN": ["Google", "Now"]}}, "OP": "+"}]],
@@ -198,11 +201,17 @@ def test_matcher_match_fuzzy_set2(en_vocab):
         (doc.vocab.strings["GoogleNow"], 0, 2),
     ]
 
+
 def test_matcher_match_fuzzy_set3(en_vocab):
     rules = {
-        "GoogleNow": [[{"ORTH": {"FUZZY": {"IN": ["Google", "Now"]},
-                                 "NOT_IN": ["Goggle"]},
-                        "OP": "+"}]]
+        "GoogleNow": [
+            [
+                {
+                    "ORTH": {"FUZZY": {"IN": ["Google", "Now"]}, "NOT_IN": ["Goggle"]},
+                    "OP": "+",
+                }
+            ]
+        ]
     }
     matcher = Matcher(en_vocab)
     for key, patterns in rules.items():
@@ -214,11 +223,12 @@ def test_matcher_match_fuzzy_set3(en_vocab):
         (doc.vocab.strings["GoogleNow"], 3, 4),
     ]
 
+
 def test_matcher_match_fuzzy_set4(en_vocab):
     rules = {
-        "QUESTION": [[{"ORTH": {"FUZZY": {"IN": ["what"]},
-                                "NOT_IN": ["that"]}},
-                      {"ORTH": "do"}]]
+        "QUESTION": [
+            [{"ORTH": {"FUZZY": {"IN": ["what"]}, "NOT_IN": ["that"]}}, {"ORTH": "do"}]
+        ]
     }
     matcher = Matcher(en_vocab)
     for key, patterns in rules.items():
@@ -247,6 +257,7 @@ def test_matcher_match_fuzzyn1(en_vocab):
         (doc.vocab.strings["GoogleNow"], 2, 4),
     ]
 
+
 def test_matcher_match_fuzzyn2(en_vocab):
     rules = {
         "JS": [[{"ORTH": "JavaScript"}]],
@@ -263,6 +274,7 @@ def test_matcher_match_fuzzyn2(en_vocab):
         (doc.vocab.strings["Java"], 5, 6),
     ]
 
+
 def test_matcher_match_fuzzyn3(en_vocab):
     rules = {
         "JS": [[{"ORTH": {"FUZZY2": "JavaScript"}}]],
@@ -281,6 +293,7 @@ def test_matcher_match_fuzzyn3(en_vocab):
         (doc.vocab.strings["JS"], 8, 9),
     ]
 
+
 def test_matcher_match_fuzzyn_set1(en_vocab):
     rules = {
         "GoogleNow": [[{"ORTH": {"FUZZY2": {"IN": ["Google", "Now"]}}, "OP": "+"}]]
@@ -295,6 +308,7 @@ def test_matcher_match_fuzzyn_set1(en_vocab):
         (doc.vocab.strings["GoogleNow"], 2, 4),
     ]
 
+
 def test_matcher_match_fuzzyn_set2(en_vocab):
     rules = {
         "GoogleNow": [[{"ORTH": {"FUZZY2": {"NOT_IN": ["Google", "Now"]}}, "OP": "+"}]],
@@ -309,11 +323,17 @@ def test_matcher_match_fuzzyn_set2(en_vocab):
         (doc.vocab.strings["GoogleNow"], 0, 2),
     ]
 
+
 def test_matcher_match_fuzzyn_set3(en_vocab):
     rules = {
-        "GoogleNow": [[{"ORTH": {"FUZZY1": {"IN": ["Google", "Now"]},
-                                 "NOT_IN": ["Goggle"]},
-                        "OP": "+"}]]
+        "GoogleNow": [
+            [
+                {
+                    "ORTH": {"FUZZY1": {"IN": ["Google", "Now"]}, "NOT_IN": ["Goggle"]},
+                    "OP": "+",
+                }
+            ]
+        ]
     }
     matcher = Matcher(en_vocab)
     for key, patterns in rules.items():
@@ -324,7 +344,7 @@ def test_matcher_match_fuzzyn_set3(en_vocab):
     assert matcher(doc) == [
         (doc.vocab.strings["GoogleNow"], 3, 4),
     ]
 
-
+
 def test_matcher_empty_dict(en_vocab):
     """Test matcher allows empty token specs, meaning match on any token."""
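The tests reformatted above exercise spaCy's fuzzy-matching token attributes: FUZZY for a length-dependent default edit distance, and FUZZY1/FUZZY2/… for an explicit edit-distance bound, optionally combined with the IN / NOT_IN set predicates. As a rough illustration of how these patterns are used outside the test suite, here is a minimal sketch assuming a spaCy version that includes this fuzzy matching support (v3.5 or later); the example sentence, the FUZZY_DEMO label, and the misspelled tokens are illustrative and not taken from this commit:

import spacy
from spacy.matcher import Matcher

nlp = spacy.blank("en")
matcher = Matcher(nlp.vocab)

# FUZZY matches a misspelled token within a length-dependent edit distance;
# FUZZY2 caps the allowed Levenshtein distance at 2 (cf. the FUZZYn tests above).
patterns = [
    [{"ORTH": {"FUZZY": "JavaScript"}}],
    [{"ORTH": {"FUZZY2": {"IN": ["Google", "Now"]}}, "OP": "+"}],
]
matcher.add("FUZZY_DEMO", patterns, greedy="LONGEST")

doc = nlp("I like Goggle Now and JavaScrpt.")
for match_id, start, end in matcher(doc):
    print(nlp.vocab.strings[match_id], doc[start:end].text)

# The static helper checked in the first hunk: 4 edits are allowed here, so True.
print(Matcher.fuzzy_match("abcdef", "cdefgh", 4))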