Format and turn off formatting for non-greedy test cases

This commit is contained in:
Adriane Boyd 2022-11-28 13:34:32 +01:00
parent a6f0dca441
commit 02aa627092

View File

@@ -801,6 +801,7 @@ def test_matcher_with_alignments_non_greedy(en_vocab):
def test_matcher_non_greedy_operator(en_vocab): def test_matcher_non_greedy_operator(en_vocab):
# fmt: off
cases = [ cases = [
(0, "aabbab", "a*? b", ["a a b", "a b", "b", "b", "a b", "b"]), (0, "aabbab", "a*? b", ["a a b", "a b", "b", "b", "a b", "b"]),
(1, "aabbab", "a+? b", ["a b", "a a b", "a b"]), (1, "aabbab", "a+? b", ["a b", "a a b", "a b"]),
@@ -810,19 +811,20 @@ def test_matcher_non_greedy_operator(en_vocab):
(5, "aabbab", "b+? b+?", ["b b"]), (5, "aabbab", "b+? b+?", ["b b"]),
(6, "aabbab", "a* b*?", ["a", "a a", "a", "a"]), (6, "aabbab", "a* b*?", ["a", "a a", "a", "a"]),
(7, "aabbab", "a*? b*", ["a a b", "a b", "b", "a a b b", "a b b", "b b", "b", "a b", "b"]), (7, "aabbab", "a*? b*", ["a a b", "a b", "b", "a a b b", "a b b", "b b", "b", "a b", "b"]),
(8, "aabbc", "a* b*? c*?", ["a", "a a", 'a']), (8, "aabbc", "a* b*? c*?", ["a", "a a", "a"]),
(9, "aabbc", "a* b*? c", ["a a b b c", "a b b c", "b b c", "b c", "c"]), (9, "aabbc", "a* b*? c", ["a a b b c", "a b b c", "b b c", "b c", "c"]),
(10, "abc", "a* b*? c*", ["a", "a b c", "b c", "c"]), (10, "abc", "a* b*? c*", ["a", "a b c", "b c", "c"]),
# in spaCy, quantifier "*" returns __all__possible__ matches which is different from regex # in spaCy, quantifier "*" returns __all__possible__ matches which is
# in spaCy, quantifier "*?" is designed to return only the non-greedy results from all possible matches # different from regex
# quantifier "*?" is designed to return only the non-greedy results from # all possible matches
# Result 1: a # Result 1: a
# Result 2: a b c # Result 2: a b c
# Result 3: c # Result 3: c
# Among the 3 results, Result 2 might be contentious to some, but we argue that this should be the correct # Among the 3 results, Result 2 might be contentious to some, but we
# behaviour since 'a' and 'c' are matches thus the longest, first possible string "a b c" # argue that this should be the correct behavior since "a" and "c" are
# should be one of the results # matches thus the longest, first possible string "a b c" should be one
# of the results
(11, "aabbc", "a+? b*? c", ["a b b c", "a a b b c"]), (11, "aabbc", "a+? b*? c", ["a b b c", "a a b b c"]),
(12, "aabbc", "a+? b+? c", ["a b b c", "a a b b c"]), (12, "aabbc", "a+? b+? c", ["a b b c", "a a b b c"]),
(13, "abbc", "a* b*? c?", ["a", "a b b c", "b b c", "b c", "c"]), (13, "abbc", "a* b*? c?", ["a", "a b b c", "b b c", "b c", "c"]),
@@ -830,8 +832,8 @@ def test_matcher_non_greedy_operator(en_vocab):
(15, "abbxb", "a*? b+? c*", ["a b", "b", "b", "b"]), (15, "abbxb", "a*? b+? c*", ["a b", "b", "b", "b"]),
(16, "abbcbc", "a*? b+? c*", ["a b", "b", "b", "b c", "a b b c", "b b c", "b", "b c"]), (16, "abbcbc", "a*? b+? c*", ["a b", "b", "b", "b c", "a b b c", "b b c", "b", "b c"]),
(17, "abbcbc", "a*? b+? c", ["b c", "a b b c", "b b c", "b c"]), (17, "abbcbc", "a*? b+? c", ["b c", "a b b c", "b b c", "b c"]),
] ]
# fmt: on
for case_id, string, pattern_str, results in cases: for case_id, string, pattern_str, results in cases:
matcher = Matcher(en_vocab) matcher = Matcher(en_vocab)
doc = Doc(matcher.vocab, words=list(string)) doc = Doc(matcher.vocab, words=list(string))