Format and turn off formatting for non-greedy test cases

This commit is contained in:
Adriane Boyd 2022-11-28 13:34:32 +01:00
parent a6f0dca441
commit 02aa627092

View File

@ -801,6 +801,7 @@ def test_matcher_with_alignments_non_greedy(en_vocab):
def test_matcher_non_greedy_operator(en_vocab):
# fmt: off
cases = [
(0, "aabbab", "a*? b", ["a a b", "a b", "b", "b", "a b", "b"]),
(1, "aabbab", "a+? b", ["a b", "a a b", "a b"]),
@ -810,19 +811,20 @@ def test_matcher_non_greedy_operator(en_vocab):
(5, "aabbab", "b+? b+?", ["b b"]),
(6, "aabbab", "a* b*?", ["a", "a a", "a", "a"]),
(7, "aabbab", "a*? b*", ["a a b", "a b", "b", "a a b b", "a b b", "b b", "b", "a b", "b"]),
(8, "aabbc", "a* b*? c*?", ["a", "a a", 'a']),
(8, "aabbc", "a* b*? c*?", ["a", "a a", "a"]),
(9, "aabbc", "a* b*? c", ["a a b b c", "a b b c", "b b c", "b c", "c"]),
(10, "abc", "a* b*? c*", ["a", "a b c", "b c", "c"]),
# in spaCy, quantifier "*" returns __all__possible__ matches which is different from regex
# in spaCy, quantifier "*?" is designed to return only the non-greedy results from all possible matches
# in spaCy, quantifier "*" returns __all__possible__ matches which is
# different from regex
# quantifier "*?" is designed to return only the non-greedy results from
# all possible matches
# Result 1: a
# Result 2: a b c
# Result 3: c
# Among the 3 results, Result 2 might be contentious to some, but we argue that this should be the correct
# behaviour since 'a' and 'c' are matches thus the longest, first possible string "a b c"
# should be one of the results
# Among the 3 results, Result 2 might be contentious to some, but we
# argue that this should be the correct behavior since "a" and "c" are
# matches thus the longest, first possible string "a b c" should be one
# of the results
(11, "aabbc", "a+? b*? c", ["a b b c", "a a b b c"]),
(12, "aabbc", "a+? b+? c", ["a b b c", "a a b b c"]),
(13, "abbc", "a* b*? c?", ["a", "a b b c", "b b c", "b c", "c"]),
@ -830,8 +832,8 @@ def test_matcher_non_greedy_operator(en_vocab):
(15, "abbxb", "a*? b+? c*", ["a b", "b", "b", "b"]),
(16, "abbcbc", "a*? b+? c*", ["a b", "b", "b", "b c", "a b b c", "b b c", "b", "b c"]),
(17, "abbcbc", "a*? b+? c", ["b c", "a b b c", "b b c", "b c"]),
]
# fmt: on
for case_id, string, pattern_str, results in cases:
matcher = Matcher(en_vocab)
doc = Doc(matcher.vocab, words=list(string))