From 02aa6270924c1a3894bc7103804c5e55c2faffe0 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 28 Nov 2022 13:34:32 +0100 Subject: [PATCH] Format and turn off formatting for non-greedy test cases --- spacy/tests/matcher/test_matcher_logic.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/spacy/tests/matcher/test_matcher_logic.py b/spacy/tests/matcher/test_matcher_logic.py index 29c92e550..4810f599b 100644 --- a/spacy/tests/matcher/test_matcher_logic.py +++ b/spacy/tests/matcher/test_matcher_logic.py @@ -801,6 +801,7 @@ def test_matcher_with_alignments_non_greedy(en_vocab): def test_matcher_non_greedy_operator(en_vocab): + # fmt: off cases = [ (0, "aabbab", "a*? b", ["a a b", "a b", "b", "b", "a b", "b"]), (1, "aabbab", "a+? b", ["a b", "a a b", "a b"]), @@ -810,19 +811,20 @@ def test_matcher_non_greedy_operator(en_vocab): (5, "aabbab", "b+? b+?", ["b b"]), (6, "aabbab", "a* b*?", ["a", "a a", "a", "a"]), (7, "aabbab", "a*? b*", ["a a b", "a b", "b", "a a b b", "a b b", "b b", "b", "a b", "b"]), - (8, "aabbc", "a* b*? c*?", ["a", "a a", 'a']), + (8, "aabbc", "a* b*? c*?", ["a", "a a", "a"]), (9, "aabbc", "a* b*? c", ["a a b b c", "a b b c", "b b c", "b c", "c"]), (10, "abc", "a* b*? c*", ["a", "a b c", "b c", "c"]), - # in spaCy, quantifier "*" returns __all__possible__ matches which is different from regex - # in spaCy, quantifier "*?" is designed to return only the non-greedy results from all possible matches + # in spaCy, quantifier "*" returns __all__possible__ matches which is + # different from regex + # quantifier "*?" is designed to return only the non-greedy results from # all possible matches # Result 1: a # Result 2: a b c # Result 3: c - # Among the 3 results, Result 2 might be contentious to some, but we argue that this should be the correct - # behaviour since 'a' and 'c' are matches thus the longest, first possible string "a b c" - # should be one of the results - + # Among the 3 results, Result 2 might be contentious to some, but we + # argue that this should be the correct behavior since "a" and "c" are + # matches thus the longest, first possible string "a b c" should be one + # of the results (11, "aabbc", "a+? b*? c", ["a b b c", "a a b b c"]), (12, "aabbc", "a+? b+? c", ["a b b c", "a a b b c"]), (13, "abbc", "a* b*? c?", ["a", "a b b c", "b b c", "b c", "c"]), @@ -830,8 +832,8 @@ def test_matcher_non_greedy_operator(en_vocab): (15, "abbxb", "a*? b+? c*", ["a b", "b", "b", "b"]), (16, "abbcbc", "a*? b+? c*", ["a b", "b", "b", "b c", "a b b c", "b b c", "b", "b c"]), (17, "abbcbc", "a*? b+? c", ["b c", "a b b c", "b b c", "b c"]), - ] + # fmt: on for case_id, string, pattern_str, results in cases: matcher = Matcher(en_vocab) doc = Doc(matcher.vocab, words=list(string))