mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-07 13:44:55 +03:00
Format and turn off formatting for non-greedy test cases
This commit is contained in:
parent
a6f0dca441
commit
02aa627092
|
@ -801,6 +801,7 @@ def test_matcher_with_alignments_non_greedy(en_vocab):
|
||||||
|
|
||||||
|
|
||||||
def test_matcher_non_greedy_operator(en_vocab):
|
def test_matcher_non_greedy_operator(en_vocab):
|
||||||
|
# fmt: off
|
||||||
cases = [
|
cases = [
|
||||||
(0, "aabbab", "a*? b", ["a a b", "a b", "b", "b", "a b", "b"]),
|
(0, "aabbab", "a*? b", ["a a b", "a b", "b", "b", "a b", "b"]),
|
||||||
(1, "aabbab", "a+? b", ["a b", "a a b", "a b"]),
|
(1, "aabbab", "a+? b", ["a b", "a a b", "a b"]),
|
||||||
|
@ -810,19 +811,20 @@ def test_matcher_non_greedy_operator(en_vocab):
|
||||||
(5, "aabbab", "b+? b+?", ["b b"]),
|
(5, "aabbab", "b+? b+?", ["b b"]),
|
||||||
(6, "aabbab", "a* b*?", ["a", "a a", "a", "a"]),
|
(6, "aabbab", "a* b*?", ["a", "a a", "a", "a"]),
|
||||||
(7, "aabbab", "a*? b*", ["a a b", "a b", "b", "a a b b", "a b b", "b b", "b", "a b", "b"]),
|
(7, "aabbab", "a*? b*", ["a a b", "a b", "b", "a a b b", "a b b", "b b", "b", "a b", "b"]),
|
||||||
(8, "aabbc", "a* b*? c*?", ["a", "a a", 'a']),
|
(8, "aabbc", "a* b*? c*?", ["a", "a a", "a"]),
|
||||||
(9, "aabbc", "a* b*? c", ["a a b b c", "a b b c", "b b c", "b c", "c"]),
|
(9, "aabbc", "a* b*? c", ["a a b b c", "a b b c", "b b c", "b c", "c"]),
|
||||||
|
|
||||||
(10, "abc", "a* b*? c*", ["a", "a b c", "b c", "c"]),
|
(10, "abc", "a* b*? c*", ["a", "a b c", "b c", "c"]),
|
||||||
# in spaCy, quantifier "*" returns __all__possible__ matches which is different from regex
|
# in spaCy, quantifier "*" returns __all__possible__ matches which is
|
||||||
# in spaCy, quantifier "*?" is designed to return only the non-greedy results from all possible matches
|
# different from regex
|
||||||
|
# quantifier "*?" is designed to return only the non-greedy results from all possible matches
|
||||||
# Result 1: a
|
# Result 1: a
|
||||||
# Result 2: a b c
|
# Result 2: a b c
|
||||||
# Result 3: c
|
# Result 3: c
|
||||||
# Among the 3 results, Result 2 might be contentious to some, but we argue that this should be the correct
|
# Among the 3 results, Result 2 might be contentious to some, but we
|
||||||
# behaviour since 'a' and 'c' are matches thus the longest, first possible string "a b c"
|
# argue that this should be the correct behavior since "a" and "c" are
|
||||||
# should be one of the results
|
# matches thus the longest, first possible string "a b c" should be one
|
||||||
|
# of the results
|
||||||
(11, "aabbc", "a+? b*? c", ["a b b c", "a a b b c"]),
|
(11, "aabbc", "a+? b*? c", ["a b b c", "a a b b c"]),
|
||||||
(12, "aabbc", "a+? b+? c", ["a b b c", "a a b b c"]),
|
(12, "aabbc", "a+? b+? c", ["a b b c", "a a b b c"]),
|
||||||
(13, "abbc", "a* b*? c?", ["a", "a b b c", "b b c", "b c", "c"]),
|
(13, "abbc", "a* b*? c?", ["a", "a b b c", "b b c", "b c", "c"]),
|
||||||
|
@ -830,8 +832,8 @@ def test_matcher_non_greedy_operator(en_vocab):
|
||||||
(15, "abbxb", "a*? b+? c*", ["a b", "b", "b", "b"]),
|
(15, "abbxb", "a*? b+? c*", ["a b", "b", "b", "b"]),
|
||||||
(16, "abbcbc", "a*? b+? c*", ["a b", "b", "b", "b c", "a b b c", "b b c", "b", "b c"]),
|
(16, "abbcbc", "a*? b+? c*", ["a b", "b", "b", "b c", "a b b c", "b b c", "b", "b c"]),
|
||||||
(17, "abbcbc", "a*? b+? c", ["b c", "a b b c", "b b c", "b c"]),
|
(17, "abbcbc", "a*? b+? c", ["b c", "a b b c", "b b c", "b c"]),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
# fmt: on
|
||||||
for case_id, string, pattern_str, results in cases:
|
for case_id, string, pattern_str, results in cases:
|
||||||
matcher = Matcher(en_vocab)
|
matcher = Matcher(en_vocab)
|
||||||
doc = Doc(matcher.vocab, words=list(string))
|
doc = Doc(matcher.vocab, words=list(string))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user