diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py
index fa8e132c0..2fceece49 100644
--- a/spacy/tests/lang/hu/test_tokenizer.py
+++ b/spacy/tests/lang/hu/test_tokenizer.py
@@ -296,9 +296,8 @@ WIKI_TESTS = [
     ("cérium(IV)-oxid", ["cérium", "(", "IV", ")", "-oxid"]),
 ]
 
-TESTCASES = (
-    DEFAULT_TESTS
-    + DOT_TESTS
+EXTRA_TESTS = (
+    DOT_TESTS
     + QUOTE_TESTS
     + NUMBER_TESTS
     + HYPHEN_TESTS
@@ -306,8 +305,16 @@ TESTCASES = (
     + TYPO_TESTS
 )
 
+# normal: default tests + 10% of extra tests (copy, so DEFAULT_TESTS itself is not mutated)
+TESTS = list(DEFAULT_TESTS)
+TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0])
 
-@pytest.mark.parametrize("text,expected_tokens", TESTCASES)
+# slow: remaining 90% of extra tests
+SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0]
+TESTS.extend([pytest.param(x[0], x[1], marks=pytest.mark.slow()) if not isinstance(x[0], tuple) else x for x in SLOW_TESTS])
+
+
+@pytest.mark.parametrize("text,expected_tokens", TESTS)
 def test_hu_tokenizer_handles_testcases(hu_tokenizer, text, expected_tokens):
     tokens = hu_tokenizer(text)
     token_list = [token.text for token in tokens if not token.is_space]