Mark most Hungarian tokenizer test cases as slow (#4883)

* Mark most Hungarian tokenizer test cases as slow

  Mark most Hungarian tokenizer test cases as slow to reduce the runtime of the test suite in ordinary usage:

  * for normal tests: run default tests plus 10% of the detailed tests
  * for slow tests: run all tests

* Rework to mark individual tests as slow
2025-11-01 16:37:45 +03:00 · 2020-01-08 12:34:06 +01:00 · 2020-01-08 12:34:06 +01:00 · aef83e8070
commit aef83e8070
parent 7b96a5e10f
1 changed files with 11 additions and 4 deletions
--- a/spacy/tests/lang/hu/test_tokenizer.py
+++ b/spacy/tests/lang/hu/test_tokenizer.py
@@ -296,9 +296,8 @@ WIKI_TESTS = [
    ("cérium(IV)-oxid", ["cérium", "(", "IV", ")", "-oxid"]),
 ]

-TESTCASES = (
-    DEFAULT_TESTS
-    + DOT_TESTS
+EXTRA_TESTS = (
+    DOT_TESTS
    + QUOTE_TESTS
    + NUMBER_TESTS
    + HYPHEN_TESTS
@@ -306,8 +305,16 @@ TESTCASES = (
    + TYPO_TESTS
 )

+# normal: default tests + 10% of extra tests
+TESTS = DEFAULT_TESTS
+TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0])

-@pytest.mark.parametrize("text,expected_tokens", TESTCASES)
+# slow: remaining 90% of extra tests
+SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0]
+TESTS.extend([pytest.param(x[0], x[1], marks=pytest.mark.slow()) if not isinstance(x[0], tuple) else x for x in SLOW_TESTS])
+
+
+@pytest.mark.parametrize("text,expected_tokens", TESTS)
 def test_hu_tokenizer_handles_testcases(hu_tokenizer, text, expected_tokens):
    tokens = hu_tokenizer(text)
    token_list = [token.text for token in tokens if not token.is_space]