diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 91b7e4d9d..567bf901c 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -47,6 +47,11 @@ def ca_tokenizer():
     return get_lang_class("ca").Defaults.create_tokenizer()
 
 
+@pytest.fixture(scope="session")
+def cs_tokenizer():
+    return get_lang_class("cs").Defaults.create_tokenizer()
+
+
 @pytest.fixture(scope="session")
 def da_tokenizer():
     return get_lang_class("da").Defaults.create_tokenizer()
diff --git a/spacy/tests/lang/cs/__init__.py b/spacy/tests/lang/cs/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tests/lang/cs/test_text.py b/spacy/tests/lang/cs/test_text.py
new file mode 100644
index 000000000..d98961738
--- /dev/null
+++ b/spacy/tests/lang/cs/test_text.py
@@ -0,0 +1,26 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.parametrize(
+    "text,match",
+    [
+        ("10", True),
+        ("1", True),
+        ("10.000", True),
+        ("1000", True),
+        ("999,0", True),
+        ("devatenáct", True),
+        ("osmdesát", True),
+        ("kvadrilion", True),
+        ("Pes", False),
+        (",", False),
+        ("1/2", True),
+    ],
+)
+def test_lex_attrs_like_number(cs_tokenizer, text, match):
+    tokens = cs_tokenizer(text)
+    assert len(tokens) == 1
+    assert tokens[0].like_num == match