mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Adding num_like test for Czech (#5946)
* Create lex_attrs.py Hello, I am missing Czech language support in spaCy, so I would like to help push it forward a little. This file is based on the other lex_attrs.py files, just translated to Czech. * Update __init__.py Updated for use with the new Czech lex_attrs file * Update stop_words.py * Create test_text.py * Add like_num testing for Czech Co-authored-by: holubvl3 <47881982+holubvl3@users.noreply.github.com> Co-authored-by: holubvl3 <vilemrousi@gmail.com> Co-authored-by: Vladimír Holubec <vholubec@arcdata.cz>
This commit is contained in:
parent
a341b4ef09
commit
56eabcb2f2
|
@ -47,6 +47,11 @@ def ca_tokenizer():
|
||||||
return get_lang_class("ca").Defaults.create_tokenizer()
|
return get_lang_class("ca").Defaults.create_tokenizer()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
def cs_tokenizer():
    """Session-scoped fixture returning a tokenizer for the "cs" language.

    The tokenizer is built from the defaults of the language class that
    `get_lang_class("cs")` resolves to.
    """
    czech_cls = get_lang_class("cs")
    return czech_cls.Defaults.create_tokenizer()
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def da_tokenizer():
|
def da_tokenizer():
|
||||||
return get_lang_class("da").Defaults.create_tokenizer()
|
return get_lang_class("da").Defaults.create_tokenizer()
|
||||||
|
|
0
spacy/tests/lang/cs/__init__.py
Normal file
0
spacy/tests/lang/cs/__init__.py
Normal file
26
spacy/tests/lang/cs/test_text.py
Normal file
26
spacy/tests/lang/cs/test_text.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
# coding: utf-8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "text,match",
    [
        ("10", True),
        ("1", True),
        ("10.000", True),
        ("1000", True),
        ("999,0", True),
        ("devatenáct", True),
        ("osmdesát", True),
        ("kvadrilion", True),
        ("Pes", False),
        (",", False),
        ("1/2", True),
    ],
)
def test_lex_attrs_like_number(cs_tokenizer, text, match):
    """Check the `like_num` flag of the single token produced for `text`.

    `cs_tokenizer` is the tokenizer fixture, `text` is the input string and
    `match` is the expected value of `like_num` for the resulting token.
    """
    doc = cs_tokenizer(text)
    # Each sample must stay one token; otherwise checking only the first
    # token's flag would not describe the whole input.
    assert len(doc) == 1
    only_token = doc[0]
    assert only_token.like_num == match
|
Loading…
Reference in New Issue
Block a user