mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Adding like_num test for Czech (#5946)
* Create lex_attrs.py Hello, I am missing the Czech language in spaCy, so I would like to help push it forward a little. This file is based on the other lex_attrs.py files, just translated to Czech. * Update __init__.py Updated for use with the new Czech lex_attrs file * Update stop_words.py * Create test_text.py * add like_num testing for Czech Co-authored-by: holubvl3 <47881982+holubvl3@users.noreply.github.com> Co-authored-by: holubvl3 <vilemrousi@gmail.com> Co-authored-by: Vladimír Holubec <vholubec@arcdata.cz>
This commit is contained in:
parent
a341b4ef09
commit
56eabcb2f2
|
@ -47,6 +47,11 @@ def ca_tokenizer():
|
|||
return get_lang_class("ca").Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def cs_tokenizer():
    """Session-scoped fixture: tokenizer created from the "cs" language defaults."""
    lang_cls = get_lang_class("cs")
    return lang_cls.Defaults.create_tokenizer()
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def da_tokenizer():
    """Session-scoped fixture: tokenizer created from the "da" language defaults."""
    lang_cls = get_lang_class("da")
    return lang_cls.Defaults.create_tokenizer()
|
||||
|
|
0
spacy/tests/lang/cs/__init__.py
Normal file
0
spacy/tests/lang/cs/__init__.py
Normal file
26
spacy/tests/lang/cs/test_text.py
Normal file
26
spacy/tests/lang/cs/test_text.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "text,match",
    [
        ("10", True),
        ("1", True),
        ("10.000", True),
        ("1000", True),
        ("999,0", True),
        ("devatenáct", True),
        ("osmdesát", True),
        ("kvadrilion", True),
        ("Pes", False),
        (",", False),
        ("1/2", True),
    ],
)
def test_lex_attrs_like_number(cs_tokenizer, text, match):
    """Check `like_num` on Czech samples.

    Each parametrized `text` must come out of `cs_tokenizer` as exactly one
    token, and that token's `like_num` flag must equal the expected `match`.
    """
    doc = cs_tokenizer(text)
    # Every sample is expected to stay a single token.
    assert len(doc) == 1
    only_token = doc[0]
    assert only_token.like_num == match
|
Loading…
Reference in New Issue
Block a user