mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Move language-specific tests to tests/lang
This commit is contained in:
parent
bd57b611cc
commit
c714841cc8
|
@ -1 +0,0 @@
|
|||
# coding: utf-8
|
|
@ -1 +0,0 @@
|
|||
# coding: utf-8
|
|
@ -6,8 +6,8 @@ from __future__ import unicode_literals
|
|||
|
||||
import pytest
|
||||
|
||||
from ...util import compile_prefix_regex
|
||||
from ...language_data import TOKENIZER_PREFIXES
|
||||
from ....util import compile_prefix_regex
|
||||
from ....lang.punctuation import TOKENIZER_PREFIXES
|
||||
|
||||
|
||||
PUNCT_OPEN = ['(', '[', '{', '*']
|
|
@ -3,15 +3,14 @@ from __future__ import unicode_literals
|
|||
|
||||
import pytest
|
||||
|
||||
|
||||
ABBREVIATION_TESTS = [
|
||||
('Hyvää uutta vuotta t. siht. Niemelä!', ['Hyvää', 'uutta', 'vuotta', 't.', 'siht.', 'Niemelä', '!']),
|
||||
('Paino on n. 2.2 kg', ['Paino', 'on', 'n.', '2.2', 'kg'])
|
||||
]
|
||||
|
||||
TESTCASES = ABBREVIATION_TESTS
|
||||
|
||||
|
||||
@pytest.mark.parametrize('text,expected_tokens', TESTCASES)
|
||||
@pytest.mark.parametrize('text,expected_tokens', ABBREVIATION_TESTS)
|
||||
def test_tokenizer_handles_testcases(fi_tokenizer, text, expected_tokens):
|
||||
tokens = fi_tokenizer(text)
|
||||
token_list = [token.text for token in tokens if not token.is_space]
|
|
@ -1,6 +1,4 @@
|
|||
# coding: utf8
|
||||
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
0
spacy/tests/lang/hu/__init__.py
Normal file
0
spacy/tests/lang/hu/__init__.py
Normal file
0
spacy/tests/lang/nb/__init__.py
Normal file
0
spacy/tests/lang/nb/__init__.py
Normal file
0
spacy/tests/lang/sv/__init__.py
Normal file
0
spacy/tests/lang/sv/__init__.py
Normal file
Loading…
Reference in New Issue
Block a user