Move language-specific tests to tests/lang

ines 2017-05-09 00:02:37 +02:00
parent bd57b611cc
commit c714841cc8
25 changed files with 4 additions and 9 deletions

@@ -1 +0,0 @@
-# coding: utf-8

@@ -1 +0,0 @@
-# coding: utf-8

@@ -6,8 +6,8 @@ from __future__ import unicode_literals
 import pytest
-from ...util import compile_prefix_regex
-from ...language_data import TOKENIZER_PREFIXES
+from ....util import compile_prefix_regex
+from ....lang.punctuation import TOKENIZER_PREFIXES
 PUNCT_OPEN = ['(', '[', '{', '*']
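The only change in this file is the import depth, which grows by one level because the test now lives one directory deeper under tests/lang. As a minimal usage sketch (not part of the commit, and assuming the new relative paths resolve to spacy.util and spacy.lang.punctuation as they suggest):

# Minimal sketch: exercises the two imports repointed above.
# compile_prefix_regex() is assumed to return a compiled regex whose
# .search() matches punctuation that should be split off the front of a token.
from spacy.util import compile_prefix_regex
from spacy.lang.punctuation import TOKENIZER_PREFIXES

prefix_search = compile_prefix_regex(TOKENIZER_PREFIXES).search
assert prefix_search('(Hello') is not None   # '(' is an opening-punctuation prefix
assert prefix_search('Hello') is None        # no leading punctuation to split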

@@ -3,15 +3,14 @@ from __future__ import unicode_literals
 import pytest
 ABBREVIATION_TESTS = [
     ('Hyvää uutta vuotta t. siht. Niemelä!', ['Hyvää', 'uutta', 'vuotta', 't.', 'siht.', 'Niemelä', '!']),
     ('Paino on n. 2.2 kg', ['Paino', 'on', 'n.', '2.2', 'kg'])
 ]
-TESTCASES = ABBREVIATION_TESTS
-@pytest.mark.parametrize('text,expected_tokens', TESTCASES)
+@pytest.mark.parametrize('text,expected_tokens', ABBREVIATION_TESTS)
 def test_tokenizer_handles_testcases(fi_tokenizer, text, expected_tokens):
     tokens = fi_tokenizer(text)
     token_list = [token.text for token in tokens if not token.is_space]
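Only the parametrize decorator changes in this Finnish test: the TESTCASES alias is dropped and ABBREVIATION_TESTS is used directly. For reference, a self-contained sketch of what the test checks; the fi_tokenizer fixture is provided by the shared conftest, and this sketch assumes it is roughly the default Finnish tokenizer from spacy.lang.fi in the 2.x-style layout:

# Hedged, self-contained approximation of the fixture-based test above.
# Assumes spacy.lang.fi.Finnish exists and that Defaults.create_tokenizer()
# builds the same tokenizer the fi_tokenizer fixture would provide.
from spacy.lang.fi import Finnish

fi_tokenizer = Finnish.Defaults.create_tokenizer()
tokens = fi_tokenizer('Paino on n. 2.2 kg')
token_list = [token.text for token in tokens if not token.is_space]
assert token_list == ['Paino', 'on', 'n.', '2.2', 'kg']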

@@ -1,6 +1,4 @@
 # coding: utf8
 from __future__ import unicode_literals
