mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
first stab at test
This commit is contained in:
parent
1ee75ae337
commit
c283e9edfe
0
spacy/tests/lang/ga/__init__.py
Normal file
0
spacy/tests/lang/ga/__init__.py
Normal file
18
spacy/tests/lang/ga/test_tokenizer.py
Normal file
18
spacy/tests/lang/ga/test_tokenizer.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
# Irish (ga) tokenizer exception cases as (input text, expected token texts)
# pairs. The name must be GA_TOKEN_EXCEPTION_TESTS because that is the name
# the parametrize decorator in this module looks up; the earlier SV_ prefix
# left that lookup unresolved (NameError at collection time).
GA_TOKEN_EXCEPTION_TESTS = [
    ('B\'fhearr fanacht as amharc', ['B\'', 'fhearr', 'fanacht', 'as', 'amharc']),
    ('Daoine a bhfuil Gaeilge acu, m.sh. tusa agus mise', ['Daoine', 'a', 'bhfuil', 'Gaeilge', 'acu', ',', 'm.sh.', 'tusa', 'agus', 'mise']),
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('text,expected_tokens', GA_TOKEN_EXCEPTION_TESTS)
def test_tokenizer_handles_exception_cases(ga_tokenizer, text, expected_tokens):
    """Check that tokenizing `text` yields exactly `expected_tokens`.

    `ga_tokenizer` is called on `text`; the texts of all resulting tokens
    whose `is_space` flag is false are collected in order and compared with
    `expected_tokens`.
    NOTE(review): `ga_tokenizer` is presumably a pytest fixture defined in a
    conftest -- confirm it is registered for the `ga` language.
    """
    observed = []
    for tok in ga_tokenizer(text):
        # Whitespace tokens are not part of the expected output.
        if tok.is_space:
            continue
        observed.append(tok.text)
    assert expected_tokens == observed
|
||||||
|
|
Loading…
Reference in New Issue
Block a user