first stab at test

This commit is contained in:
Jim O'Regan 2017-09-11 08:57:48 +01:00
parent 1ee75ae337
commit c283e9edfe
2 changed files with 18 additions and 0 deletions

View File

View File

@ -0,0 +1,18 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
SV_TOKEN_EXCEPTION_TESTS = [
('B\'fhearr fanacht as amharc', ['B\'', 'fhearr', 'fanacht', 'as', 'amharc']),
('Daoine a bhfuil Gaeilge acu, m.sh. tusa agus mise', ['Daoine', 'a', 'bhfuil', 'Gaeilge', 'acu', ',', 'm.sh.', 'tusa', 'agus', 'mise'])
]
@pytest.mark.parametrize('text,expected_tokens', GA_TOKEN_EXCEPTION_TESTS)
def test_tokenizer_handles_exception_cases(ga_tokenizer, text, expected_tokens):
tokens = ga_tokenizer(text)
token_list = [token.text for token in tokens if not token.is_space]
assert expected_tokens == token_list