spaCy/spacy/tests/lang/isl/test_tokenizer.py
Edward 360ccf628a
Rename language codes (Icelandic, multi-language) (#12149)
* Init

* fix tests

* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Fix test_blank_languages

* Rename xx to mul in docs

* Format _util with black

* prettier formatting

---------

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
2023-01-31 17:30:43 +01:00

31 lines
788 B
Python

import pytest
ISL_BASIC_TOKENIZATION_TESTS = [
(
"Enginn maður skal sæta pyndingum eða ómannlegri eða "
"vanvirðandi meðferð eða refsingu. ",
[
"Enginn",
"maður",
"skal",
"sæta",
"pyndingum",
"eða",
"ómannlegri",
"eða",
"vanvirðandi",
"meðferð",
"eða",
"refsingu",
".",
],
),
]
@pytest.mark.parametrize("text,expected_tokens", ISL_BASIC_TOKENIZATION_TESTS)
def test_isl_tokenizer_basic(isl_tokenizer, text, expected_tokens):
tokens = isl_tokenizer(text)
token_list = [token.text for token in tokens if not token.is_space]
assert expected_tokens == token_list