spaCy/spacy/tests/lang/dsb/test_tokenizer.py
jnphilipp 5ca0dbae76
Add Lower Sorbian support. (#10431)
* Add basic support for Lower Sorbian.

* Add some tests for dsb.

* Update spacy/lang/dsb/examples.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
2022-03-07 16:57:14 +01:00

30 lines
749 B
Python

import pytest
# Cases for the basic tokenizer test. Each entry pairs a raw sentence with the
# token texts it is expected to yield, in order (whitespace tokens excluded by
# the test that consumes this list).
DSB_BASIC_TOKENIZATION_TESTS = [
    (
        "Ale eksistěrujo mimo togo ceła kopica narěcow, ako na pśikład slěpjańska.",
        [
            "Ale", "eksistěrujo", "mimo", "togo", "ceła", "kopica", "narěcow", ",",
            "ako", "na", "pśikład", "slěpjańska", ".",
        ],
    ),
]
@pytest.mark.parametrize("text,expected_tokens", DSB_BASIC_TOKENIZATION_TESTS)
def test_dsb_tokenizer_basic(dsb_tokenizer, text, expected_tokens):
    """Check that tokenizing ``text`` yields exactly ``expected_tokens``.

    Args:
        dsb_tokenizer: Callable applied to ``text``; its result is iterated
            token by token (presumably a fixture provided by the test
            suite's conftest -- confirm there).
        text: Raw input sentence.
        expected_tokens: Expected token texts, in order, with whitespace
            tokens left out.
    """
    doc = dsb_tokenizer(text)

    # Collect the text of every token, skipping whitespace-only tokens so the
    # comparison only covers the tokens listed in the test case.
    observed = []
    for tok in doc:
        if tok.is_space:
            continue
        observed.append(tok.text)

    assert expected_tokens == observed