mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 18:36:36 +03:00
5ca0dbae76
* Add support basic support for lower sorbian. * Add some test for dsb. * Update spacy/lang/dsb/examples.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
30 lines
749 B
Python
30 lines
749 B
Python
import pytest
|
|
|
|
# Fixtures for the basic Lower Sorbian (dsb) tokenization test.
# Each entry is a (text, expected_tokens) pair: the raw sentence followed by
# the token strings it should be split into; punctuation marks appear as
# separate tokens in the expected list.
DSB_BASIC_TOKENIZATION_TESTS = [
    (
        "Ale eksistěrujo mimo togo ceła kopica narěcow, ako na pśikład slěpjańska.",
        [
            "Ale", "eksistěrujo", "mimo", "togo", "ceła", "kopica", "narěcow", ",",
            "ako", "na", "pśikład", "slěpjańska", ".",
        ],
    ),
]
|
|
|
|
|
|
@pytest.mark.parametrize("text,expected_tokens", DSB_BASIC_TOKENIZATION_TESTS)
def test_dsb_tokenizer_basic(dsb_tokenizer, text, expected_tokens):
    """Tokenize ``text`` and compare the non-space tokens to ``expected_tokens``.

    ``dsb_tokenizer`` is called on ``text``; every resulting token whose
    ``is_space`` flag is set is skipped, and the ``text`` of the remaining
    tokens must equal ``expected_tokens`` in order.
    """
    doc = dsb_tokenizer(text)
    observed = []
    for tok in doc:
        # Whitespace tokens are not part of the expected output.
        if tok.is_space:
            continue
        observed.append(tok.text)
    assert expected_tokens == observed
|