# Source: spaCy/spacy/tests/lang/sr/test_exceptions.py

import pytest
@pytest.mark.parametrize(
    "text,norms",
    [
        ("о.г.", ["ove godine"]),
        ("чет.", ["četvrtak"]),
        ("гђа", ["gospođa"]),
        ("ил'", ["ili"]),
    ],
)
def test_sr_tokenizer_abbrev_exceptions(sr_tokenizer, text, norms):
    """Check that each abbreviation is kept as one token with the expected norm.

    Args:
        sr_tokenizer: Callable applied to ``text``; its result must support
            ``len()`` and iteration over items exposing ``norm_``.
            NOTE(review): presumably a pytest fixture defined outside this
            file (e.g. in a conftest) -- confirm.
        text: The abbreviation string to tokenize.
        norms: Expected list of ``norm_`` values, one per produced token.
    """
    tokens = sr_tokenizer(text)
    # The abbreviation must not be split (e.g. at the period or apostrophe).
    assert len(tokens) == 1
    assert [token.norm_ for token in tokens] == norms