mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 01:36:35 +03:00
6f314f99c4
* Use Latin normalization for Serbian attrs Use Latin normalization for Serbian `NORM`, `PREFIX`, and `SUFFIX`. * Update NORMs in tokenizer exceptions and related tests * Add tests for all custom lex attrs * Remove unused imports
17 lines
391 B
Python
17 lines
391 B
Python
import pytest
|
||
|
||
|
||
@pytest.mark.parametrize(
    "text,norms",
    [
        ("о.г.", ["ove godine"]),
        ("чет.", ["četvrtak"]),
        ("гђа", ["gospođa"]),
        ("ил'", ["ili"]),
    ],
)
def test_sr_tokenizer_abbrev_exceptions(sr_tokenizer, text, norms):
    """Check that an abbreviation stays one token with the expected norm.

    Each case pairs a Cyrillic abbreviation (``text``) with the list of
    ``norm_`` values (``norms``) the tokenizer output must carry; the
    expected norms are written in Latin script.
    """
    doc = sr_tokenizer(text)
    # The abbreviation must not be split into several tokens.
    assert len(doc) == 1
    observed_norms = [tok.norm_ for tok in doc]
    assert observed_norms == norms
|