spaCy/spacy/tests/lang/sr/test_lex_attrs.py
Adriane Boyd 6f314f99c4
Use Latin normalization for Serbian attrs (#12608)
* Use Latin normalization for Serbian attrs

Use Latin normalization for Serbian `NORM`, `PREFIX`, and `SUFFIX`.

* Update NORMs in tokenizer exceptions and related tests

* Add tests for all custom lex attrs

* Remove unused imports
2023-05-08 12:33:56 +02:00

18 lines
484 B
Python

import pytest
@pytest.mark.parametrize(
    "text,like_num,norm,prefix,suffix",
    [
        # Lowercase Cyrillic input flagged as number-like; expected
        # norm/prefix/suffix are spelled in Latin script.
        ("нула", True, "nula", "n", "ula"),
        # Capitalised Cyrillic input that is not number-like: the expected
        # norm is lowercase while the expected prefix stays uppercase.
        ("Казна", False, "kazna", "K", "zna"),
    ],
)
def test_lex_attrs(sr_tokenizer, text, like_num, norm, prefix, suffix):
    """Check the lexical attributes of a single-token Serbian input.

    The text is run through the ``sr_tokenizer`` fixture, must come back as
    exactly one token, and that token's ``like_num``, ``norm_``, ``prefix_``
    and ``suffix_`` must equal the parametrized expectations.
    """
    doc = sr_tokenizer(text)
    # Every case is a single word, so anything but one token is a failure.
    assert len(doc) == 1

    token = doc[0]
    assert token.like_num == like_num
    assert token.norm_ == norm
    assert token.prefix_ == prefix
    assert token.suffix_ == suffix