Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-11 01:36:35 +03:00
Commit 6f314f99c4: Use Latin normalization for Serbian attrs
* Use Latin normalization for Serbian `NORM`, `PREFIX`, and `SUFFIX`
* Update NORMs in tokenizer exceptions and related tests
* Add tests for all custom lex attrs
* Remove unused imports
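As a rough illustration of the normalization the commit describes, a `NORM` getter can transliterate Cyrillic text to Latin via a character table before lower-casing. The sketch below is illustrative only: the table is truncated and the names `_CYR_TO_LAT`, `to_latin`, and `norm` are hypothetical, not spaCy's actual Serbian `lex_attrs` code.

# Illustrative sketch only, not spaCy's implementation: a Cyrillic-to-Latin
# transliteration used as a NORM getter. The mapping table is abbreviated.
_CYR_TO_LAT = {
    "а": "a", "б": "b", "в": "v", "г": "g", "д": "d", "е": "e", "ж": "ž",
    "з": "z", "и": "i", "к": "k", "л": "l", "м": "m", "н": "n", "о": "o",
    "п": "p", "р": "r", "с": "s", "т": "t", "у": "u", "ф": "f", "х": "h",
    "ц": "c", "ш": "š",
}


def to_latin(text):
    # Map each character through the table, preserving case and leaving
    # unmapped characters (digits, punctuation, Latin letters) untouched.
    chars = []
    for ch in text:
        lat = _CYR_TO_LAT.get(ch.lower(), ch)
        chars.append(lat.upper() if ch.isupper() else lat)
    return "".join(chars)


def norm(text):
    # NORM transliterates and lower-cases, so "Казна" becomes "kazna".
    return to_latin(text).lower()

The same transliteration, applied without lower-casing, would give the Latin `PREFIX` and `SUFFIX` values the test below expects ("K" and "zna" for "Казна").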
18 lines · 484 B · Python
import pytest


@pytest.mark.parametrize(
    "text,like_num,norm,prefix,suffix",
    [
        ("нула", True, "nula", "n", "ula"),
        ("Казна", False, "kazna", "K", "zna"),
    ],
)
def test_lex_attrs(sr_tokenizer, text, like_num, norm, prefix, suffix):
    tokens = sr_tokenizer(text)
    assert len(tokens) == 1
    assert tokens[0].like_num == like_num
    assert tokens[0].norm_ == norm
    assert tokens[0].prefix_ == prefix
    assert tokens[0].suffix_ == suffix
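The test relies on an `sr_tokenizer` pytest fixture, which spaCy's test suite provides in its shared conftest. A minimal sketch of such a fixture, assuming only the public `spacy.util.get_lang_class` helper (the fixture body is illustrative, not necessarily spaCy's exact conftest code):

# Minimal sketch of the fixture the test depends on; spaCy's own test suite
# defines sr_tokenizer in its shared conftest.py.
import pytest
from spacy.util import get_lang_class


@pytest.fixture
def sr_tokenizer():
    # Instantiate the Serbian language class and return its tokenizer.
    return get_lang_class("sr")().tokenizer

With the fixture in place, `pytest -k test_lex_attrs` runs the two parametrized cases above.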