mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
cca8651fc8
* [Bulgarian] Add tokenizer exceptions and like_num for Bulgarian * [Bulgarian] Add tokenizer exceptions and like_num for Bulgarian
31 lines
794 B
Python
31 lines
794 B
Python
import pytest
|
|
from spacy.lang.bg.lex_attrs import like_num
|
|
|
|
@pytest.mark.parametrize(
    "word,match",
    [
        # Plain digit strings, with and without "." / "," inside them.
        ("10", True),
        ("1", True),
        ("10000", True),
        ("1.000", True),
        # NOTE(review): this word contains "ј", which is not a letter of the
        # Bulgarian alphabet -- possibly carried over from another language's
        # test suite; confirm whether a Bulgarian word was intended.
        ("бројка", False),
        ("999,23", True),
        # Spelled-out words, mixed with words expected NOT to be number-like.
        ("едно", True),
        ("две", True),
        ("цифра", False),
        ("единайсет", True),
        ("десет", True),
        ("сто", True),
        ("брой", False),
        ("хиляда", True),
        ("милион", True),
        # A lone separator character must not count as a number.
        (",", False),
        ("милиарда", True),
        ("билион", True),
    ],
)
def test_bg_lex_attrs_like_number(bg_tokenizer, word, match):
    """Check the ``like_num`` flag of single-token Bulgarian inputs.

    Each parametrized ``word`` is run through the ``bg_tokenizer`` fixture.
    The result must consist of exactly one token, and that token's
    ``like_num`` attribute must equal the expected ``match`` value.
    """
    doc = bg_tokenizer(word)
    # If the tokenizer split the input, the like_num check below would only
    # look at a fragment of the word, so fail early in that case.
    assert len(doc) == 1
    only_token = doc[0]
    assert only_token.like_num == match
|