spaCy/spacy/tests/lang/ru/test_tokenizer_exc.py


# coding: utf-8
"""Test that tokenizer exceptions are parsed correctly."""
from __future__ import unicode_literals

import pytest


# Abbreviated Russian weekday/month names must stay single tokens whose norm_
# is the full word: пн. = понедельник (Monday), пт. = пятница (Friday),
# дек. = декабрь (December).
@pytest.mark.parametrize('text,norms', [("пн.", ["понедельник"]),
                                        ("пт.", ["пятница"]),
                                        ("дек.", ["декабрь"])])
def test_ru_tokenizer_abbrev_exceptions(ru_tokenizer, text, norms):
    tokens = ru_tokenizer(text)
    assert len(tokens) == 1
    assert [token.norm_ for token in tokens] == norms
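
Note: the test depends on a ru_tokenizer pytest fixture that is not defined in this file; in spaCy's test suite such fixtures come from a shared conftest.py. A minimal sketch of what an equivalent fixture could look like (an assumption for illustration, not the project's actual definition) is below; it assumes pymorphy2, the optional Russian dependency, may be missing and skips the tests in that case.

# Hypothetical conftest.py sketch, not spaCy's actual fixture definition.
import pytest


@pytest.fixture(scope="session")
def ru_tokenizer():
    # Skip all Russian tokenizer tests if the optional dependency is absent.
    pytest.importorskip("pymorphy2")
    import spacy
    # Create a blank Russian pipeline and hand back only its tokenizer.
    return spacy.blank("ru").tokenizer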