mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-15 22:27:12 +03:00
a9559e7435
* Handle Russian, Ukrainian and Bulgarian * Corrections * Correction * Correction to comment * Changes based on review * Correction * Reverted irrelevant change in punctuation.py * Remove unnecessary group * Reverted accidental change
9 lines
252 B
Python
9 lines
252 B
Python
import pytest
|
|
|
|
|
|
def test_bg_tokenizer_handles_final_diacritics(bg_tokenizer):
    """Check that a period is split off a word that ends in a diacritic.

    The sample repeats one short sentence twice; only the first occurrence
    is inspected: token 1 must be the accented word on its own, and token 2
    must be the period that follows it.
    """
    doc = bg_tokenizer("Ня̀маше яйца̀. Ня̀маше яйца̀.")
    # Checked in order, so a mismatch on the word is reported before the
    # period is even looked at.
    expected_by_index = ((1, "яйца̀"), (2, "."))
    for index, expected_text in expected_by_index:
        assert doc[index].text == expected_text
|