Update NORMs in tokenizer exceptions and related tests

Adriane Boyd 2023-05-08 11:04:58 +02:00
parent 560ed4b491
commit 01829933fd
2 changed files with 9 additions and 6 deletions

spacy/lang/sr/tokenizer_exceptions.py

@@ -1,3 +1,4 @@
+from .lex_attrs import _cyr_to_latin_norm
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...symbols import ORTH, NORM
 from ...util import update_exc
@@ -89,5 +90,7 @@ _slang_exc = [
 for slang_desc in _slang_exc:
     _exc[slang_desc[ORTH]] = [slang_desc]

+for _exc_key in _exc:
+    _exc[_exc_key][0][NORM] = _cyr_to_latin_norm(_exc[_exc_key][0][NORM])

 TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
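
For context, the new loop above rewrites every NORM in the Serbian exception table from Cyrillic to Latin script before the table is merged into the base exceptions. Below is a minimal standalone sketch of the idea; the _cyr_to_latin_norm shown here is a simplified stand-in (the real transliteration is imported from this language's lex_attrs module and also covers letters and digraphs not listed in this toy mapping):

    from spacy.symbols import ORTH, NORM

    # Simplified stand-in mapping; illustrative only, not the real table.
    _CYR2LAT = {"и": "i", "л": "l", "ч": "č", "е": "e", "т": "t"}

    def _cyr_to_latin_norm(text):
        # Transliterate character by character, leaving unmapped characters unchanged.
        return "".join(_CYR2LAT.get(ch, ch) for ch in text)

    _exc = {"ил'": [{ORTH: "ил'", NORM: "или"}]}

    # Same shape as the committed loop: rewrite each exception's NORM in place.
    for _exc_key in _exc:
        _exc[_exc_key][0][NORM] = _cyr_to_latin_norm(_exc[_exc_key][0][NORM])

    assert _exc["ил'"][0][NORM] == "ili"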

spacy/tests/lang/sr/test_exceptions.py

@@ -2,15 +2,15 @@ import pytest
 @pytest.mark.parametrize(
-    "text,norms,lemmas",
+    "text,norms",
     [
-        ("о.г.", ["ове године"], ["ова година"]),
-        ("чет.", ["четвртак"], ["четвртак"]),
-        ("гђа", ["госпођа"], ["госпођа"]),
-        ("ил'", ["или"], ["или"]),
+        ("о.г.", ["ove godine"]),
+        ("чет.", ["četvrtak"]),
+        ("гђа", ["gospođa"]),
+        ("ил'", ["ili"]),
     ],
 )
-def test_sr_tokenizer_abbrev_exceptions(sr_tokenizer, text, norms, lemmas):
+def test_sr_tokenizer_abbrev_exceptions(sr_tokenizer, text, norms):
     tokens = sr_tokenizer(text)
     assert len(tokens) == 1
     assert [token.norm_ for token in tokens] == norms
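
Assuming a blank Serbian pipeline, the updated expectations can be reproduced outside the test suite (a sketch, not part of this commit):

    import spacy

    nlp = spacy.blank("sr")
    doc = nlp("чет.")
    # With this change, the exception's NORM comes back in Latin script.
    assert [t.norm_ for t in doc] == ["četvrtak"]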