spaCy/spacy/lang/lt/tokenizer_exceptions.py
2023-06-26 11:41:03 +02:00

15 lines
382 B
Python

from ...symbols import ORTH
from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS
# Extra exception entries added on top of the shared base table: each listed
# string maps to a one-element list holding a single {ORTH: <string>} spec,
# i.e. the string is described as one piece whose text is itself.
_exc = {orth: [{ORTH: orth}] for orth in ("n-tosios", "?!")}
# Copy the shared base exceptions, leaving out every entry whose key ends
# with a period.
# NOTE(review): the reason for dropping period-final keys is not stated in
# this file — confirm before changing the filter.
mod_base_exceptions = dict(
    entry for entry in BASE_EXCEPTIONS.items() if not entry[0].endswith(".")
)

# Remove the "8)" entry from the copied table. The key is required to be
# present: a missing key raises KeyError, exactly as a `del` statement would.
# NOTE(review): presumably "8)" is excluded so it is not treated as a single
# special-case string in this language — verify the motivation.
mod_base_exceptions.pop("8)")

# Final table exported by this module: the filtered base entries together
# with the additions in _exc.
# NOTE(review): merge/override semantics come from update_exc in ...util,
# which is not visible here — confirm against its definition.
TOKENIZER_EXCEPTIONS = update_exc(mod_base_exceptions, _exc)