spaCy/spacy/lang/lt/tokenizer_exceptions.py

15 lines
382 B
Python
Raw Permalink Normal View History

2019-07-08 11:25:22 +03:00
from ...symbols import ORTH
from ...util import update_exc
2023-06-26 12:41:03 +03:00
from ..tokenizer_exceptions import BASE_EXCEPTIONS
2019-07-08 11:25:22 +03:00
# Language-specific exceptions: each listed string maps to a single-token
# analysis whose ORTH is the string itself, i.e. it is kept as one token.
_exc = {orth: [{ORTH: orth}] for orth in ["n-tosios", "?!"]}
# Start from the shared base exceptions, leaving out every entry whose key
# ends with a period.
mod_base_exceptions = {
    exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
}
# The shared "8)" entry is not wanted here either. pop() with a default is
# used instead of `del` so that importing this module does not raise KeyError
# if the base table does not contain that key.
mod_base_exceptions.pop("8)", None)

# Final table: the filtered base exceptions combined with the
# language-specific ones via update_exc.
TOKENIZER_EXCEPTIONS = update_exc(mod_base_exceptions, _exc)