spaCy/spacy/lang/hsb/tokenizer_exceptions.py
jnphilipp 7ed7908716
Add Upper Sorbian support. (#10432)
* Add basic support for Upper Sorbian.

* Add tokenizer exceptions and tests.

* Update spacy/lang/hsb/examples.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
2022-03-07 16:20:39 +01:00

19 lines
386 B
Python

from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ...symbols import ORTH, NORM
from ...util import update_exc
# Language-specific exception table, keyed by the exact surface string (ORTH).
# Each value is a one-element list holding the attribute dict for that string.
_exc = {}

# Abbreviations that also carry a normalized form (NORM).
for abbrev, norm in (
    ("mil.", "milion"),
    ("wob.", "wobydler"),
):
    _exc[abbrev] = [{ORTH: abbrev, NORM: norm}]

# Abbreviations recorded with their surface form only (no NORM).
_exc.update({abbrev: [{ORTH: abbrev}] for abbrev in ("resp.",)})

# Combine the shared base exceptions with the entries defined above.
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)