mirror of
https://github.com/explosion/spaCy.git
synced 2024-09-22 11:59:14 +03:00
12 lines
264 B
Python
12 lines
264 B
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
from ._tokenizer_exceptions_list import ID_BASE_EXCEPTIONS
|
|
from ...symbols import ORTH
|
|
|
|
# Map every exception string to a single-token analysis whose ORTH is the
# string itself. "etc." is appended to the imported base exceptions.
# NOTE(review): the `+` below assumes ID_BASE_EXCEPTIONS is a list -- confirm
# against _tokenizer_exceptions_list.
_exc = {orth: [{ORTH: orth}] for orth in ID_BASE_EXCEPTIONS + ["etc."]}


# Exported as a shallow copy, so the public mapping is a distinct dict
# object from the private `_exc`.
TOKENIZER_EXCEPTIONS = dict(_exc)