spaCy/spacy/lang/id/tokenizer_exceptions.py
2017-07-24 14:11:51 +07:00

11 lines
236 B
Python

# coding: utf8
from __future__ import unicode_literals

from ._tokenizer_exceptions_list import ID_BASE_EXCEPTIONS
from ...symbols import ORTH
# Build the exception table: every string in ID_BASE_EXCEPTIONS, plus the
# abbreviation "etc.", maps to a one-element list holding {ORTH: <string>},
# i.e. each entry is described by a single item whose ORTH is the string itself.
# NOTE(review): the `+` below requires ID_BASE_EXCEPTIONS to be a list;
# presumably the tokenizer reads these entries as "keep as one token" rules
# -- confirm against the tokenizer's special-case handling.
_exc = {orth: [{ORTH: orth}] for orth in ID_BASE_EXCEPTIONS + ["etc."]}

# Publish a shallow copy so the public table is a distinct dict from _exc.
TOKENIZER_EXCEPTIONS = dict(_exc)