spaCy/spacy/lang/id/tokenizer_exceptions.py

12 lines
264 B
Python
Raw Normal View History

2017-07-23 18:55:05 +03:00
# coding: utf8
from __future__ import unicode_literals
2017-07-24 10:11:51 +03:00
from ._tokenizer_exceptions_list import ID_BASE_EXCEPTIONS
2017-07-24 10:12:34 +03:00
from ...symbols import ORTH
2017-07-24 10:11:10 +03:00
# Build the exception table: every string in ID_BASE_EXCEPTIONS, plus the
# extra entry "etc.", maps to a one-element list holding {ORTH: <string>}.
# NOTE(review): presumably this makes the tokenizer keep each listed string
# as a single token -- confirm against the tokenizer's exception handling.
_exc = {
    exc_string: [{ORTH: exc_string}]
    for exc_string in ID_BASE_EXCEPTIONS + ["etc."]
}

# The exported table is a shallow copy, i.e. a dict object distinct from _exc.
TOKENIZER_EXCEPTIONS = _exc.copy()