spaCy/spacy/lang/id/tokenizer_exceptions.py

11 lines
236 B
Python
Raw Normal View History

2017-07-23 18:55:05 +03:00
# coding: utf8
from __future__ import unicode_literals
2017-07-24 10:11:10 +03:00
from ._tokenizer_exceptions_list import FR_BASE_EXCEPTIONS
from ...symbols import ORTH
# Tokenizer special cases, keyed by the exact surface string.
# NOTE(review): the list is named FR_BASE_EXCEPTIONS although this module
# lives under lang/id -- looks like a leftover from the French data; confirm
# that the imported list really holds the intended entries.
_exc = {}

# Every listed string (plus "etc.") maps to a single-token analysis whose
# ORTH attribute is the string itself.
for orth in FR_BASE_EXCEPTIONS + ["etc."]:
    _exc[orth] = [{ORTH: orth}]

# Export a copy so later changes to the private working dict do not affect
# the public table.
TOKENIZER_EXCEPTIONS = dict(_exc)