mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
f33c703066
* added Urdu language support. * added Urdu language tests. * modified conftest.py for Urdu language support. * added spacy contributor agreement.
23 lines
699 B
Python
23 lines
699 B
Python
# coding: utf8
|
||
from __future__ import unicode_literals
|
||
|
||
# import symbols – if you need to use more, add them here
|
||
from ...symbols import ORTH, LEMMA, TAG, NORM, ADP, DET
|
||
|
||
# Add tokenizer exceptions
# Documentation: https://spacy.io/docs/usage/adding-languages#tokenizer-exceptions
# Feel free to use custom logic to generate repetitive exceptions more efficiently.
# If an exception is split into more than one token, the ORTH values combined always
# need to match the original string.

# Exceptions should be added in the following format: the exact string to match
# is the key, and the value is the list of per-token attribute dicts it is split
# into, e.g.
#   "don't": [{ORTH: "do"}, {ORTH: "n't", NORM: "not"}]

# Mapping of an exact surface string to the list of token attribute dicts it
# should be split into (the ORTH values of the pieces must concatenate back to
# the key). No language-specific exceptions are defined yet, so it is empty.
_exc = {}

# To keep things clean and readable, it's recommended to only declare the
# TOKENIZER_EXCEPTIONS at the bottom:

TOKENIZER_EXCEPTIONS = _exc
|