spaCy/spacy/fr/language_data.py

34 lines
429 B
Python
Raw Normal View History

# encoding: utf8
from __future__ import unicode_literals
2016-12-08 22:07:14 +03:00
from ..symbols import *
from ..language_data import TOKENIZER_PREFIXES
from ..language_data import TOKENIZER_SUFFIXES
from ..language_data import TOKENIZER_INFIXES
2016-12-08 22:07:14 +03:00
def strings_to_exc(orths):
    """Expand plain strings into a dict of single-entry exception lists.

    Every string in ``orths`` becomes a key whose value is a one-element
    list containing the dict ``{ORTH: orth}``.

    orths (iterable): Hashable strings to expand. An empty iterable
        yields an empty dict.
    RETURNS (dict): ``{orth: [{ORTH: orth}]}`` for each ``orth`` given.
    """
    # ORTH is not defined in this module; it is expected to arrive via the
    # star import from ``..symbols``.
    return {orth: [{ORTH: orth}] for orth in orths}
2016-12-08 22:07:14 +03:00
# Placeholder lemma string. Going by the name it is meant for pronouns;
# nothing in this module's visible code reads it.
PRON_LEMMA = "-PRON-"
2016-12-08 22:07:14 +03:00
# Tag map placeholder: no entries are defined yet, so this is an empty dict.
TAG_MAP = {
}
2016-12-08 22:07:14 +03:00
# Stop words are listed whitespace-separated between the triple quotes and
# split into a set. The literal is currently empty, so the set is empty too.
# Anything placed inside the quotes becomes a stop word, so keep it to words.
STOP_WORDS = set("""
""".split())
2016-12-08 22:07:14 +03:00
# Tokenizer exceptions placeholder: an empty dict, no entries defined yet.
TOKENIZER_EXCEPTIONS = {}
2016-12-08 22:07:14 +03:00
# Placeholder with no entries. NOTE: `{}` is an empty dict literal, not a set.
ORTH_ONLY = {}