Use base language data as default

This commit is contained in:
Ines Montani 2016-12-18 16:55:25 +01:00
parent bcc1d50d09
commit 753068f1d5

View File

@@ -21,6 +21,7 @@ from .matcher import Matcher
from . import attrs
from . import orth
from . import util
+from . import language_data
from .lemmatizer import Lemmatizer
from .train import Trainer
@@ -141,13 +142,13 @@ class BaseDefaults(object):
pipeline.append(nlp.entity)
return pipeline
-prefixes = tuple()
+prefixes = tuple(language_data.TOKENIZER_PREFIXES)
-suffixes = tuple()
+suffixes = tuple(language_data.TOKENIZER_SUFFIXES)
-infixes = tuple()
+infixes = tuple(language_data.TOKENIZER_INFIXES)
-tag_map = {}
+tag_map = dict(language_data.TAG_MAP)
tokenizer_exceptions = {}