mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Lazy imports language
This commit is contained in:
parent
c0afcd22bb
commit
f2ab7d77b4
|
@ -3,18 +3,13 @@ from __future__ import unicode_literals
|
|||
|
||||
from . import util
|
||||
from .deprecated import resolve_model_name
|
||||
import importlib
|
||||
from .cli.info import info
|
||||
|
||||
from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he, nb
|
||||
|
||||
|
||||
_languages = (en.English, de.German, es.Spanish, pt.Portuguese, fr.French,
|
||||
it.Italian, hu.Hungarian, zh.Chinese, nl.Dutch, sv.Swedish,
|
||||
fi.Finnish, bn.Bengali, he.Hebrew, nb.Norwegian)
|
||||
|
||||
|
||||
for _lang in _languages:
|
||||
util.set_lang_class(_lang.lang, _lang)
|
||||
_languages_name = set(["en", "de", "es", "pt", "fr",
|
||||
"it", "hu", "zh", "nl", "sv",
|
||||
"fi", "bn", "he", "nb"])
|
||||
|
||||
|
||||
def load(name, **overrides):
|
||||
|
@ -34,7 +29,7 @@ def load(name, **overrides):
|
|||
model_name = ''
|
||||
meta = util.parse_package_meta(data_path, model_name, require=False)
|
||||
lang = meta['lang'] if meta and 'lang' in meta else name
|
||||
cls = util.get_lang_class(lang)
|
||||
cls = importlib.import_module("."+lang, "spacy")
|
||||
overrides['meta'] = meta
|
||||
overrides['path'] = model_path
|
||||
return cls(**overrides)
|
||||
return cls.EXPORT(**overrides)
|
||||
|
|
|
@ -22,3 +22,5 @@ class Bengali(Language):
|
|||
prefixes = tuple(TOKENIZER_PREFIXES)
|
||||
suffixes = tuple(TOKENIZER_SUFFIXES)
|
||||
infixes = tuple(TOKENIZER_INFIXES)
|
||||
|
||||
EXPORT = Bengali
|
|
@ -20,3 +20,6 @@ class German(Language):
|
|||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = German
|
|
@ -32,3 +32,6 @@ class English(Language):
|
|||
# Special-case hack for loading the GloVe vectors, to support <1.0
|
||||
overrides = fix_glove_vectors_loading(overrides)
|
||||
Language.__init__(self, **overrides)
|
||||
|
||||
|
||||
EXPORT = English
|
|
@ -19,3 +19,6 @@ class Spanish(Language):
|
|||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Spanish
|
|
@ -15,3 +15,6 @@ class Finnish(Language):
|
|||
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Finnish
|
|
@ -31,4 +31,7 @@ class FrenchDefaults(BaseDefaults):
|
|||
class French(Language):
|
||||
lang = 'fr'
|
||||
|
||||
Defaults = FrenchDefaults
|
||||
Defaults = FrenchDefaults
|
||||
|
||||
|
||||
EXPORT = French
|
|
@ -16,3 +16,6 @@ class Hebrew(Language):
|
|||
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Hebrew
|
|
@ -24,3 +24,6 @@ class Hungarian(Language):
|
|||
stop_words = set(STOP_WORDS)
|
||||
|
||||
token_match = TOKEN_MATCH
|
||||
|
||||
|
||||
EXPORT = Hungarian
|
|
@ -16,3 +16,6 @@ class Italian(Language):
|
|||
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Italian
|
|
@ -22,5 +22,6 @@ TAG_MAP = {
|
|||
"CCONJ": {POS: CCONJ}, # U20
|
||||
"ADJ": {POS: ADJ},
|
||||
"VERB": {POS: VERB},
|
||||
"PART": {POS: PART}
|
||||
"PART": {POS: PART},
|
||||
"SP": {POS: SPACE}
|
||||
}
|
||||
|
|
|
@ -23,3 +23,6 @@ class Norwegian(Language):
|
|||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
#tag_map = TAG_MAP
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Norwegian
|
|
@ -15,3 +15,6 @@ class Dutch(Language):
|
|||
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Dutch
|
|
@ -16,3 +16,6 @@ class Portuguese(Language):
|
|||
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Portuguese
|
|
@ -15,3 +15,6 @@ class Swedish(Language):
|
|||
|
||||
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
|
||||
stop_words = STOP_WORDS
|
||||
|
||||
|
||||
EXPORT = Swedish
|
|
@ -9,3 +9,6 @@ class Chinese(Language):
|
|||
import jieba
|
||||
words = list(jieba.cut(text, cut_all=True))
|
||||
return Doc(self.vocab, words=words, spaces=[False]*len(words))
|
||||
|
||||
|
||||
EXPORT = Chinese
|
Loading…
Reference in New Issue
Block a user