diff --git a/spacy/__init__.py b/spacy/__init__.py index f71d3addd..24ac28dfc 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -3,18 +3,13 @@ from __future__ import unicode_literals from . import util from .deprecated import resolve_model_name +import importlib from .cli.info import info -from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he, nb - -_languages = (en.English, de.German, es.Spanish, pt.Portuguese, fr.French, - it.Italian, hu.Hungarian, zh.Chinese, nl.Dutch, sv.Swedish, - fi.Finnish, bn.Bengali, he.Hebrew, nb.Norwegian) - - -for _lang in _languages: - util.set_lang_class(_lang.lang, _lang) +_languages_name = set(["en", "de", "es", "pt", "fr", + "it", "hu", "zh", "nl", "sv", + "fi", "bn", "he", "nb"]) def load(name, **overrides): @@ -34,7 +29,7 @@ def load(name, **overrides): model_name = '' meta = util.parse_package_meta(data_path, model_name, require=False) lang = meta['lang'] if meta and 'lang' in meta else name - cls = util.get_lang_class(lang) + cls = importlib.import_module("."+lang, "spacy") overrides['meta'] = meta overrides['path'] = model_path - return cls(**overrides) + return cls.EXPORT(**overrides) diff --git a/spacy/bn/__init__.py b/spacy/bn/__init__.py index d9fcb3dd2..b1335a110 100644 --- a/spacy/bn/__init__.py +++ b/spacy/bn/__init__.py @@ -22,3 +22,5 @@ class Bengali(Language): prefixes = tuple(TOKENIZER_PREFIXES) suffixes = tuple(TOKENIZER_SUFFIXES) infixes = tuple(TOKENIZER_INFIXES) + +EXPORT = Bengali \ No newline at end of file diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py index 37a57d676..9cec1520d 100644 --- a/spacy/de/__init__.py +++ b/spacy/de/__init__.py @@ -20,3 +20,6 @@ class German(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS tag_map = TAG_MAP stop_words = STOP_WORDS + + +EXPORT = German \ No newline at end of file diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py index 5d808d4b3..3298a6822 100644 --- a/spacy/en/__init__.py +++ b/spacy/en/__init__.py @@ -32,3 +32,6 @@ class English(Language): # Special-case hack for loading the GloVe vectors, to support <1.0 overrides = fix_glove_vectors_loading(overrides) Language.__init__(self, **overrides) + + +EXPORT = English \ No newline at end of file diff --git a/spacy/es/__init__.py b/spacy/es/__init__.py index 97cca637a..17ab34d83 100644 --- a/spacy/es/__init__.py +++ b/spacy/es/__init__.py @@ -19,3 +19,6 @@ class Spanish(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS tag_map = TAG_MAP stop_words = STOP_WORDS + + +EXPORT = Spanish \ No newline at end of file diff --git a/spacy/fi/__init__.py b/spacy/fi/__init__.py index fd1f712ac..ab49b5db1 100644 --- a/spacy/fi/__init__.py +++ b/spacy/fi/__init__.py @@ -15,3 +15,6 @@ class Finnish(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS stop_words = STOP_WORDS + + +EXPORT = Finnish \ No newline at end of file diff --git a/spacy/fr/__init__.py b/spacy/fr/__init__.py index 9dae42d64..d15bcdc4a 100644 --- a/spacy/fr/__init__.py +++ b/spacy/fr/__init__.py @@ -31,4 +31,7 @@ class FrenchDefaults(BaseDefaults): class French(Language): lang = 'fr' - Defaults = FrenchDefaults \ No newline at end of file + Defaults = FrenchDefaults + + +EXPORT = French \ No newline at end of file diff --git a/spacy/he/__init__.py b/spacy/he/__init__.py index a3e86ed73..9426247bd 100644 --- a/spacy/he/__init__.py +++ b/spacy/he/__init__.py @@ -16,3 +16,6 @@ class Hebrew(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS stop_words = STOP_WORDS + + +EXPORT = Hebrew \ No newline at end of file diff --git a/spacy/hu/__init__.py b/spacy/hu/__init__.py index e5f8d0a45..77fff7743 100644 --- a/spacy/hu/__init__.py +++ b/spacy/hu/__init__.py @@ -24,3 +24,6 @@ class Hungarian(Language): stop_words = set(STOP_WORDS) token_match = TOKEN_MATCH + + +EXPORT = Hungarian \ No newline at end of file diff --git a/spacy/it/__init__.py b/spacy/it/__init__.py index 5ebefb6b2..a797ba9a1 100644 --- a/spacy/it/__init__.py +++ b/spacy/it/__init__.py @@ -16,3 +16,6 @@ class Italian(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS stop_words = STOP_WORDS + + +EXPORT = Italian \ No newline at end of file diff --git a/spacy/language_data/tag_map.py b/spacy/language_data/tag_map.py index ead6dd1c6..0ca7f3910 100644 --- a/spacy/language_data/tag_map.py +++ b/spacy/language_data/tag_map.py @@ -22,5 +22,6 @@ TAG_MAP = { "CCONJ": {POS: CCONJ}, # U20 "ADJ": {POS: ADJ}, "VERB": {POS: VERB}, - "PART": {POS: PART} + "PART": {POS: PART}, + "SP": {POS: SPACE} } diff --git a/spacy/nb/__init__.py b/spacy/nb/__init__.py index 7d4b471a2..fd0cf9db0 100644 --- a/spacy/nb/__init__.py +++ b/spacy/nb/__init__.py @@ -23,3 +23,6 @@ class Norwegian(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS #tag_map = TAG_MAP stop_words = STOP_WORDS + + +EXPORT = Norwegian \ No newline at end of file diff --git a/spacy/nl/__init__.py b/spacy/nl/__init__.py index 6e1bbea70..9c9da6bcd 100644 --- a/spacy/nl/__init__.py +++ b/spacy/nl/__init__.py @@ -15,3 +15,6 @@ class Dutch(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS stop_words = STOP_WORDS + + +EXPORT = Dutch \ No newline at end of file diff --git a/spacy/pt/__init__.py b/spacy/pt/__init__.py index b4ee5e339..504a54029 100644 --- a/spacy/pt/__init__.py +++ b/spacy/pt/__init__.py @@ -16,3 +16,6 @@ class Portuguese(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS stop_words = STOP_WORDS + + +EXPORT = Portuguese \ No newline at end of file diff --git a/spacy/sv/__init__.py b/spacy/sv/__init__.py index d7e9db4f1..bd37c06b2 100644 --- a/spacy/sv/__init__.py +++ b/spacy/sv/__init__.py @@ -15,3 +15,6 @@ class Swedish(Language): tokenizer_exceptions = TOKENIZER_EXCEPTIONS stop_words = STOP_WORDS + + +EXPORT = Swedish \ No newline at end of file diff --git a/spacy/zh/__init__.py b/spacy/zh/__init__.py index 1847a7d8d..633459ae6 100644 --- a/spacy/zh/__init__.py +++ b/spacy/zh/__init__.py @@ -9,3 +9,6 @@ class Chinese(Language): import jieba words = list(jieba.cut(text, cut_all=True)) return Doc(self.vocab, words=words, spaces=[False]*len(words)) + + +EXPORT = Chinese \ No newline at end of file