Lazy imports language

parent c0afcd22bb
commit f2ab7d77b4

@@ -3,18 +3,13 @@ from __future__ import unicode_literals
 
 from . import util
 from .deprecated import resolve_model_name
+import importlib
 from .cli.info import info
 
-from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he, nb
-
-
-_languages = (en.English, de.German, es.Spanish, pt.Portuguese, fr.French,
-             it.Italian, hu.Hungarian, zh.Chinese, nl.Dutch, sv.Swedish,
-             fi.Finnish, bn.Bengali, he.Hebrew, nb.Norwegian)
-
-
-for _lang in _languages:
-    util.set_lang_class(_lang.lang, _lang)
+_languages_name = set(["en", "de", "es", "pt", "fr",
+             "it", "hu", "zh", "nl", "sv",
+             "fi", "bn", "he", "nb"])
+
 
 
 def load(name, **overrides):
@@ -34,7 +29,7 @@ def load(name, **overrides):
         model_name = ''
     meta = util.parse_package_meta(data_path, model_name, require=False)
     lang = meta['lang'] if meta and 'lang' in meta else name
-    cls = util.get_lang_class(lang)
+    cls = importlib.import_module("."+lang, "spacy")
     overrides['meta'] = meta
     overrides['path'] = model_path
-    return cls(**overrides)
+    return cls.EXPORT(**overrides)
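
The hunks above drop the eager `from . import en, de, zh, ...` block: `load()` now imports only the requested language package via `importlib` and instantiates whatever class that package exposes as its module-level `EXPORT`. A minimal sketch of the pattern, assuming the `EXPORT` attributes added in the files below; the helper name `get_language_class` is illustrative and not part of the commit:

import importlib

# Supported language codes; only the requested one is imported at call time.
_languages_name = set(["en", "de", "es", "pt", "fr",
                       "it", "hu", "zh", "nl", "sv",
                       "fi", "bn", "he", "nb"])


def get_language_class(lang):
    # Import e.g. spacy.en only now, instead of at "import spacy" time,
    # then return the Language subclass the module exposes as EXPORT.
    if lang not in _languages_name:
        raise ValueError("Unsupported language: %r" % lang)
    module = importlib.import_module("." + lang, "spacy")
    return module.EXPORT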

@@ -22,3 +22,5 @@ class Bengali(Language):
         prefixes = tuple(TOKENIZER_PREFIXES)
         suffixes = tuple(TOKENIZER_SUFFIXES)
         infixes = tuple(TOKENIZER_INFIXES)
+
+EXPORT = Bengali

@@ -20,3 +20,6 @@ class German(Language):
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         tag_map = TAG_MAP
         stop_words = STOP_WORDS
+
+
+EXPORT = German

@@ -32,3 +32,6 @@ class English(Language):
         # Special-case hack for loading the GloVe vectors, to support <1.0
         overrides = fix_glove_vectors_loading(overrides)
         Language.__init__(self, **overrides)
+
+
+EXPORT = English

@@ -19,3 +19,6 @@ class Spanish(Language):
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         tag_map = TAG_MAP
         stop_words = STOP_WORDS
+
+
+EXPORT = Spanish

@@ -15,3 +15,6 @@ class Finnish(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Finnish

@@ -32,3 +32,6 @@ class French(Language):
     lang = 'fr'
 
     Defaults = FrenchDefaults
+
+
+EXPORT = French

@@ -16,3 +16,6 @@ class Hebrew(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Hebrew

@@ -24,3 +24,6 @@ class Hungarian(Language):
         stop_words = set(STOP_WORDS)
 
         token_match = TOKEN_MATCH
+
+
+EXPORT = Hungarian

@@ -16,3 +16,6 @@ class Italian(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Italian

@@ -22,5 +22,6 @@ TAG_MAP = {
     "CCONJ":    {POS: CCONJ}, # U20
     "ADJ":      {POS: ADJ},
     "VERB":     {POS: VERB},
-    "PART":     {POS: PART}
+    "PART":     {POS: PART},
+    "SP":       {POS: SPACE}
 }

@@ -23,3 +23,6 @@ class Norwegian(Language):
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         #tag_map = TAG_MAP
         stop_words = STOP_WORDS
+
+
+EXPORT = Norwegian

@@ -15,3 +15,6 @@ class Dutch(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Dutch

@@ -16,3 +16,6 @@ class Portuguese(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Portuguese

@@ -15,3 +15,6 @@ class Swedish(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Swedish

@@ -9,3 +9,6 @@ class Chinese(Language):
         import jieba
         words = list(jieba.cut(text, cut_all=True))
         return Doc(self.vocab, words=words, spaces=[False]*len(words))
+
+
+EXPORT = Chinese
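
Apart from the TAG_MAP tweak that adds an "SP" → SPACE entry, every language file receives the same tail: a module-level EXPORT bound to its Language subclass, the only name the new `load()` reads from the lazily imported module. As a sketch of how a hypothetical new language package (placeholder code `xx`, not part of this commit) would plug into the same mechanism:

# Hypothetical spacy/xx/__init__.py following the convention introduced here.
from ..language import Language


class SomeLanguage(Language):
    lang = 'xx'  # placeholder language code, for illustration only


EXPORT = SomeLanguage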