Mirror of https://github.com/explosion/spaCy.git
Synced 2025-10-30 23:47:31 +03:00

Lazy imports language

This commit is contained in:
    parent c0afcd22bb
    commit f2ab7d77b4

@@ -3,18 +3,13 @@ from __future__ import unicode_literals
 
 from . import util
 from .deprecated import resolve_model_name
+import importlib
 from .cli.info import info
 
-from . import en, de, zh, es, it, hu, fr, pt, nl, sv, fi, bn, he, nb
-
-
-_languages = (en.English, de.German, es.Spanish, pt.Portuguese, fr.French,
-             it.Italian, hu.Hungarian, zh.Chinese, nl.Dutch, sv.Swedish,
-             fi.Finnish, bn.Bengali, he.Hebrew, nb.Norwegian)
-
-
-for _lang in _languages:
-    util.set_lang_class(_lang.lang, _lang)
+_languages_name = set(["en", "de", "es", "pt", "fr",
+             "it", "hu", "zh", "nl", "sv",
+             "fi", "bn", "he", "nb"])
+
 
 
 def load(name, **overrides):
@@ -34,7 +29,7 @@ def load(name, **overrides):
         model_name = ''
     meta = util.parse_package_meta(data_path, model_name, require=False)
     lang = meta['lang'] if meta and 'lang' in meta else name
-    cls = util.get_lang_class(lang)
+    cls = importlib.import_module("."+lang, "spacy")
     overrides['meta'] = meta
     overrides['path'] = model_path
-    return cls(**overrides)
+    return cls.EXPORT(**overrides)

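The two hunks above are the whole mechanism: instead of importing all fourteen language modules eagerly and registering each class with util.set_lang_class, spacy/__init__.py now imports spacy.<lang> on demand and reaches the class through a module-level EXPORT alias, which the rest of this commit adds to every language package. A minimal sketch of the pattern, with an illustrative helper name (_lang_class is not part of the commit):

    import importlib

    def _lang_class(lang):
        # importlib.import_module returns the module object (e.g. spacy.en);
        # its EXPORT attribute is the Language subclass itself.
        module = importlib.import_module("." + lang, "spacy")
        return module.EXPORT

    English = _lang_class("en")   # spacy.en is first imported here

Because import_module returns a module rather than a class, load() must instantiate via cls.EXPORT(**overrides) instead of cls(**overrides); hence the EXPORT line added to each language package below.
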
@@ -22,3 +22,5 @@ class Bengali(Language):
         prefixes = tuple(TOKENIZER_PREFIXES)
         suffixes = tuple(TOKENIZER_SUFFIXES)
         infixes = tuple(TOKENIZER_INFIXES)
+
+EXPORT = Bengali

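The hunk above is the template the rest of the commit repeats: each language package gains a module-level EXPORT alias for its Language subclass, giving the loader a fixed attribute name to fetch. A sketch of the resulting module shape (abbreviated, not the full file):

    from ..language import Language

    class Bengali(Language):
        lang = 'bn'
        # ... tokenizer and language data omitted ...

    EXPORT = Bengali

With this convention, spacy/__init__.py never needs a mapping from ISO code to class name; importlib.import_module("." + lang, "spacy").EXPORT works uniformly. The same addition follows for every remaining language:
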
@@ -20,3 +20,6 @@ class German(Language):
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         tag_map = TAG_MAP
         stop_words = STOP_WORDS
+
+
+EXPORT = German

@@ -32,3 +32,6 @@ class English(Language):
         # Special-case hack for loading the GloVe vectors, to support <1.0
         overrides = fix_glove_vectors_loading(overrides)
         Language.__init__(self, **overrides)
+
+
+EXPORT = English

@@ -19,3 +19,6 @@ class Spanish(Language):
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         tag_map = TAG_MAP
         stop_words = STOP_WORDS
+
+
+EXPORT = Spanish

@@ -15,3 +15,6 @@ class Finnish(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Finnish

@@ -32,3 +32,6 @@ class French(Language):
     lang = 'fr'
 
     Defaults = FrenchDefaults
+
+
+EXPORT = French

@@ -16,3 +16,6 @@ class Hebrew(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Hebrew

@@ -24,3 +24,6 @@ class Hungarian(Language):
         stop_words = set(STOP_WORDS)
 
         token_match = TOKEN_MATCH
+
+
+EXPORT = Hungarian

@@ -16,3 +16,6 @@ class Italian(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Italian

@@ -22,5 +22,6 @@ TAG_MAP = {
     "CCONJ":    {POS: CCONJ}, # U20
     "ADJ":      {POS: ADJ},
     "VERB":     {POS: VERB},
-    "PART":     {POS: PART}
+    "PART":     {POS: PART},
+    "SP":       {POS: SPACE}
 }

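This tag-map hunk rides along with the lazy-import change: it adds an "SP" entry so the whitespace tag maps to the universal SPACE part-of-speech, adding the trailing comma the previous last entry now needs. For the new entry to resolve, SPACE must be among the symbols the module imports; presumably something along these lines (an assumption, since the diff does not show the import):

    from ..symbols import POS, ADJ, VERB, PART, CCONJ, SPACE
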
@@ -23,3 +23,6 @@ class Norwegian(Language):
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         #tag_map = TAG_MAP
         stop_words = STOP_WORDS
+
+
+EXPORT = Norwegian

@@ -15,3 +15,6 @@ class Dutch(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Dutch

@@ -16,3 +16,6 @@ class Portuguese(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Portuguese

@@ -15,3 +15,6 @@ class Swedish(Language):
 
         tokenizer_exceptions = TOKENIZER_EXCEPTIONS
         stop_words = STOP_WORDS
+
+
+EXPORT = Swedish

@@ -9,3 +9,6 @@ class Chinese(Language):
         import jieba
         words = list(jieba.cut(text, cut_all=True))
         return Doc(self.vocab, words=words, spaces=[False]*len(words))
+
+
+EXPORT = Chinese

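With every language converted, a plain import spacy no longer pulls in all fourteen language packages and their data tables (tokenizer exceptions, tag maps, stop words); a language module is imported only when load() asks for it. For example, assuming Chinese model data is installed locally:

    import spacy

    # Only spacy.zh is imported at this call; en, de, es, ... stay unimported.
    nlp = spacy.load('zh')

Note that jieba was already lazy before this commit: as the context lines above show, Chinese.make_doc imports it at first use rather than at module import time.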