mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-29 23:17:59 +03:00 
			
		
		
		
	updated Russian tokenizer
moved the attempt to import pymorphy2 into __init__
This commit is contained in:
		
							parent
							
								
									3aad66cf00
								
							
						
					
					
						commit
						7401152289
					
				|  | @ -8,6 +8,9 @@ from .language_data import * | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class RussianTokenizer(object): | class RussianTokenizer(object): | ||||||
|  |     _morph = None | ||||||
|  | 
 | ||||||
|  |     def __init__(self, spacy_tokenizer, cls, nlp=None): | ||||||
|         try: |         try: | ||||||
|             from pymorphy2 import MorphAnalyzer |             from pymorphy2 import MorphAnalyzer | ||||||
|         except ImportError: |         except ImportError: | ||||||
|  | @ -16,9 +19,8 @@ class RussianTokenizer(object): | ||||||
|                 "try to fix it with " |                 "try to fix it with " | ||||||
|                 "pip install pymorphy2==0.8") |                 "pip install pymorphy2==0.8") | ||||||
| 
 | 
 | ||||||
|     _morph = MorphAnalyzer() |         RussianTokenizer._morph = RussianTokenizer._create_morph(MorphAnalyzer) | ||||||
| 
 | 
 | ||||||
|     def __init__(self, spacy_tokenizer, cls, nlp=None): |  | ||||||
|         self.vocab = nlp.vocab if nlp else cls.create_vocab(nlp) |         self.vocab = nlp.vocab if nlp else cls.create_vocab(nlp) | ||||||
|         self._spacy_tokenizer = spacy_tokenizer |         self._spacy_tokenizer = spacy_tokenizer | ||||||
| 
 | 
 | ||||||
|  | @ -36,6 +38,12 @@ class RussianTokenizer(object): | ||||||
|     def _normalize(cls, word): |     def _normalize(cls, word): | ||||||
|         return cls._morph.parse(word)[0].normal_form |         return cls._morph.parse(word)[0].normal_form | ||||||
| 
 | 
 | ||||||
|  |     @classmethod | ||||||
|  |     def _create_morph(cls, morph_analyzer_class): | ||||||
|  |         if not cls._morph: | ||||||
|  |             cls._morph = morph_analyzer_class() | ||||||
|  |         return cls._morph | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class RussianDefaults(Language.Defaults): | class RussianDefaults(Language.Defaults): | ||||||
|     lex_attr_getters = dict(Language.Defaults.lex_attr_getters) |     lex_attr_getters = dict(Language.Defaults.lex_attr_getters) | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user