mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Merge remote-tracking branch 'origin/organize-language-data' into organize-language-data
This commit is contained in:
		
						commit
						b11d8cd3db
					
				|  | @ -21,3 +21,4 @@ class English(Language): | |||
|         tokenizer_exceptions = TOKENIZER_EXCEPTIONS | ||||
|         tag_map = TAG_MAP | ||||
|         stop_words = STOP_WORDS | ||||
|         lemma_rules = LEMMA_RULES | ||||
|  |  | |||
|  | @ -39,7 +39,7 @@ class BaseDefaults(object): | |||
|         if nlp is None or nlp.path is None: | ||||
|             return Lemmatizer({}, {}, {}) | ||||
|         else: | ||||
|             return Lemmatizer.load(nlp.path) | ||||
|             return Lemmatizer.load(nlp.path, rules=self.lemma_rules) | ||||
| 
 | ||||
|     @classmethod | ||||
|     def create_vocab(cls, nlp=None): | ||||
|  | @ -160,6 +160,8 @@ class BaseDefaults(object): | |||
| 
 | ||||
|     stop_words = set() | ||||
| 
 | ||||
|     lemma_rules = {} | ||||
| 
 | ||||
|     lex_attr_getters = { | ||||
|         attrs.LOWER: lambda string: string.lower(), | ||||
|         attrs.NORM: lambda string: string, | ||||
|  |  | |||
|  | @ -9,7 +9,7 @@ from .symbols import POS, NOUN, VERB, ADJ, PUNCT | |||
| 
 | ||||
| class Lemmatizer(object): | ||||
|     @classmethod | ||||
|     def load(cls, path): | ||||
|     def load(cls, path, rules=None): | ||||
|         index = {} | ||||
|         exc = {} | ||||
|         for pos in ['adj', 'noun', 'verb']: | ||||
|  | @ -25,8 +25,11 @@ class Lemmatizer(object): | |||
|                     exc[pos] = read_exc(file_) | ||||
|             else: | ||||
|                 exc[pos] = {} | ||||
|         with (path / 'vocab' / 'lemma_rules.json').open('r', encoding='utf8') as file_: | ||||
|             rules = json.load(file_) | ||||
|         if rules is None and (path / 'vocab' / 'lemma_rules.json').exists(): | ||||
|             with (path / 'vocab' / 'lemma_rules.json').open('r', encoding='utf8') as file_: | ||||
|                 rules = json.load(file_) | ||||
|         elif rules is None: | ||||
|             rules = {} | ||||
|         return cls(index, exc, rules) | ||||
| 
 | ||||
|     def __init__(self, index, exceptions, rules): | ||||
|  |  | |||
|  | @ -140,6 +140,7 @@ cdef class Morphology: | |||
|         lemma = self.strings[lemma_string] | ||||
|         return lemma | ||||
| 
 | ||||
| 
 | ||||
| IDS = { | ||||
|     "Animacy_anim": Animacy_anim, | ||||
|     "Animacy_inam": Animacy_inam, | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user