mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	* Refactor morphology.pyx
This commit is contained in:
		
							parent
							
								
									4c6ce7ee84
								
							
						
					
					
						commit
						4e30195c6d
					
				|  | @ -1,36 +1,9 @@ | |||
| 
 | ||||
| from .tokens cimport TokenC | ||||
| from .lexeme cimport Lexeme | ||||
| from .utf8string cimport StringStore | ||||
| from .typedefs cimport id_t, Morphology | ||||
| 
 | ||||
| from preshed.maps cimport PreshMapArray | ||||
| from cymem.cymem cimport Pool | ||||
| from preshed.maps cimport PreshMapArray | ||||
| 
 | ||||
| 
 | ||||
| # Google universal tag set | ||||
| cpdef enum univ_tag_t: | ||||
|     NO_TAG | ||||
|     ADJ | ||||
|     ADV | ||||
|     ADP | ||||
|     CONJ | ||||
|     DET | ||||
|     NOUN | ||||
|     NUM | ||||
|     PRON | ||||
|     PRT | ||||
|     VERB | ||||
|     X | ||||
|     PUNCT | ||||
|     EOL | ||||
|     N_UNIV_TAGS | ||||
| 
 | ||||
| 
 | ||||
| cdef struct PosTag: | ||||
|     Morphology morph | ||||
|     int id | ||||
|     univ_tag_t pos | ||||
| from .structs cimport TokenC, Lexeme, Morphology, PosTag | ||||
| from .strings cimport StringStore | ||||
| from .typedefs cimport id_t, univ_tag_t | ||||
| 
 | ||||
| 
 | ||||
| cdef class Morphologizer: | ||||
|  |  | |||
|  | @ -4,7 +4,9 @@ from os import path | |||
| import json | ||||
| 
 | ||||
| from .lemmatizer import Lemmatizer | ||||
| from .typedefs cimport id_t | ||||
| from .typedefs cimport id_t, univ_tag_t | ||||
| from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT | ||||
| from .typedefs cimport VERB, X, PUNCT, EOL | ||||
| from . import util | ||||
| 
 | ||||
| 
 | ||||
|  | @ -34,13 +36,12 @@ cdef struct _Cached: | |||
| cdef class Morphologizer: | ||||
|     """Given a POS tag and a Lexeme, find its lemma and morphological analysis. | ||||
|     """ | ||||
|     def __init__(self, StringStore strings, data_dir): | ||||
|     def __init__(self, StringStore strings, object lemmatizer, **kwargs): | ||||
|         self.mem = Pool() | ||||
|         self.strings = strings | ||||
|         cfg = json.load(open(path.join(data_dir, 'config.json'))) | ||||
|         tag_map = cfg['tag_map'] | ||||
|         self.tag_names = cfg['tag_names'] | ||||
|         self.lemmatizer = Lemmatizer(path.join(util.DATA_DIR, 'wordnet')) | ||||
|         tag_map = kwargs['tag_map'] | ||||
|         self.tag_names = kwargs['tag_names'] | ||||
|         self.lemmatizer = lemmatizer | ||||
|         self._cache = PreshMapArray(len(self.tag_names)) | ||||
|         self.tags = <PosTag*>self.mem.alloc(len(self.tag_names), sizeof(PosTag)) | ||||
|         for i, tag in enumerate(self.tag_names): | ||||
|  | @ -54,9 +55,9 @@ cdef class Morphologizer: | |||
|             self.tags[i].morph.person = props.get('person', 0) | ||||
|             self.tags[i].morph.case = props.get('case', 0) | ||||
|             self.tags[i].morph.misc = props.get('misc', 0) | ||||
|         if path.exists(path.join(data_dir, 'morphs.json')): | ||||
|             with open(path.join(data_dir, 'morphs.json')) as file_: | ||||
|                 self.load_exceptions(json.load(file_)) | ||||
|         #if path.exists(path.join(data_dir, 'morphs.json')): | ||||
|         #    with open(path.join(data_dir, 'morphs.json')) as file_: | ||||
|         #        self.load_exceptions(json.load(file_)) | ||||
| 
 | ||||
|     cdef int lemmatize(self, const univ_tag_t pos, const Lexeme* lex) except -1: | ||||
|         if self.lemmatizer is None: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user