mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Refactor morphology.pyx
This commit is contained in:
		
							parent
							
								
									4c6ce7ee84
								
							
						
					
					
						commit
						4e30195c6d
					
				| 
						 | 
					@ -1,36 +1,9 @@
 | 
				
			||||||
 | 
					 | 
				
			||||||
from .tokens cimport TokenC
 | 
					 | 
				
			||||||
from .lexeme cimport Lexeme
 | 
					 | 
				
			||||||
from .utf8string cimport StringStore
 | 
					 | 
				
			||||||
from .typedefs cimport id_t, Morphology
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from preshed.maps cimport PreshMapArray
 | 
					 | 
				
			||||||
from cymem.cymem cimport Pool
 | 
					from cymem.cymem cimport Pool
 | 
				
			||||||
 | 
					from preshed.maps cimport PreshMapArray
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .structs cimport TokenC, Lexeme, Morphology, PosTag
 | 
				
			||||||
# Google universal tag set
 | 
					from .strings cimport StringStore
 | 
				
			||||||
cpdef enum univ_tag_t:
 | 
					from .typedefs cimport id_t, univ_tag_t
 | 
				
			||||||
    NO_TAG
 | 
					 | 
				
			||||||
    ADJ
 | 
					 | 
				
			||||||
    ADV
 | 
					 | 
				
			||||||
    ADP
 | 
					 | 
				
			||||||
    CONJ
 | 
					 | 
				
			||||||
    DET
 | 
					 | 
				
			||||||
    NOUN
 | 
					 | 
				
			||||||
    NUM
 | 
					 | 
				
			||||||
    PRON
 | 
					 | 
				
			||||||
    PRT
 | 
					 | 
				
			||||||
    VERB
 | 
					 | 
				
			||||||
    X
 | 
					 | 
				
			||||||
    PUNCT
 | 
					 | 
				
			||||||
    EOL
 | 
					 | 
				
			||||||
    N_UNIV_TAGS
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef struct PosTag:
 | 
					 | 
				
			||||||
    Morphology morph
 | 
					 | 
				
			||||||
    int id
 | 
					 | 
				
			||||||
    univ_tag_t pos
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef class Morphologizer:
 | 
					cdef class Morphologizer:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,7 +4,9 @@ from os import path
 | 
				
			||||||
import json
 | 
					import json
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .lemmatizer import Lemmatizer
 | 
					from .lemmatizer import Lemmatizer
 | 
				
			||||||
from .typedefs cimport id_t
 | 
					from .typedefs cimport id_t, univ_tag_t
 | 
				
			||||||
 | 
					from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT
 | 
				
			||||||
 | 
					from .typedefs cimport VERB, X, PUNCT, EOL
 | 
				
			||||||
from . import util
 | 
					from . import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -34,13 +36,12 @@ cdef struct _Cached:
 | 
				
			||||||
cdef class Morphologizer:
 | 
					cdef class Morphologizer:
 | 
				
			||||||
    """Given a POS tag and a Lexeme, find its lemma and morphological analysis.
 | 
					    """Given a POS tag and a Lexeme, find its lemma and morphological analysis.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    def __init__(self, StringStore strings, data_dir):
 | 
					    def __init__(self, StringStore strings, object lemmatizer, **kwargs):
 | 
				
			||||||
        self.mem = Pool()
 | 
					        self.mem = Pool()
 | 
				
			||||||
        self.strings = strings
 | 
					        self.strings = strings
 | 
				
			||||||
        cfg = json.load(open(path.join(data_dir, 'config.json')))
 | 
					        tag_map = kwargs['tag_map']
 | 
				
			||||||
        tag_map = cfg['tag_map']
 | 
					        self.tag_names = kwargs['tag_names']
 | 
				
			||||||
        self.tag_names = cfg['tag_names']
 | 
					        self.lemmatizer = lemmatizer
 | 
				
			||||||
        self.lemmatizer = Lemmatizer(path.join(util.DATA_DIR, 'wordnet'))
 | 
					 | 
				
			||||||
        self._cache = PreshMapArray(len(self.tag_names))
 | 
					        self._cache = PreshMapArray(len(self.tag_names))
 | 
				
			||||||
        self.tags = <PosTag*>self.mem.alloc(len(self.tag_names), sizeof(PosTag))
 | 
					        self.tags = <PosTag*>self.mem.alloc(len(self.tag_names), sizeof(PosTag))
 | 
				
			||||||
        for i, tag in enumerate(self.tag_names):
 | 
					        for i, tag in enumerate(self.tag_names):
 | 
				
			||||||
| 
						 | 
					@ -54,9 +55,9 @@ cdef class Morphologizer:
 | 
				
			||||||
            self.tags[i].morph.person = props.get('person', 0)
 | 
					            self.tags[i].morph.person = props.get('person', 0)
 | 
				
			||||||
            self.tags[i].morph.case = props.get('case', 0)
 | 
					            self.tags[i].morph.case = props.get('case', 0)
 | 
				
			||||||
            self.tags[i].morph.misc = props.get('misc', 0)
 | 
					            self.tags[i].morph.misc = props.get('misc', 0)
 | 
				
			||||||
        if path.exists(path.join(data_dir, 'morphs.json')):
 | 
					        #if path.exists(path.join(data_dir, 'morphs.json')):
 | 
				
			||||||
            with open(path.join(data_dir, 'morphs.json')) as file_:
 | 
					        #    with open(path.join(data_dir, 'morphs.json')) as file_:
 | 
				
			||||||
                self.load_exceptions(json.load(file_))
 | 
					        #        self.load_exceptions(json.load(file_))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    cdef int lemmatize(self, const univ_tag_t pos, const Lexeme* lex) except -1:
 | 
					    cdef int lemmatize(self, const univ_tag_t pos, const Lexeme* lex) except -1:
 | 
				
			||||||
        if self.lemmatizer is None:
 | 
					        if self.lemmatizer is None:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user