mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Restore previous morphology stuff
This commit is contained in:
parent
3bba8e9245
commit
a3d2e616d5
|
@ -1,5 +1,5 @@
|
|||
from cymem.cymem cimport Pool
|
||||
from preshed.maps cimport PreshMap
|
||||
from preshed.maps cimport PreshMap, PreshMapArray
|
||||
from libc.stdint cimport uint64_t
|
||||
from murmurhash cimport mrmr
|
||||
|
||||
|
@ -17,14 +17,17 @@ cdef class Morphology:
|
|||
|
||||
cdef public object lemmatizer
|
||||
cdef readonly object tag_map
|
||||
cdef readonly object tag_names
|
||||
cdef readonly object reverse_index
|
||||
cdef readonly object exc
|
||||
cdef readonly int n_tags
|
||||
|
||||
cdef hash_t insert(self, RichTagC tag) except 0
|
||||
|
||||
cdef int assign_untagged(self, TokenC* token) except -1
|
||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
||||
cdef update_token_morph(self, TokenC* token, features)
|
||||
cdef set_token_morph(self, TokenC* token, pos, features)
|
||||
cdef update_morph(self, hash_t morph, features)
|
||||
|
||||
cdef enum univ_morph_t:
|
||||
NIL = 0
|
||||
|
|
|
@ -125,17 +125,17 @@ cdef class Morphology:
|
|||
# figure out why the statistical model fails. Related to Issue #220
|
||||
if Lexeme.c_check_flag(token.lex, IS_SPACE):
|
||||
tag_id = self.reverse_index[self.strings.add('_SP')]
|
||||
tag_str = self.tag_names[tag_id]
|
||||
features = dict(self.tag_map.get(tag_str, {}))
|
||||
lemma = <attr_t>self._cache.get(tag_id, token.lex.orth)
|
||||
if lemma == 0:
|
||||
tag_str = self.tag_names[tag_id]
|
||||
features = dict(self.tag_map.get(tag_str, {}))
|
||||
if lemma == 0 and features:
|
||||
pos = self.strings.as_int(features.pop('POS'))
|
||||
lemma = self.lemmatize(pos, token.lex.orth, features)
|
||||
self._cache.set(tag_id, token.lex.orth, lemma)
|
||||
token.lemma = lemma
|
||||
token.pos = pos
|
||||
token.tag = self.strings[tag_str]
|
||||
token.morph = self.add(attrs)
|
||||
token.morph = self.add(features)
|
||||
|
||||
cdef update_morph(self, hash_t morph, features):
|
||||
"""Update a morphological analysis with new feature values."""
|
||||
|
@ -175,10 +175,9 @@ cpdef intify_features(StringStore strings, features):
|
|||
cdef hash_t hash_tag(RichTagC tag) nogil:
|
||||
return mrmr.hash64(&tag, sizeof(tag), 0)
|
||||
|
||||
cdef RichTagC create_rich_tag(pos_, features):
|
||||
cdef RichTagC create_rich_tag(features):
|
||||
cdef RichTagC tag
|
||||
cdef univ_morph_t feature
|
||||
tag.pos = get_int_tag(pos_)
|
||||
for feature in features:
|
||||
set_feature(&tag, feature, 1)
|
||||
return tag
|
||||
|
|
Loading…
Reference in New Issue
Block a user