mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Make morphology use int attributes internally
The morphology class was calling the lemmatizer inconsistently, which some string-valued attributes. This caused Issue #903.
This commit is contained in:
parent
4454c1b23f
commit
850d35dcb3
|
@ -32,12 +32,11 @@ def _normalize_props(props):
|
|||
return out
|
||||
|
||||
|
||||
|
||||
cdef class Morphology:
|
||||
def __init__(self, StringStore string_store, tag_map, lemmatizer):
|
||||
self.mem = Pool()
|
||||
self.strings = string_store
|
||||
self.tag_map = tag_map
|
||||
self.tag_map = {}
|
||||
self.lemmatizer = lemmatizer
|
||||
self.n_tags = len(tag_map) + 1
|
||||
self.tag_names = tuple(sorted(tag_map.keys()))
|
||||
|
@ -52,6 +51,7 @@ cdef class Morphology:
|
|||
self.rich_tags[i].morph = 0
|
||||
self.rich_tags[i].pos = attrs[POS]
|
||||
self.reverse_index[self.rich_tags[i].name] = i
|
||||
self.tag_map[tag_str] = attrs
|
||||
self._cache = PreshMapArray(self.n_tags)
|
||||
|
||||
def __reduce__(self):
|
||||
|
@ -74,10 +74,10 @@ cdef class Morphology:
|
|||
# Related to Issue #220
|
||||
if Lexeme.c_check_flag(token.lex, IS_SPACE):
|
||||
tag_id = self.reverse_index[self.strings['SP']]
|
||||
rich_tag = self.rich_tags[tag_id]
|
||||
analysis = <MorphAnalysisC*>self._cache.get(tag_id, token.lex.orth)
|
||||
if analysis is NULL:
|
||||
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
||||
analysis.tag = self.rich_tags[tag_id]
|
||||
tag_str = self.strings[self.rich_tags[tag_id].name]
|
||||
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
||||
self.tag_map.get(tag_str, {}))
|
||||
|
@ -126,8 +126,7 @@ cdef class Morphology:
|
|||
else:
|
||||
self.assign_feature(&cached.tag.morph, name_id, value_id)
|
||||
if cached.lemma == 0:
|
||||
cached.lemma = self.lemmatize(rich_tag.pos, orth,
|
||||
self.tag_map.get(tag_str, {}))
|
||||
cached.lemma = self.lemmatize(rich_tag.pos, orth, attrs)
|
||||
self._cache.set(tag_id, orth, <void*>cached)
|
||||
|
||||
def load_morph_exceptions(self, dict exc):
|
||||
|
|
Loading…
Reference in New Issue
Block a user