mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-06 21:03:07 +03:00
Make morphology use int attributes internally
The morphology class was calling the lemmatizer inconsistently, with some string-valued attributes. This caused Issue #903.
This commit is contained in:
parent
4454c1b23f
commit
850d35dcb3
|
@ -32,12 +32,11 @@ def _normalize_props(props):
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef class Morphology:
|
cdef class Morphology:
|
||||||
def __init__(self, StringStore string_store, tag_map, lemmatizer):
|
def __init__(self, StringStore string_store, tag_map, lemmatizer):
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self.strings = string_store
|
self.strings = string_store
|
||||||
self.tag_map = tag_map
|
self.tag_map = {}
|
||||||
self.lemmatizer = lemmatizer
|
self.lemmatizer = lemmatizer
|
||||||
self.n_tags = len(tag_map) + 1
|
self.n_tags = len(tag_map) + 1
|
||||||
self.tag_names = tuple(sorted(tag_map.keys()))
|
self.tag_names = tuple(sorted(tag_map.keys()))
|
||||||
|
@ -52,6 +51,7 @@ cdef class Morphology:
|
||||||
self.rich_tags[i].morph = 0
|
self.rich_tags[i].morph = 0
|
||||||
self.rich_tags[i].pos = attrs[POS]
|
self.rich_tags[i].pos = attrs[POS]
|
||||||
self.reverse_index[self.rich_tags[i].name] = i
|
self.reverse_index[self.rich_tags[i].name] = i
|
||||||
|
self.tag_map[tag_str] = attrs
|
||||||
self._cache = PreshMapArray(self.n_tags)
|
self._cache = PreshMapArray(self.n_tags)
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
|
@ -74,10 +74,10 @@ cdef class Morphology:
|
||||||
# Related to Issue #220
|
# Related to Issue #220
|
||||||
if Lexeme.c_check_flag(token.lex, IS_SPACE):
|
if Lexeme.c_check_flag(token.lex, IS_SPACE):
|
||||||
tag_id = self.reverse_index[self.strings['SP']]
|
tag_id = self.reverse_index[self.strings['SP']]
|
||||||
|
rich_tag = self.rich_tags[tag_id]
|
||||||
analysis = <MorphAnalysisC*>self._cache.get(tag_id, token.lex.orth)
|
analysis = <MorphAnalysisC*>self._cache.get(tag_id, token.lex.orth)
|
||||||
if analysis is NULL:
|
if analysis is NULL:
|
||||||
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
||||||
analysis.tag = self.rich_tags[tag_id]
|
|
||||||
tag_str = self.strings[self.rich_tags[tag_id].name]
|
tag_str = self.strings[self.rich_tags[tag_id].name]
|
||||||
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
||||||
self.tag_map.get(tag_str, {}))
|
self.tag_map.get(tag_str, {}))
|
||||||
|
@ -126,8 +126,7 @@ cdef class Morphology:
|
||||||
else:
|
else:
|
||||||
self.assign_feature(&cached.tag.morph, name_id, value_id)
|
self.assign_feature(&cached.tag.morph, name_id, value_id)
|
||||||
if cached.lemma == 0:
|
if cached.lemma == 0:
|
||||||
cached.lemma = self.lemmatize(rich_tag.pos, orth,
|
cached.lemma = self.lemmatize(rich_tag.pos, orth, attrs)
|
||||||
self.tag_map.get(tag_str, {}))
|
|
||||||
self._cache.set(tag_id, orth, <void*>cached)
|
self._cache.set(tag_id, orth, <void*>cached)
|
||||||
|
|
||||||
def load_morph_exceptions(self, dict exc):
|
def load_morph_exceptions(self, dict exc):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user