mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
Fix errors in previous commit
This commit is contained in:
parent
4f400fa486
commit
c748907a66
|
@ -47,13 +47,20 @@ class Lemmatizer(object):
|
||||||
morphology = {} if morphology is None else morphology
|
morphology = {} if morphology is None else morphology
|
||||||
others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')]
|
others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')]
|
||||||
true_morph_key = morphology.get('morph', 0)
|
true_morph_key = morphology.get('morph', 0)
|
||||||
if univ_pos == 'noun' and morphology.get('Number') == 'sing' and not others:
|
print(univ_pos, morphology)
|
||||||
|
if univ_pos == 'noun' and morphology.get('Number') == 'sing':
|
||||||
return True
|
return True
|
||||||
elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf' and not others:
|
elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf':
|
||||||
return True
|
return True
|
||||||
elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
|
elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
|
||||||
return True
|
return True
|
||||||
elif true_morph_key in (VerbForm_inf, VerbForm_none, Number_sing, Degree_pos):
|
elif VerbForm_inf in morphology:
|
||||||
|
return True
|
||||||
|
elif VerbForm_none in morphology:
|
||||||
|
return True
|
||||||
|
elif Number_sing in morphology:
|
||||||
|
return True
|
||||||
|
elif Degree_pos in morphology:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
|
@ -25,6 +25,8 @@ def _normalize_props(props):
|
||||||
if value in POS_IDS:
|
if value in POS_IDS:
|
||||||
value = POS_IDS[value]
|
value = POS_IDS[value]
|
||||||
out[key] = value
|
out[key] = value
|
||||||
|
elif isinstance(key, int):
|
||||||
|
out[key] = value
|
||||||
elif key.lower() == 'pos':
|
elif key.lower() == 'pos':
|
||||||
out[POS] = POS_IDS[value.upper()]
|
out[POS] = POS_IDS[value.upper()]
|
||||||
else:
|
else:
|
||||||
|
@ -45,13 +47,13 @@ cdef class Morphology:
|
||||||
self.rich_tags = <RichTagC*>self.mem.alloc(self.n_tags, sizeof(RichTagC))
|
self.rich_tags = <RichTagC*>self.mem.alloc(self.n_tags, sizeof(RichTagC))
|
||||||
for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
|
for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
|
||||||
attrs = _normalize_props(attrs)
|
attrs = _normalize_props(attrs)
|
||||||
|
self.tag_map[tag_str] = dict(attrs)
|
||||||
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
||||||
self.rich_tags[i].id = i
|
self.rich_tags[i].id = i
|
||||||
self.rich_tags[i].name = self.strings[tag_str]
|
self.rich_tags[i].name = self.strings[tag_str]
|
||||||
self.rich_tags[i].morph = 0
|
self.rich_tags[i].morph = 0
|
||||||
self.rich_tags[i].pos = attrs[POS]
|
self.rich_tags[i].pos = attrs[POS]
|
||||||
self.reverse_index[self.rich_tags[i].name] = i
|
self.reverse_index[self.rich_tags[i].name] = i
|
||||||
self.tag_map[tag_str] = attrs
|
|
||||||
self._cache = PreshMapArray(self.n_tags)
|
self._cache = PreshMapArray(self.n_tags)
|
||||||
|
|
||||||
def __reduce__(self):
|
def __reduce__(self):
|
||||||
|
@ -79,6 +81,7 @@ cdef class Morphology:
|
||||||
if analysis is NULL:
|
if analysis is NULL:
|
||||||
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
||||||
tag_str = self.strings[self.rich_tags[tag_id].name]
|
tag_str = self.strings[self.rich_tags[tag_id].name]
|
||||||
|
analysis.tag = rich_tag
|
||||||
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
|
||||||
self.tag_map.get(tag_str, {}))
|
self.tag_map.get(tag_str, {}))
|
||||||
self._cache.set(tag_id, token.lex.orth, analysis)
|
self._cache.set(tag_id, token.lex.orth, analysis)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user