Fix errors in previous commit

This commit is contained in:
Matthew Honnibal 2017-03-25 21:56:41 +01:00
parent 4f400fa486
commit c748907a66
2 changed files with 14 additions and 4 deletions

View File

@@ -47,13 +47,20 @@ class Lemmatizer(object):
morphology = {} if morphology is None else morphology morphology = {} if morphology is None else morphology
others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')] others = [key for key in morphology if key not in (POS, 'number', 'pos', 'verbform')]
true_morph_key = morphology.get('morph', 0) true_morph_key = morphology.get('morph', 0)
if univ_pos == 'noun' and morphology.get('Number') == 'sing' and not others: print(univ_pos, morphology)
if univ_pos == 'noun' and morphology.get('Number') == 'sing':
return True return True
elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf' and not others: elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf':
return True return True
elif univ_pos == 'adj' and morphology.get('Degree') == 'pos': elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
return True return True
elif true_morph_key in (VerbForm_inf, VerbForm_none, Number_sing, Degree_pos): elif VerbForm_inf in morphology:
return True
elif VerbForm_none in morphology:
return True
elif Number_sing in morphology:
return True
elif Degree_pos in morphology:
return True return True
else: else:
return False return False

View File

@@ -25,6 +25,8 @@ def _normalize_props(props):
if value in POS_IDS: if value in POS_IDS:
value = POS_IDS[value] value = POS_IDS[value]
out[key] = value out[key] = value
elif isinstance(key, int):
out[key] = value
elif key.lower() == 'pos': elif key.lower() == 'pos':
out[POS] = POS_IDS[value.upper()] out[POS] = POS_IDS[value.upper()]
else: else:
@@ -45,13 +47,13 @@ cdef class Morphology:
self.rich_tags = <RichTagC*>self.mem.alloc(self.n_tags, sizeof(RichTagC)) self.rich_tags = <RichTagC*>self.mem.alloc(self.n_tags, sizeof(RichTagC))
for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())): for i, (tag_str, attrs) in enumerate(sorted(tag_map.items())):
attrs = _normalize_props(attrs) attrs = _normalize_props(attrs)
self.tag_map[tag_str] = dict(attrs)
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True) attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
self.rich_tags[i].id = i self.rich_tags[i].id = i
self.rich_tags[i].name = self.strings[tag_str] self.rich_tags[i].name = self.strings[tag_str]
self.rich_tags[i].morph = 0 self.rich_tags[i].morph = 0
self.rich_tags[i].pos = attrs[POS] self.rich_tags[i].pos = attrs[POS]
self.reverse_index[self.rich_tags[i].name] = i self.reverse_index[self.rich_tags[i].name] = i
self.tag_map[tag_str] = attrs
self._cache = PreshMapArray(self.n_tags) self._cache = PreshMapArray(self.n_tags)
def __reduce__(self): def __reduce__(self):
@@ -79,6 +81,7 @@ cdef class Morphology:
if analysis is NULL: if analysis is NULL:
analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC)) analysis = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
tag_str = self.strings[self.rich_tags[tag_id].name] tag_str = self.strings[self.rich_tags[tag_id].name]
analysis.tag = rich_tag
analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth, analysis.lemma = self.lemmatize(analysis.tag.pos, token.lex.orth,
self.tag_map.get(tag_str, {})) self.tag_map.get(tag_str, {}))
self._cache.set(tag_id, token.lex.orth, analysis) self._cache.set(tag_id, token.lex.orth, analysis)