* Fix capitalization in lemmatizer

This commit is contained in:
Matthew Honnibal 2015-11-06 05:44:35 +11:00
parent 6ed3aedf79
commit 9d1b2a103a

View File

@@ -84,11 +84,11 @@ cdef class Morphology:
self._cache.set(tag_id, orth, <void*>cached)
def lemmatize(self, const univ_pos_t pos, attr_t orth):
if self.lemmatizer is None:
return self.strings[orth].lower()
cdef unicode py_string = self.strings[orth]
if self.lemmatizer is None:
return self.strings[py_string.lower()]
if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
return py_string.lower()
return self.strings[py_string.lower()]
cdef set lemma_strings
cdef unicode lemma_string
lemma_strings = self.lemmatizer(py_string, pos)