* When lemmatizing non-noun, non-verb, non-adj words, output lower-case

This commit is contained in:
Matthew Honnibal 2015-11-06 00:45:09 +11:00
parent 754bf04162
commit 5b2af4864f

View File

@@ -85,12 +85,10 @@ cdef class Morphology:
def lemmatize(self, const univ_pos_t pos, attr_t orth):
if self.lemmatizer is None:
return orth
return self.strings[orth].lower()
cdef unicode py_string = self.strings[orth]
if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
# TODO: This should lower-case
# return self.strings[py_string.lower()]
return orth
return py_string.lower()
cdef set lemma_strings
cdef unicode lemma_string
lemma_strings = self.lemmatizer(py_string, pos)