* When lemmatizing non-noun, non-verb, non-adj words, output lower-case

This commit is contained in:
Matthew Honnibal 2015-11-06 00:45:09 +11:00
parent 754bf04162
commit 5b2af4864f

View File

@@ -85,12 +85,10 @@ cdef class Morphology:
def lemmatize(self, const univ_pos_t pos, attr_t orth): def lemmatize(self, const univ_pos_t pos, attr_t orth):
if self.lemmatizer is None: if self.lemmatizer is None:
return orth return self.strings[orth].lower()
cdef unicode py_string = self.strings[orth] cdef unicode py_string = self.strings[orth]
if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT: if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
# TODO: This should lower-case return py_string.lower()
# return self.strings[py_string.lower()]
return orth
cdef set lemma_strings cdef set lemma_strings
cdef unicode lemma_string cdef unicode lemma_string
lemma_strings = self.lemmatizer(py_string, pos) lemma_strings = self.lemmatizer(py_string, pos)