mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
* When lemmatizing non-noun, non-verb, non-adj words, output lower-case
This commit is contained in:
parent
754bf04162
commit
5b2af4864f
|
@@ -85,12 +85,10 @@ cdef class Morphology:
|
|||
|
||||
def lemmatize(self, const univ_pos_t pos, attr_t orth):
|
||||
if self.lemmatizer is None:
|
||||
return orth
|
||||
return self.strings[orth].lower()
|
||||
cdef unicode py_string = self.strings[orth]
|
||||
if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
|
||||
# TODO: This should lower-case
|
||||
# return self.strings[py_string.lower()]
|
||||
return orth
|
||||
return py_string.lower()
|
||||
cdef set lemma_strings
|
||||
cdef unicode lemma_string
|
||||
lemma_strings = self.lemmatizer(py_string, pos)
|
||||
|
|
Loading…
Reference in New Issue
Block a user