mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
* When lemmatizing non-noun, non-verb, non-adj words, output lower-case
This commit is contained in:
parent
754bf04162
commit
5b2af4864f
|
@ -85,12 +85,10 @@ cdef class Morphology:
|
||||||
|
|
||||||
def lemmatize(self, const univ_pos_t pos, attr_t orth):
|
def lemmatize(self, const univ_pos_t pos, attr_t orth):
|
||||||
if self.lemmatizer is None:
|
if self.lemmatizer is None:
|
||||||
return orth
|
return self.strings[orth].lower()
|
||||||
cdef unicode py_string = self.strings[orth]
|
cdef unicode py_string = self.strings[orth]
|
||||||
if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
|
if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
|
||||||
# TODO: This should lower-case
|
return py_string.lower()
|
||||||
# return self.strings[py_string.lower()]
|
|
||||||
return orth
|
|
||||||
cdef set lemma_strings
|
cdef set lemma_strings
|
||||||
cdef unicode lemma_string
|
cdef unicode lemma_string
|
||||||
lemma_strings = self.lemmatizer(py_string, pos)
|
lemma_strings = self.lemmatizer(py_string, pos)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user