mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 16:24:16 +03:00
Always make lemmatizer return a list of lemmas, not a set
This commit is contained in:
parent
95f866f99f
commit
8492d5be6d
|
@ -26,10 +26,10 @@ class Lemmatizer(object):
|
|||
elif univ_pos in (PUNCT, 'PUNCT', 'punct'):
|
||||
univ_pos = 'punct'
|
||||
else:
|
||||
return set([string.lower()])
|
||||
return list(set([string.lower()]))
|
||||
# See Issue #435 for example of where this logic is requied.
|
||||
if self.is_base_form(univ_pos, morphology):
|
||||
return set([string.lower()])
|
||||
return list(set([string.lower()]))
|
||||
lemmas = lemmatize(string, self.index.get(univ_pos, {}),
|
||||
self.exc.get(univ_pos, {}),
|
||||
self.rules.get(univ_pos, []))
|
||||
|
@ -108,4 +108,4 @@ def lemmatize(string, index, exceptions, rules):
|
|||
forms.extend(oov_forms)
|
||||
if not forms:
|
||||
forms.append(string)
|
||||
return set(forms)
|
||||
return list(set(forms))
|
||||
|
|
|
@ -172,7 +172,7 @@ cdef class Morphology:
|
|||
cdef unicode py_string = self.strings[orth]
|
||||
if self.lemmatizer is None:
|
||||
return self.strings.add(py_string.lower())
|
||||
cdef set lemma_strings
|
||||
cdef list lemma_strings
|
||||
cdef unicode lemma_string
|
||||
lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
|
||||
lemma_string = sorted(lemma_strings)[0]
|
||||
|
|
Loading…
Reference in New Issue
Block a user