Always make lemmatizer return a list of lemmas, not a set

2025-10-25 13:11:03 +03:00 · 2017-10-24 16:00:54 +02:00 · 2017-10-24 16:00:54 +02:00 · 8492d5be6d
commit 8492d5be6d
parent 95f866f99f
2 changed files with 4 additions and 4 deletions
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -26,10 +26,10 @@ class Lemmatizer(object):
        elif univ_pos in (PUNCT, 'PUNCT', 'punct'):
            univ_pos = 'punct'
        else:
-            return set([string.lower()])
+            return list(set([string.lower()]))
        # See Issue #435 for example of where this logic is requied.
        if self.is_base_form(univ_pos, morphology):
-            return set([string.lower()])
+            return list(set([string.lower()]))
        lemmas = lemmatize(string, self.index.get(univ_pos, {}),
                           self.exc.get(univ_pos, {}),
                           self.rules.get(univ_pos, []))
@@ -108,4 +108,4 @@ def lemmatize(string, index, exceptions, rules):
        forms.extend(oov_forms)
    if not forms:
        forms.append(string)
-    return set(forms)
+    return list(set(forms))
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -172,7 +172,7 @@ cdef class Morphology:
        cdef unicode py_string = self.strings[orth]
        if self.lemmatizer is None:
            return self.strings.add(py_string.lower())
-        cdef set lemma_strings
+        cdef list lemma_strings
        cdef unicode lemma_string
        lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
        lemma_string = sorted(lemma_strings)[0]