Fix lemmatization

This commit is contained in:
Matthew Honnibal 2018-07-05 13:56:02 +02:00
parent ec41ceb383
commit 7b09a4ca49
2 changed files with 1 addition and 2 deletions

View File

@@ -105,7 +105,6 @@ def lemmatize(string, index, exceptions, rules):
oov_forms.append(form) oov_forms.append(form)
# Remove duplicates, and sort forms generated by rules alphabetically. # Remove duplicates, and sort forms generated by rules alphabetically.
forms = list(set(forms)) forms = list(set(forms))
forms.sort()
# Put exceptions at the front of the list, so they get priority. # Put exceptions at the front of the list, so they get priority.
# This is a dodgy heuristic -- but it's the best we can do until we get # This is a dodgy heuristic -- but it's the best we can do until we get
# frequencies on this. We can at least prune out problematic exceptions, # frequencies on this. We can at least prune out problematic exceptions,

View File

@@ -176,7 +176,7 @@ cdef class Morphology:
cdef list lemma_strings cdef list lemma_strings
cdef unicode lemma_string cdef unicode lemma_string
lemma_strings = self.lemmatizer(py_string, univ_pos, morphology) lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
lemma_string = sorted(lemma_strings)[0] lemma_string = lemma_strings[0]
lemma = self.strings.add(lemma_string) lemma = self.strings.add(lemma_string)
return lemma return lemma