mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Fix lemmatization
This commit is contained in:
parent
ec41ceb383
commit
7b09a4ca49
|
@@ -105,7 +105,6 @@ def lemmatize(string, index, exceptions, rules):
|
||||||
oov_forms.append(form)
|
oov_forms.append(form)
|
||||||
# Remove duplicates, and sort forms generated by rules alphabetically.
|
# Remove duplicates, and sort forms generated by rules alphabetically.
|
||||||
forms = list(set(forms))
|
forms = list(set(forms))
|
||||||
forms.sort()
|
|
||||||
# Put exceptions at the front of the list, so they get priority.
|
# Put exceptions at the front of the list, so they get priority.
|
||||||
# This is a dodgy heuristic -- but it's the best we can do until we get
|
# This is a dodgy heuristic -- but it's the best we can do until we get
|
||||||
# frequencies on this. We can at least prune out problematic exceptions,
|
# frequencies on this. We can at least prune out problematic exceptions,
|
||||||
|
|
|
@@ -176,7 +176,7 @@ cdef class Morphology:
|
||||||
cdef list lemma_strings
|
cdef list lemma_strings
|
||||||
cdef unicode lemma_string
|
cdef unicode lemma_string
|
||||||
lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
|
lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
|
||||||
lemma_string = sorted(lemma_strings)[0]
|
lemma_string = lemma_strings[0]
|
||||||
lemma = self.strings.add(lemma_string)
|
lemma = self.strings.add(lemma_string)
|
||||||
return lemma
|
return lemma
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user