Fix lemma_ unicode handling

This commit is contained in:
Someon 2015-03-11 01:26:59 +02:00
parent ec25976416
commit 5b8e8b4f72

View File

@@ -335,7 +335,8 @@ cdef class EnPosTagger:
cdef unicode lemma_string
lemma_strings = self.lemmatizer(py_string, pos)
lemma_string = sorted(lemma_strings)[0]
-lemma = self.strings.intern(lemma_string.encode('utf8'), len(lemma_string)).i
+raw_string = lemma_string.encode('utf8')
+lemma = self.strings.intern(raw_string, len(raw_string)).i
return lemma
def load_morph_exceptions(self, dict exc):