From 5b8e8b4f7220abe80eb0b63fb6a145477f2832be Mon Sep 17 00:00:00 2001 From: Someon Date: Wed, 11 Mar 2015 01:26:59 +0200 Subject: [PATCH] Fix lemma_ unicode handling --- spacy/en/pos.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/en/pos.pyx b/spacy/en/pos.pyx index 1094d5c09..a38745165 100644 --- a/spacy/en/pos.pyx +++ b/spacy/en/pos.pyx @@ -335,7 +335,8 @@ cdef class EnPosTagger: cdef unicode lemma_string lemma_strings = self.lemmatizer(py_string, pos) lemma_string = sorted(lemma_strings)[0] - lemma = self.strings.intern(lemma_string.encode('utf8'), len(lemma_string)).i + raw_string = lemma_string.encode('utf8') + lemma = self.strings.intern(raw_string, len(raw_string)).i return lemma def load_morph_exceptions(self, dict exc):