From c396581a0b908ea2ff5244c01ce5d87153b7d729 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 15 Sep 2014 06:34:45 +0200
Subject: [PATCH] * Fiddle with the way strings are interned in lexeme

---
 spacy/lexeme.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx
index 3c0a28c7f..223fc5e06 100644
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@@ -24,8 +24,8 @@ cdef int lexeme_free(LexemeC* lexeme) except -1:
 
 
 cdef char* intern_and_encode(unicode string, size_t* length):
-    global _strings
-    cdef bytes utf8_string = intern(string.encode('utf8'))
+    cdef bytes byte_string = string.encode('utf8')
+    cdef bytes utf8_string = intern(byte_string)
     Py_INCREF(utf8_string)
     length[0] = len(utf8_string)
     return utf8_string