* Fiddle with the way strings are interned in lexeme

This commit is contained in:
Matthew Honnibal 2014-09-15 06:34:45 +02:00
parent 0bb547ab98
commit c396581a0b

View File

@@ -24,8 +24,8 @@ cdef int lexeme_free(LexemeC* lexeme) except -1:
 cdef char* intern_and_encode(unicode string, size_t* length):
-    global _strings
-    cdef bytes utf8_string = intern(string.encode('utf8'))
+    cdef bytes byte_string = string.encode('utf8')
+    cdef bytes utf8_string = intern(byte_string)
     Py_INCREF(utf8_string)
     length[0] = len(utf8_string)
     return <char*>utf8_string