mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Fiddle with the way strings are interned in lexeme
This commit is contained in:
parent
0bb547ab98
commit
c396581a0b
|
@ -24,8 +24,8 @@ cdef int lexeme_free(LexemeC* lexeme) except -1:
|
||||||
|
|
||||||
|
|
||||||
cdef char* intern_and_encode(unicode string, size_t* length):
|
cdef char* intern_and_encode(unicode string, size_t* length):
|
||||||
global _strings
|
cdef bytes byte_string = string.encode('utf8')
|
||||||
cdef bytes utf8_string = intern(string.encode('utf8'))
|
cdef bytes utf8_string = intern(byte_string)
|
||||||
Py_INCREF(utf8_string)
|
Py_INCREF(utf8_string)
|
||||||
length[0] = len(utf8_string)
|
length[0] = len(utf8_string)
|
||||||
return <char*>utf8_string
|
return <char*>utf8_string
|
||||||
|
|
Loading…
Reference in New Issue
Block a user