mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Fiddle with the way strings are interned in lexeme
This commit is contained in:
parent
0bb547ab98
commit
c396581a0b
|
@@ -24,8 +24,8 @@ cdef int lexeme_free(LexemeC* lexeme) except -1:
|
|||
|
||||
|
||||
cdef char* intern_and_encode(unicode string, size_t* length):
|
||||
global _strings
|
||||
cdef bytes utf8_string = intern(string.encode('utf8'))
|
||||
cdef bytes byte_string = string.encode('utf8')
|
||||
cdef bytes utf8_string = intern(byte_string)
|
||||
Py_INCREF(utf8_string)
|
||||
length[0] = len(utf8_string)
|
||||
return <char*>utf8_string
|
||||
|
|
Loading…
Reference in New Issue
Block a user