mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Patch deserialisation for invalid loads, to avoid model failure
This commit is contained in:
parent
d8391b1c4d
commit
8f8bccecb9
|
@@ -400,6 +400,7 @@ cdef class Vocab:
|
||||||
cdef int j = 0
|
cdef int j = 0
|
||||||
cdef SerializedLexemeC lex_data
|
cdef SerializedLexemeC lex_data
|
||||||
chunk_size = sizeof(lex_data.data)
|
chunk_size = sizeof(lex_data.data)
|
||||||
|
cdef void* ptr
|
||||||
cdef unsigned char* bytes_ptr = bytes_data
|
cdef unsigned char* bytes_ptr = bytes_data
|
||||||
for i in range(0, len(bytes_data), chunk_size):
|
for i in range(0, len(bytes_data), chunk_size):
|
||||||
lexeme = <LexemeC*>self.mem.alloc(1, sizeof(LexemeC))
|
lexeme = <LexemeC*>self.mem.alloc(1, sizeof(LexemeC))
|
||||||
|
@@ -407,6 +408,9 @@ cdef class Vocab:
|
||||||
lex_data.data[j] = bytes_ptr[i+j]
|
lex_data.data[j] = bytes_ptr[i+j]
|
||||||
Lexeme.c_from_bytes(lexeme, lex_data)
|
Lexeme.c_from_bytes(lexeme, lex_data)
|
||||||
|
|
||||||
|
ptr = self.strings._map.get(lexeme.orth)
|
||||||
|
if ptr == NULL:
|
||||||
|
continue
|
||||||
py_str = self.strings[lexeme.orth]
|
py_str = self.strings[lexeme.orth]
|
||||||
assert self.strings[py_str] == lexeme.orth, (py_str, lexeme.orth)
|
assert self.strings[py_str] == lexeme.orth, (py_str, lexeme.orth)
|
||||||
key = hash_string(py_str)
|
key = hash_string(py_str)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user