mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Prevent 0-length mem alloc (#6653)
* prevent 0-length mem alloc by adding asserts
* fix lexeme mem allocation
This commit is contained in:
parent
6f83abb971
commit
29b59086f9
|
@@ -133,8 +133,9 @@ cdef class Morphology:
|
|||
"""
|
||||
cdef MorphAnalysisC tag
|
||||
tag.length = len(field_feature_pairs)
|
||||
tag.fields = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
||||
tag.features = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
||||
if tag.length > 0:
|
||||
tag.fields = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
||||
tag.features = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
||||
for i, (field, feature) in enumerate(field_feature_pairs):
|
||||
tag.fields[i] = field
|
||||
tag.features[i] = feature
|
||||
|
|
|
@@ -65,6 +65,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
|
|||
cdef GoldParseStateC gs
|
||||
gs.length = len(heads)
|
||||
gs.stride = 1
|
||||
assert gs.length > 0
|
||||
gs.labels = <attr_t*>mem.alloc(gs.length, sizeof(gs.labels[0]))
|
||||
gs.heads = <int32_t*>mem.alloc(gs.length, sizeof(gs.heads[0]))
|
||||
gs.n_kids = <int32_t*>mem.alloc(gs.length, sizeof(gs.n_kids[0]))
|
||||
|
@@ -126,6 +127,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
|
|||
1
|
||||
)
|
||||
# Make an array of pointers, pointing into the gs_kids_flat array.
|
||||
assert gs.length > 0
|
||||
gs.kids = <int32_t**>mem.alloc(gs.length, sizeof(int32_t*))
|
||||
for i in range(gs.length):
|
||||
if gs.n_kids[i] != 0:
|
||||
|
|
|
@@ -63,6 +63,7 @@ cdef GoldNERStateC create_gold_state(
|
|||
Example example
|
||||
) except *:
|
||||
cdef GoldNERStateC gs
|
||||
assert example.x.length > 0
|
||||
gs.ner = <Transition*>mem.alloc(example.x.length, sizeof(Transition))
|
||||
ner_tags = example.get_aligned_ner()
|
||||
for i, ner_tag in enumerate(ner_tags):
|
||||
|
|
|
@@ -258,6 +258,7 @@ cdef class Tokenizer:
|
|||
tokens = doc.c
|
||||
# Otherwise create a separate array to store modified tokens
|
||||
else:
|
||||
assert max_length > 0
|
||||
tokens = <TokenC*>mem.alloc(max_length, sizeof(TokenC))
|
||||
# Modify tokenization according to filtered special cases
|
||||
offset = self._retokenize_special_spans(doc, tokens, span_data)
|
||||
|
|
|
@@ -225,6 +225,7 @@ cdef class Doc:
|
|||
# Guarantee self.lex[i-x], for any i >= 0 and x < padding is in bounds
|
||||
# However, we need to remember the true starting places, so that we can
|
||||
# realloc.
|
||||
assert size + (PADDING*2) > 0
|
||||
data_start = <TokenC*>self.mem.alloc(size + (PADDING*2), sizeof(TokenC))
|
||||
cdef int i
|
||||
for i in range(size + (PADDING*2)):
|
||||
|
@@ -1177,6 +1178,7 @@ cdef class Doc:
|
|||
other.length = self.length
|
||||
other.max_length = self.max_length
|
||||
buff_size = other.max_length + (PADDING*2)
|
||||
assert buff_size > 0
|
||||
tokens = <TokenC*>other.mem.alloc(buff_size, sizeof(TokenC))
|
||||
memcpy(tokens, self.c - PADDING, buff_size * sizeof(TokenC))
|
||||
other.c = &tokens[PADDING]
|
||||
|
|
|
@@ -164,7 +164,7 @@ cdef class Vocab:
|
|||
if len(string) < 3 or self.length < 10000:
|
||||
mem = self.mem
|
||||
cdef bint is_oov = mem is not self.mem
|
||||
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
||||
lex = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
|
||||
lex.orth = self.strings.add(string)
|
||||
lex.length = len(string)
|
||||
if self.vectors is not None:
|
||||
|
|
Loading…
Reference in New Issue
Block a user