mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Prevent 0-length mem alloc (#6653)
* Prevent 0-length mem alloc by adding asserts
* Fix lexeme mem allocation (the count and element-size arguments to `mem.alloc` were swapped)
This commit is contained in:
parent
6f83abb971
commit
29b59086f9
|
@ -133,8 +133,9 @@ cdef class Morphology:
|
||||||
"""
|
"""
|
||||||
cdef MorphAnalysisC tag
|
cdef MorphAnalysisC tag
|
||||||
tag.length = len(field_feature_pairs)
|
tag.length = len(field_feature_pairs)
|
||||||
tag.fields = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
if tag.length > 0:
|
||||||
tag.features = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
tag.fields = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
||||||
|
tag.features = <attr_t*>self.mem.alloc(tag.length, sizeof(attr_t))
|
||||||
for i, (field, feature) in enumerate(field_feature_pairs):
|
for i, (field, feature) in enumerate(field_feature_pairs):
|
||||||
tag.fields[i] = field
|
tag.fields[i] = field
|
||||||
tag.features[i] = feature
|
tag.features[i] = feature
|
||||||
|
|
|
@ -65,6 +65,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
|
||||||
cdef GoldParseStateC gs
|
cdef GoldParseStateC gs
|
||||||
gs.length = len(heads)
|
gs.length = len(heads)
|
||||||
gs.stride = 1
|
gs.stride = 1
|
||||||
|
assert gs.length > 0
|
||||||
gs.labels = <attr_t*>mem.alloc(gs.length, sizeof(gs.labels[0]))
|
gs.labels = <attr_t*>mem.alloc(gs.length, sizeof(gs.labels[0]))
|
||||||
gs.heads = <int32_t*>mem.alloc(gs.length, sizeof(gs.heads[0]))
|
gs.heads = <int32_t*>mem.alloc(gs.length, sizeof(gs.heads[0]))
|
||||||
gs.n_kids = <int32_t*>mem.alloc(gs.length, sizeof(gs.n_kids[0]))
|
gs.n_kids = <int32_t*>mem.alloc(gs.length, sizeof(gs.n_kids[0]))
|
||||||
|
@ -126,6 +127,7 @@ cdef GoldParseStateC create_gold_state(Pool mem, const StateC* state,
|
||||||
1
|
1
|
||||||
)
|
)
|
||||||
# Make an array of pointers, pointing into the gs_kids_flat array.
|
# Make an array of pointers, pointing into the gs_kids_flat array.
|
||||||
|
assert gs.length > 0
|
||||||
gs.kids = <int32_t**>mem.alloc(gs.length, sizeof(int32_t*))
|
gs.kids = <int32_t**>mem.alloc(gs.length, sizeof(int32_t*))
|
||||||
for i in range(gs.length):
|
for i in range(gs.length):
|
||||||
if gs.n_kids[i] != 0:
|
if gs.n_kids[i] != 0:
|
||||||
|
|
|
@ -63,6 +63,7 @@ cdef GoldNERStateC create_gold_state(
|
||||||
Example example
|
Example example
|
||||||
) except *:
|
) except *:
|
||||||
cdef GoldNERStateC gs
|
cdef GoldNERStateC gs
|
||||||
|
assert example.x.length > 0
|
||||||
gs.ner = <Transition*>mem.alloc(example.x.length, sizeof(Transition))
|
gs.ner = <Transition*>mem.alloc(example.x.length, sizeof(Transition))
|
||||||
ner_tags = example.get_aligned_ner()
|
ner_tags = example.get_aligned_ner()
|
||||||
for i, ner_tag in enumerate(ner_tags):
|
for i, ner_tag in enumerate(ner_tags):
|
||||||
|
|
|
@ -258,6 +258,7 @@ cdef class Tokenizer:
|
||||||
tokens = doc.c
|
tokens = doc.c
|
||||||
# Otherwise create a separate array to store modified tokens
|
# Otherwise create a separate array to store modified tokens
|
||||||
else:
|
else:
|
||||||
|
assert max_length > 0
|
||||||
tokens = <TokenC*>mem.alloc(max_length, sizeof(TokenC))
|
tokens = <TokenC*>mem.alloc(max_length, sizeof(TokenC))
|
||||||
# Modify tokenization according to filtered special cases
|
# Modify tokenization according to filtered special cases
|
||||||
offset = self._retokenize_special_spans(doc, tokens, span_data)
|
offset = self._retokenize_special_spans(doc, tokens, span_data)
|
||||||
|
|
|
@ -225,6 +225,7 @@ cdef class Doc:
|
||||||
# Guarantee self.lex[i-x], for any i >= 0 and x < padding is in bounds
|
# Guarantee self.lex[i-x], for any i >= 0 and x < padding is in bounds
|
||||||
# However, we need to remember the true starting places, so that we can
|
# However, we need to remember the true starting places, so that we can
|
||||||
# realloc.
|
# realloc.
|
||||||
|
assert size + (PADDING*2) > 0
|
||||||
data_start = <TokenC*>self.mem.alloc(size + (PADDING*2), sizeof(TokenC))
|
data_start = <TokenC*>self.mem.alloc(size + (PADDING*2), sizeof(TokenC))
|
||||||
cdef int i
|
cdef int i
|
||||||
for i in range(size + (PADDING*2)):
|
for i in range(size + (PADDING*2)):
|
||||||
|
@ -1177,6 +1178,7 @@ cdef class Doc:
|
||||||
other.length = self.length
|
other.length = self.length
|
||||||
other.max_length = self.max_length
|
other.max_length = self.max_length
|
||||||
buff_size = other.max_length + (PADDING*2)
|
buff_size = other.max_length + (PADDING*2)
|
||||||
|
assert buff_size > 0
|
||||||
tokens = <TokenC*>other.mem.alloc(buff_size, sizeof(TokenC))
|
tokens = <TokenC*>other.mem.alloc(buff_size, sizeof(TokenC))
|
||||||
memcpy(tokens, self.c - PADDING, buff_size * sizeof(TokenC))
|
memcpy(tokens, self.c - PADDING, buff_size * sizeof(TokenC))
|
||||||
other.c = &tokens[PADDING]
|
other.c = &tokens[PADDING]
|
||||||
|
|
|
@ -164,7 +164,7 @@ cdef class Vocab:
|
||||||
if len(string) < 3 or self.length < 10000:
|
if len(string) < 3 or self.length < 10000:
|
||||||
mem = self.mem
|
mem = self.mem
|
||||||
cdef bint is_oov = mem is not self.mem
|
cdef bint is_oov = mem is not self.mem
|
||||||
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
|
lex = <LexemeC*>mem.alloc(1, sizeof(LexemeC))
|
||||||
lex.orth = self.strings.add(string)
|
lex.orth = self.strings.add(string)
|
||||||
lex.length = len(string)
|
lex.length = len(string)
|
||||||
if self.vectors is not None:
|
if self.vectors is not None:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user