mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Explicitly delete objects after parser.update to free GPU memory (#5748)
* Try explicitly deleting objects
* Refactor parser model backprop slightly
* Free parser data explicitly after rehearse and update
This commit is contained in:
parent
de6a32315c
commit
b68216e263
|
@ -245,6 +245,13 @@ class ParserStepModel(Model):
|
||||||
for class_ in unseen_classes:
|
for class_ in unseen_classes:
|
||||||
self._class_mask[class_] = 0.
|
self._class_mask[class_] = 0.
|
||||||
|
|
||||||
|
def clear_memory(self):
    """Drop this step model's references to its per-batch state.

    Deletes the ``tokvecs``, ``bp_tokvecs``, ``state2vec``, ``backprops``
    and ``_class_mask`` attributes so that the objects they refer to are no
    longer kept alive by this instance. Attributes that are already absent
    are skipped, which makes repeated calls safe.

    Methods that read these attributes will fail after this call.
    """
    released = (
        "tokvecs",
        "bp_tokvecs",
        "state2vec",
        "backprops",
        "_class_mask",
    )
    for attr_name in released:
        try:
            delattr(self, attr_name)
        except AttributeError:
            # Already released, e.g. by an earlier clear_memory() call.
            pass
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def nO(self):
|
def nO(self):
|
||||||
if self.attrs["has_upper"]:
|
if self.attrs["has_upper"]:
|
||||||
|
@ -273,6 +280,19 @@ class ParserStepModel(Model):
|
||||||
c_ids += ids.shape[1]
|
c_ids += ids.shape[1]
|
||||||
return ids
|
return ids
|
||||||
|
|
||||||
|
def backprop_step(self, token_ids, d_vector, get_d_tokvecs):
    """Queue one step's backprop inputs on ``self.backprops``.

    Appends a ``(token_ids, d_vector, get_d_tokvecs)`` tuple. When the
    ``state2vec`` layer's ops are ``CupyOps`` and ``token_ids`` is not
    already an ``ndarray`` of that backend, ``token_ids`` and ``d_vector``
    are first passed through ``util.get_async`` with ``self.cuda_stream``.
    """
    ops = self.state2vec.ops
    needs_transfer = (
        isinstance(ops, CupyOps)
        and not isinstance(token_ids, ops.xp.ndarray)
    )
    if needs_transfer:
        # Move token_ids and d_vector to GPU, asynchronously
        token_ids = util.get_async(self.cuda_stream, token_ids)
        d_vector = util.get_async(self.cuda_stream, d_vector)
    self.backprops.append((token_ids, d_vector, get_d_tokvecs))
|
||||||
|
|
||||||
|
|
||||||
def finish_steps(self, golds):
|
def finish_steps(self, golds):
|
||||||
# Add a padding vector to the d_tokvecs gradient, so that missing
|
# Add a padding vector to the d_tokvecs gradient, so that missing
|
||||||
# values don't affect the real gradient.
|
# values don't affect the real gradient.
|
||||||
|
@ -315,16 +335,7 @@ def step_forward(model: ParserStepModel, states, is_train):
|
||||||
d_vector = get_d_vector(d_scores)
|
d_vector = get_d_vector(d_scores)
|
||||||
if mask is not None:
|
if mask is not None:
|
||||||
d_vector *= mask
|
d_vector *= mask
|
||||||
if isinstance(model.state2vec.ops, CupyOps) \
|
model.backprop_step(token_ids, d_vector, get_d_tokvecs)
|
||||||
and not isinstance(token_ids, model.state2vec.ops.xp.ndarray):
|
|
||||||
# Move token_ids and d_vector to GPU, asynchronously
|
|
||||||
model.backprops.append((
|
|
||||||
util.get_async(model.cuda_stream, token_ids),
|
|
||||||
util.get_async(model.cuda_stream, d_vector),
|
|
||||||
get_d_tokvecs
|
|
||||||
))
|
|
||||||
else:
|
|
||||||
model.backprops.append((token_ids, d_vector, get_d_tokvecs))
|
|
||||||
return None
|
return None
|
||||||
return scores, backprop_parser_step
|
return scores, backprop_parser_step
|
||||||
|
|
||||||
|
|
|
@ -200,6 +200,8 @@ cdef class Parser:
|
||||||
with nogil:
|
with nogil:
|
||||||
self._parseC(&states[0],
|
self._parseC(&states[0],
|
||||||
weights, sizes)
|
weights, sizes)
|
||||||
|
model.clear_memory()
|
||||||
|
del model
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
cdef void _parseC(self, StateC** states,
|
cdef void _parseC(self, StateC** states,
|
||||||
|
@ -312,6 +314,13 @@ cdef class Parser:
|
||||||
if set_annotations:
|
if set_annotations:
|
||||||
docs = [eg.predicted for eg in examples]
|
docs = [eg.predicted for eg in examples]
|
||||||
self.set_annotations(docs, all_states)
|
self.set_annotations(docs, all_states)
|
||||||
|
# If we're working on GPU, we want to free the memory ASAP. Python does
|
||||||
|
# not always seem to release these objects promptly on its own, so we
|
||||||
|
# delete them explicitly.
|
||||||
|
del backprop
|
||||||
|
del backprop_tok2vec
|
||||||
|
model.clear_memory()
|
||||||
|
del model
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def rehearse(self, examples, sgd=None, losses=None, **cfg):
|
def rehearse(self, examples, sgd=None, losses=None, **cfg):
|
||||||
|
@ -335,7 +344,7 @@ cdef class Parser:
|
||||||
set_dropout_rate(self._rehearsal_model, 0.0)
|
set_dropout_rate(self._rehearsal_model, 0.0)
|
||||||
set_dropout_rate(self.model, 0.0)
|
set_dropout_rate(self.model, 0.0)
|
||||||
tutor, _ = self._rehearsal_model.begin_update(docs)
|
tutor, _ = self._rehearsal_model.begin_update(docs)
|
||||||
model, finish_update = self.model.begin_update(docs)
|
model, backprop_tok2vec = self.model.begin_update(docs)
|
||||||
n_scores = 0.
|
n_scores = 0.
|
||||||
loss = 0.
|
loss = 0.
|
||||||
while states:
|
while states:
|
||||||
|
@ -351,10 +360,16 @@ cdef class Parser:
|
||||||
states = [state for state in states if not state.is_final()]
|
states = [state for state in states if not state.is_final()]
|
||||||
n_scores += d_scores.size
|
n_scores += d_scores.size
|
||||||
# Do the backprop
|
# Do the backprop
|
||||||
finish_update(docs)
|
backprop_tok2vec(docs)
|
||||||
if sgd is not None:
|
if sgd is not None:
|
||||||
self.model.finish_update(sgd)
|
self.model.finish_update(sgd)
|
||||||
losses[self.name] += loss / n_scores
|
losses[self.name] += loss / n_scores
|
||||||
|
del backprop
|
||||||
|
del backprop_tok2vec
|
||||||
|
model.clear_memory()
|
||||||
|
tutor.clear_memory()
|
||||||
|
del model
|
||||||
|
del tutor
|
||||||
return losses
|
return losses
|
||||||
|
|
||||||
def get_gradients(self):
|
def get_gradients(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user