Explicitly delete objects after parser.update to free GPU memory (#5748)

* Try explicitly deleting objects

* Refactor parser model backprop slightly

* Free parser data explicitly after rehearse and update
This commit is contained in:
Matthew Honnibal 2020-07-10 22:35:20 +02:00 committed by GitHub
parent de6a32315c
commit b68216e263
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 38 additions and 12 deletions

View File

@ -245,6 +245,13 @@ class ParserStepModel(Model):
for class_ in unseen_classes:
self._class_mask[class_] = 0.
def clear_memory(self):
del self.tokvecs
del self.bp_tokvecs
del self.state2vec
del self.backprops
del self._class_mask
@property
def nO(self):
if self.attrs["has_upper"]:
@ -273,6 +280,19 @@ class ParserStepModel(Model):
c_ids += ids.shape[1]
return ids
def backprop_step(self, token_ids, d_vector, get_d_tokvecs):
if isinstance(self.state2vec.ops, CupyOps) \
and not isinstance(token_ids, self.state2vec.ops.xp.ndarray):
# Move token_ids and d_vector to GPU, asynchronously
self.backprops.append((
util.get_async(self.cuda_stream, token_ids),
util.get_async(self.cuda_stream, d_vector),
get_d_tokvecs
))
else:
self.backprops.append((token_ids, d_vector, get_d_tokvecs))
def finish_steps(self, golds):
# Add a padding vector to the d_tokvecs gradient, so that missing
# values don't affect the real gradient.
@ -315,16 +335,7 @@ def step_forward(model: ParserStepModel, states, is_train):
d_vector = get_d_vector(d_scores)
if mask is not None:
d_vector *= mask
if isinstance(model.state2vec.ops, CupyOps) \
and not isinstance(token_ids, model.state2vec.ops.xp.ndarray):
# Move token_ids and d_vector to GPU, asynchronously
model.backprops.append((
util.get_async(model.cuda_stream, token_ids),
util.get_async(model.cuda_stream, d_vector),
get_d_tokvecs
))
else:
model.backprops.append((token_ids, d_vector, get_d_tokvecs))
model.backprop_step(token_ids, d_vector, get_d_tokvecs)
return None
return scores, backprop_parser_step

View File

@ -200,6 +200,8 @@ cdef class Parser:
with nogil:
self._parseC(&states[0],
weights, sizes)
model.clear_memory()
del model
return batch
cdef void _parseC(self, StateC** states,
@ -312,6 +314,13 @@ cdef class Parser:
if set_annotations:
docs = [eg.predicted for eg in examples]
self.set_annotations(docs, all_states)
# Ugh, this is annoying. If we're working on GPU, we want to free the
# memory ASAP. It seems that Python doesn't necessarily get around to
# removing these in time if we don't explicitly delete? It's confusing.
del backprop
del backprop_tok2vec
model.clear_memory()
del model
return losses
def rehearse(self, examples, sgd=None, losses=None, **cfg):
@ -335,7 +344,7 @@ cdef class Parser:
set_dropout_rate(self._rehearsal_model, 0.0)
set_dropout_rate(self.model, 0.0)
tutor, _ = self._rehearsal_model.begin_update(docs)
model, finish_update = self.model.begin_update(docs)
model, backprop_tok2vec = self.model.begin_update(docs)
n_scores = 0.
loss = 0.
while states:
@ -351,10 +360,16 @@ cdef class Parser:
states = [state for state in states if not state.is_final()]
n_scores += d_scores.size
# Do the backprop
finish_update(docs)
backprop_tok2vec(docs)
if sgd is not None:
self.model.finish_update(sgd)
losses[self.name] += loss / n_scores
del backprop
del backprop_tok2vec
model.clear_memory()
tutor.clear_memory()
del model
del tutor
return losses
def get_gradients(self):