mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Explicitly delete objects after parser.update to free GPU memory (#5748)
* Try explicitly deleting objects * Refactor parser model backprop slightly * Free parser data explicitly after rehearse and update
This commit is contained in:
parent
de6a32315c
commit
b68216e263
|
@ -245,6 +245,13 @@ class ParserStepModel(Model):
|
|||
for class_ in unseen_classes:
|
||||
self._class_mask[class_] = 0.
|
||||
|
||||
def clear_memory(self):
|
||||
del self.tokvecs
|
||||
del self.bp_tokvecs
|
||||
del self.state2vec
|
||||
del self.backprops
|
||||
del self._class_mask
|
||||
|
||||
@property
|
||||
def nO(self):
|
||||
if self.attrs["has_upper"]:
|
||||
|
@ -273,6 +280,19 @@ class ParserStepModel(Model):
|
|||
c_ids += ids.shape[1]
|
||||
return ids
|
||||
|
||||
def backprop_step(self, token_ids, d_vector, get_d_tokvecs):
|
||||
if isinstance(self.state2vec.ops, CupyOps) \
|
||||
and not isinstance(token_ids, self.state2vec.ops.xp.ndarray):
|
||||
# Move token_ids and d_vector to GPU, asynchronously
|
||||
self.backprops.append((
|
||||
util.get_async(self.cuda_stream, token_ids),
|
||||
util.get_async(self.cuda_stream, d_vector),
|
||||
get_d_tokvecs
|
||||
))
|
||||
else:
|
||||
self.backprops.append((token_ids, d_vector, get_d_tokvecs))
|
||||
|
||||
|
||||
def finish_steps(self, golds):
|
||||
# Add a padding vector to the d_tokvecs gradient, so that missing
|
||||
# values don't affect the real gradient.
|
||||
|
@ -315,16 +335,7 @@ def step_forward(model: ParserStepModel, states, is_train):
|
|||
d_vector = get_d_vector(d_scores)
|
||||
if mask is not None:
|
||||
d_vector *= mask
|
||||
if isinstance(model.state2vec.ops, CupyOps) \
|
||||
and not isinstance(token_ids, model.state2vec.ops.xp.ndarray):
|
||||
# Move token_ids and d_vector to GPU, asynchronously
|
||||
model.backprops.append((
|
||||
util.get_async(model.cuda_stream, token_ids),
|
||||
util.get_async(model.cuda_stream, d_vector),
|
||||
get_d_tokvecs
|
||||
))
|
||||
else:
|
||||
model.backprops.append((token_ids, d_vector, get_d_tokvecs))
|
||||
model.backprop_step(token_ids, d_vector, get_d_tokvecs)
|
||||
return None
|
||||
return scores, backprop_parser_step
|
||||
|
||||
|
|
|
@ -200,6 +200,8 @@ cdef class Parser:
|
|||
with nogil:
|
||||
self._parseC(&states[0],
|
||||
weights, sizes)
|
||||
model.clear_memory()
|
||||
del model
|
||||
return batch
|
||||
|
||||
cdef void _parseC(self, StateC** states,
|
||||
|
@ -312,6 +314,13 @@ cdef class Parser:
|
|||
if set_annotations:
|
||||
docs = [eg.predicted for eg in examples]
|
||||
self.set_annotations(docs, all_states)
|
||||
# Ugh, this is annoying. If we're working on GPU, we want to free the
|
||||
# memory ASAP. It seems that Python doesn't necessarily get around to
|
||||
# removing these in time if we don't explicitly delete? It's confusing.
|
||||
del backprop
|
||||
del backprop_tok2vec
|
||||
model.clear_memory()
|
||||
del model
|
||||
return losses
|
||||
|
||||
def rehearse(self, examples, sgd=None, losses=None, **cfg):
|
||||
|
@ -335,7 +344,7 @@ cdef class Parser:
|
|||
set_dropout_rate(self._rehearsal_model, 0.0)
|
||||
set_dropout_rate(self.model, 0.0)
|
||||
tutor, _ = self._rehearsal_model.begin_update(docs)
|
||||
model, finish_update = self.model.begin_update(docs)
|
||||
model, backprop_tok2vec = self.model.begin_update(docs)
|
||||
n_scores = 0.
|
||||
loss = 0.
|
||||
while states:
|
||||
|
@ -351,10 +360,16 @@ cdef class Parser:
|
|||
states = [state for state in states if not state.is_final()]
|
||||
n_scores += d_scores.size
|
||||
# Do the backprop
|
||||
finish_update(docs)
|
||||
backprop_tok2vec(docs)
|
||||
if sgd is not None:
|
||||
self.model.finish_update(sgd)
|
||||
losses[self.name] += loss / n_scores
|
||||
del backprop
|
||||
del backprop_tok2vec
|
||||
model.clear_memory()
|
||||
tutor.clear_memory()
|
||||
del model
|
||||
del tutor
|
||||
return losses
|
||||
|
||||
def get_gradients(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user