Fix memory leak

This commit is contained in:
Matthew Honnibal 2017-05-21 13:38:46 -05:00
parent 59fbfb3829
commit 432b3499b3
2 changed files with 5 additions and 4 deletions

View File

@@ -148,7 +148,7 @@ class GoldCorpus(object):
self.train_path = util.ensure_path(train_path) self.train_path = util.ensure_path(train_path)
self.dev_path = util.ensure_path(dev_path) self.dev_path = util.ensure_path(dev_path)
self.train_locs = self.walk_corpus(self.train_path) self.train_locs = self.walk_corpus(self.train_path)
self.dev_locs = self.walk_corpus(self.train_path) self.dev_locs = self.walk_corpus(self.dev_path)
@property @property
def train_tuples(self): def train_tuples(self):
@@ -173,7 +173,7 @@ class GoldCorpus(object):
random.shuffle(self.train_locs) random.shuffle(self.train_locs)
gold_docs = self.iter_gold_docs(nlp, self.train_tuples) gold_docs = self.iter_gold_docs(nlp, self.train_tuples)
if shuffle: if shuffle:
gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*5000) gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*1000)
yield from gold_docs yield from gold_docs
def dev_docs(self, nlp): def dev_docs(self, nlp):

View File

@@ -182,8 +182,8 @@ class Language(object):
for proc in self.pipeline[1:]: for proc in self.pipeline[1:]:
grads = {} grads = {}
tokvecses, bp_tokvecses = tok2vec.model.begin_update(feats, drop=drop) tokvecses, bp_tokvecses = tok2vec.model.begin_update(feats, drop=drop)
d_tokvecses = proc.update((docs, tokvecses), golds, sgd=sgd, drop=drop) d_tokvecses = proc.update((docs, tokvecses), golds, sgd=get_grads, drop=drop)
bp_tokvecses(d_tokvecses, sgd=sgd) bp_tokvecses(d_tokvecses, sgd=get_grads)
if sgd is not None: if sgd is not None:
for key, (W, dW) in grads.items(): for key, (W, dW) in grads.items():
# TODO: Unhack this when thinc improves # TODO: Unhack this when thinc improves
@@ -228,6 +228,7 @@ class Language(object):
scorer = Scorer() scorer = Scorer()
for doc, gold in zip(self.pipe(docs), golds): for doc, gold in zip(self.pipe(docs), golds):
scorer.score(doc, gold) scorer.score(doc, gold)
doc.tensor = None
return scorer return scorer
@contextmanager @contextmanager