Fix memory leak

2025-08-04 12:20:20 +03:00 · 2017-05-21 13:38:46 -05:00 · 2017-05-21 13:38:46 -05:00 · 432b3499b3
commit 432b3499b3
parent 59fbfb3829
2 changed files with 5 additions and 4 deletions
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -148,7 +148,7 @@ class GoldCorpus(object):
        self.train_path = util.ensure_path(train_path)
        self.dev_path = util.ensure_path(dev_path)
        self.train_locs = self.walk_corpus(self.train_path)
-        self.dev_locs = self.walk_corpus(self.train_path)
+        self.dev_locs = self.walk_corpus(self.dev_path)

    @property
    def train_tuples(self):
@@ -173,7 +173,7 @@ class GoldCorpus(object):
            random.shuffle(self.train_locs)
        gold_docs = self.iter_gold_docs(nlp, self.train_tuples)
        if shuffle:
-            gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*5000)
+            gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*1000)
        yield from gold_docs

    def dev_docs(self, nlp):
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -182,8 +182,8 @@ class Language(object):
        for proc in self.pipeline[1:]:
            grads = {}
            tokvecses, bp_tokvecses = tok2vec.model.begin_update(feats, drop=drop)
-            d_tokvecses = proc.update((docs, tokvecses), golds, sgd=sgd, drop=drop)
-            bp_tokvecses(d_tokvecses, sgd=sgd)
+            d_tokvecses = proc.update((docs, tokvecses), golds, sgd=get_grads, drop=drop)
+            bp_tokvecses(d_tokvecses, sgd=get_grads)
            if sgd is not None:
                for key, (W, dW) in grads.items():
                    # TODO: Unhack this when thinc improves
@@ -228,6 +228,7 @@ class Language(object):
        scorer = Scorer()
        for doc, gold in zip(self.pipe(docs), golds):
            scorer.score(doc, gold)
+            doc.tensor = None
        return scorer

    @contextmanager