diff --git a/spacy/gold.pyx b/spacy/gold.pyx index ed66390e4..d344473bf 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -148,7 +148,7 @@ class GoldCorpus(object): self.train_path = util.ensure_path(train_path) self.dev_path = util.ensure_path(dev_path) self.train_locs = self.walk_corpus(self.train_path) - self.dev_locs = self.walk_corpus(self.train_path) + self.dev_locs = self.walk_corpus(self.dev_path) @property def train_tuples(self): @@ -173,7 +173,7 @@ class GoldCorpus(object): random.shuffle(self.train_locs) gold_docs = self.iter_gold_docs(nlp, self.train_tuples) if shuffle: - gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*5000) + gold_docs = util.itershuffle(gold_docs, bufsize=shuffle*1000) yield from gold_docs def dev_docs(self, nlp): diff --git a/spacy/language.py b/spacy/language.py index 12964784c..0f6daec70 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -182,8 +182,8 @@ class Language(object): for proc in self.pipeline[1:]: grads = {} tokvecses, bp_tokvecses = tok2vec.model.begin_update(feats, drop=drop) - d_tokvecses = proc.update((docs, tokvecses), golds, sgd=sgd, drop=drop) - bp_tokvecses(d_tokvecses, sgd=sgd) + d_tokvecses = proc.update((docs, tokvecses), golds, sgd=get_grads, drop=drop) + bp_tokvecses(d_tokvecses, sgd=get_grads) if sgd is not None: for key, (W, dW) in grads.items(): # TODO: Unhack this when thinc improves @@ -228,6 +228,7 @@ class Language(object): scorer = Scorer() for doc, gold in zip(self.pipe(docs), golds): scorer.score(doc, gold) + doc.tensor = None return scorer @contextmanager