Fix Corpus

This commit is contained in:
Matthew Honnibal 2020-06-20 21:49:15 +02:00
parent 450c6fe39c
commit 396dd60b3a

View File

@@ -68,10 +68,12 @@ class Corpus:
             i += 1
         return n
 
-    def train_dataset(self, nlp, **kwargs):
+    def train_dataset(self, nlp, shuffle=True, **kwargs):
         ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
-        examples = list(self.make_examples(nlp, ref_docs, **kwargs))
-        random.shuffle(examples)
+        examples = self.make_examples(nlp, ref_docs, **kwargs)
+        if shuffle:
+            examples = list(examples)
+            random.shuffle(examples)
         yield from examples
 
     def dev_dataset(self, nlp):