Fix Corpus

This commit is contained in:
Matthew Honnibal 2020-06-20 21:49:15 +02:00
parent 450c6fe39c
commit 396dd60b3a

View File

@@ -68,10 +68,12 @@ class Corpus:
i += 1
return n
def train_dataset(self, nlp, shuffle=True, **kwargs):
    """Yield the training examples built from ``self.train_loc``.

    The locations returned by ``self.walk_corpus(self.train_loc)`` are read
    with ``self.read_docbin`` and the resulting reference docs are turned
    into examples by ``self.make_examples``.

    nlp: Pipeline object. Its ``vocab`` is passed to ``read_docbin`` and the
        object itself is passed to ``make_examples``.
    shuffle (bool): If true (the default), collect every example into a list
        and shuffle it with ``random.shuffle`` before yielding. If false,
        yield the examples in the order ``make_examples`` produces them,
        without building the list first.
    **kwargs: Forwarded unchanged to ``make_examples``.
    YIELDS: The items produced by ``make_examples``.
    """
    ref_docs = self.read_docbin(nlp.vocab, self.walk_corpus(self.train_loc))
    examples = self.make_examples(nlp, ref_docs, **kwargs)
    if shuffle:
        # random.shuffle works in place on a sequence, so the whole dataset
        # has to be materialised first; skipping this keeps iteration lazy.
        examples = list(examples)
        random.shuffle(examples)
    yield from examples
def dev_dataset(self, nlp):