Fix size limits in training data

This commit is contained in:
Matthew Honnibal 2018-05-15 19:01:41 +02:00
parent 8446b35ce0
commit f328c195ca

View File

@@ -160,7 +160,7 @@ class GoldCorpus(object):
yield item yield item
i += len(item[1]) i += len(item[1])
if limit and i >= limit: if limit and i >= limit:
break return
@property @property
def dev_tuples(self): def dev_tuples(self):
@@ -178,9 +178,9 @@ class GoldCorpus(object):
for raw_text, paragraph_tuples in self.train_tuples: for raw_text, paragraph_tuples in self.train_tuples:
for sent_tuples, brackets in paragraph_tuples: for sent_tuples, brackets in paragraph_tuples:
n += len(sent_tuples[1]) n += len(sent_tuples[1])
if self.limit and i >= self.limit: if self.limit and i >= self.limit:
break break
i += len(paragraph_tuples) i += 1
return n return n
def train_docs(self, nlp, gold_preproc=False, max_length=None, def train_docs(self, nlp, gold_preproc=False, max_length=None,