Disable data caching in parser

This commit is contained in:
Matthew Honnibal 2017-05-19 18:17:11 -05:00
parent e84de028b5
commit 7ee1827af0

View File

@@ -43,12 +43,14 @@ class Trainer(object):
else: else:
paragraph_tuples = merge_sents(paragraph_tuples) paragraph_tuples = merge_sents(paragraph_tuples)
if augment_data is None: if augment_data is None:
if i not in cached_docs: docs = self.make_docs(raw_text, paragraph_tuples)
cached_docs[i] = self.make_docs(raw_text, paragraph_tuples) golds = self.make_golds(docs, paragraph_tuples)
docs = cached_docs[i] #if i not in cached_docs:
if i not in cached_golds: # cached_docs[i] = self.make_docs(raw_text, paragraph_tuples)
cached_golds[i] = self.make_golds(docs, paragraph_tuples) #docs = cached_docs[i]
golds = cached_golds[i] #if i not in cached_golds:
# cached_golds[i] = self.make_golds(docs, paragraph_tuples)
#golds = cached_golds[i]
else: else:
raw_text, paragraph_tuples = augment_data(raw_text, paragraph_tuples) raw_text, paragraph_tuples = augment_data(raw_text, paragraph_tuples)
docs = self.make_docs(raw_text, paragraph_tuples) docs = self.make_docs(raw_text, paragraph_tuples)
@@ -83,7 +85,7 @@ class Trainer(object):
all_docs.extend(docs) all_docs.extend(docs)
all_golds.extend(golds) all_golds.extend(golds)
scorer = Scorer() scorer = Scorer()
for doc, gold in zip(self.nlp.pipe(all_docs), all_golds): for doc, gold in zip(self.nlp.pipe(all_docs, batch_size=16), all_golds):
scorer.score(doc, gold) scorer.score(doc, gold)
return scorer return scorer