Improve piping in language.pipe

This commit is contained in:
Matthew Honnibal 2017-10-18 21:46:12 +02:00
parent 633a75c7e0
commit 65bf5e85bd

View File

@@ -10,6 +10,7 @@ from collections import OrderedDict
import itertools import itertools
import weakref import weakref
import functools import functools
import tqdm
from .tokenizer import Tokenizer from .tokenizer import Tokenizer
from .vocab import Vocab from .vocab import Vocab
@@ -447,11 +448,9 @@ class Language(object):
golds = list(golds) golds = list(golds)
for name, pipe in self.pipeline: for name, pipe in self.pipeline:
if not hasattr(pipe, 'pipe'): if not hasattr(pipe, 'pipe'):
for doc in docs: docs = (pipe(doc) for doc in docs)
pipe(doc)
else: else:
docs = list(pipe.pipe(docs)) docs = pipe.pipe(docs, batch_size=256)
assert len(docs) == len(golds)
for doc, gold in zip(docs, golds): for doc, gold in zip(docs, golds):
if verbose: if verbose:
print(doc) print(doc)