From 97aabafb5f99d12065397f8ca162f92ad9a4acc0 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 19 Aug 2017 12:21:33 +0200 Subject: [PATCH] Document as_tuples keyword arg of Language.pipe --- spacy/language.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index cb679a2bc..aa757ffa8 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -430,11 +430,16 @@ class Language(object): except StopIteration: pass - def pipe(self, texts, tuples=False, n_threads=2, batch_size=1000, disable=[]): + def pipe(self, texts, as_tuples=False, n_threads=2, batch_size=1000, + disable=[]): """Process texts as a stream, and yield `Doc` objects in order. Supports GIL-free multi-threading. texts (iterator): A sequence of texts to process. + as_tuples (bool): + If set to True, inputs should be a sequence of + (text, context) tuples. Output will then be a sequence of + (doc, context) tuples. Defaults to False. n_threads (int): The number of worker threads to use. If -1, OpenMP will decide how many to use at run time. Default is 2. batch_size (int): The number of texts to buffer. @@ -446,7 +451,7 @@ class Language(object): >>> for doc in nlp.pipe(texts, batch_size=50, n_threads=4): >>> assert doc.is_parsed """ - if tuples: + if as_tuples: text_context1, text_context2 = itertools.tee(texts) texts = (tc[0] for tc in text_context1) contexts = (tc[1] for tc in text_context2)