From 98fbdf285696cb92e7c17a8685d2fdb30ba84c14 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 1 Feb 2016 09:01:13 +0100
Subject: [PATCH] * Add Language.batch() method, to support multi-threaded jobs

---
 spacy/language.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/spacy/language.py b/spacy/language.py
index 69abf16b3..05ffa4ce6 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -269,6 +269,32 @@ class Language(object):
             self.entity(tokens)
         return tokens
 
+    def batch(self, texts, tag=True, parse=True, entity=True):
+        """Process a sequence of texts and return one document per text.
+
+        Each text is first run through `self(...)` with parsing disabled;
+        the parser, if available and requested, is then applied to all
+        documents at once via `parse_batch`.
+
+        texts: iterable of texts, each passed to `self(...)`.
+        tag: if False, every text gets a plain `self(...)` call with the
+            given flags and no batched parsing is done.
+        parse: whether to run `self.parser.parse_batch` on the documents.
+        entity: forwarded unchanged to `self(...)`.
+
+        Returns the list of documents, in the order of `texts`.
+        """
+        if tag is False:
+            return [self(text, tag=tag, parse=parse, entity=entity)
+                    for text in texts]
+        # Collect every document: parse_batch() works on this list, and it
+        # is also the return value.
+        docs = [self(text, tag=True, parse=False, entity=entity)
+                for text in texts]
+        if self.parser and parse:
+            self.parser.parse_batch(docs)
+        return docs
+
     def end_training(self, data_dir=None):
         if data_dir is None:
             data_dir = self.data_dir