From 61daac54e4c2172a6ec0ae84858feb51e32a173c Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 3 Nov 2021 07:51:53 +0100 Subject: [PATCH] Serialize _context separately in multiprocessing pipe (#9597) * Serialize _context with Doc * Revert "Serialize _context with Doc" This reverts commit 161f1fac9115778f310eb4ce13ca7825c8129611. * Serialize Doc._context separately for multiprocessing pipe --- spacy/language.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 49f6dd1a5..55c9912cc 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1631,11 +1631,12 @@ class Language: recv.recv() for recv in cycle(bytedocs_recv_ch) ) try: - for i, (_, (byte_doc, byte_error)) in enumerate( + for i, (_, (byte_doc, byte_context, byte_error)) in enumerate( zip(raw_texts, byte_tuples), 1 ): if byte_doc is not None: doc = Doc(self.vocab).from_bytes(byte_doc) + doc._context = byte_context yield doc elif byte_error is not None: error = srsly.msgpack_loads(byte_error) @@ -2186,12 +2187,12 @@ def _apply_pipes( for pipe in pipes: docs = pipe(docs) # type: ignore[arg-type, assignment] # Connection does not accept unpickable objects, so send list. - byte_docs = [(doc.to_bytes(), None) for doc in docs] - padding = [(None, None)] * (len(texts) - len(byte_docs)) + byte_docs = [(doc.to_bytes(), doc._context, None) for doc in docs] + padding = [(None, None, None)] * (len(texts) - len(byte_docs)) sender.send(byte_docs + padding) # type: ignore[operator] except Exception: - error_msg = [(None, srsly.msgpack_dumps(traceback.format_exc()))] - padding = [(None, None)] * (len(texts) - 1) + error_msg = [(None, None, srsly.msgpack_dumps(traceback.format_exc()))] + padding = [(None, None, None)] * (len(texts) - 1) sender.send(error_msg + padding)