Serialize _context separately in multiprocessing pipe (#9597)

* Serialize _context with Doc

* Revert "Serialize _context with Doc"

This reverts commit 161f1fac91.

* Serialize Doc._context separately for multiprocessing pipe
This commit is contained in:
Adriane Boyd 2021-11-03 07:51:53 +01:00 committed by GitHub
parent 5a979137a7
commit 61daac54e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1631,11 +1631,12 @@ class Language:
recv.recv() for recv in cycle(bytedocs_recv_ch)
)
try:
for i, (_, (byte_doc, byte_error)) in enumerate(
for i, (_, (byte_doc, byte_context, byte_error)) in enumerate(
zip(raw_texts, byte_tuples), 1
):
if byte_doc is not None:
doc = Doc(self.vocab).from_bytes(byte_doc)
doc._context = byte_context
yield doc
elif byte_error is not None:
error = srsly.msgpack_loads(byte_error)
@ -2186,12 +2187,12 @@ def _apply_pipes(
for pipe in pipes:
docs = pipe(docs) # type: ignore[arg-type, assignment]
# Connection does not accept unpicklable objects, so send a list.
byte_docs = [(doc.to_bytes(), None) for doc in docs]
padding = [(None, None)] * (len(texts) - len(byte_docs))
byte_docs = [(doc.to_bytes(), doc._context, None) for doc in docs]
padding = [(None, None, None)] * (len(texts) - len(byte_docs))
sender.send(byte_docs + padding) # type: ignore[operator]
except Exception:
error_msg = [(None, srsly.msgpack_dumps(traceback.format_exc()))]
padding = [(None, None)] * (len(texts) - 1)
error_msg = [(None, None, srsly.msgpack_dumps(traceback.format_exc()))]
padding = [(None, None, None)] * (len(texts) - 1)
sender.send(error_msg + padding)