mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Serialize _context separately in multiprocessing pipe (#9597)
* Serialize _context with Doc
* Revert "Serialize _context with Doc"
This reverts commit 161f1fac91.
* Serialize Doc._context separately for multiprocessing pipe
			
			
This commit is contained in:
		
							parent
							
								
									5a979137a7
								
							
						
					
					
						commit
						61daac54e4
					
				| 
						 | 
					@ -1631,11 +1631,12 @@ class Language:
 | 
				
			||||||
            recv.recv() for recv in cycle(bytedocs_recv_ch)
 | 
					            recv.recv() for recv in cycle(bytedocs_recv_ch)
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            for i, (_, (byte_doc, byte_error)) in enumerate(
 | 
					            for i, (_, (byte_doc, byte_context, byte_error)) in enumerate(
 | 
				
			||||||
                zip(raw_texts, byte_tuples), 1
 | 
					                zip(raw_texts, byte_tuples), 1
 | 
				
			||||||
            ):
 | 
					            ):
 | 
				
			||||||
                if byte_doc is not None:
 | 
					                if byte_doc is not None:
 | 
				
			||||||
                    doc = Doc(self.vocab).from_bytes(byte_doc)
 | 
					                    doc = Doc(self.vocab).from_bytes(byte_doc)
 | 
				
			||||||
 | 
					                    doc._context = byte_context
 | 
				
			||||||
                    yield doc
 | 
					                    yield doc
 | 
				
			||||||
                elif byte_error is not None:
 | 
					                elif byte_error is not None:
 | 
				
			||||||
                    error = srsly.msgpack_loads(byte_error)
 | 
					                    error = srsly.msgpack_loads(byte_error)
 | 
				
			||||||
| 
						 | 
					@ -2186,12 +2187,12 @@ def _apply_pipes(
 | 
				
			||||||
            for pipe in pipes:
 | 
					            for pipe in pipes:
 | 
				
			||||||
                docs = pipe(docs)  # type: ignore[arg-type, assignment]
 | 
					                docs = pipe(docs)  # type: ignore[arg-type, assignment]
 | 
				
			||||||
            # Connection does not accept unpickable objects, so send list.
 | 
					            # Connection does not accept unpickable objects, so send list.
 | 
				
			||||||
            byte_docs = [(doc.to_bytes(), None) for doc in docs]
 | 
					            byte_docs = [(doc.to_bytes(), doc._context, None) for doc in docs]
 | 
				
			||||||
            padding = [(None, None)] * (len(texts) - len(byte_docs))
 | 
					            padding = [(None, None, None)] * (len(texts) - len(byte_docs))
 | 
				
			||||||
            sender.send(byte_docs + padding)  # type: ignore[operator]
 | 
					            sender.send(byte_docs + padding)  # type: ignore[operator]
 | 
				
			||||||
        except Exception:
 | 
					        except Exception:
 | 
				
			||||||
            error_msg = [(None, srsly.msgpack_dumps(traceback.format_exc()))]
 | 
					            error_msg = [(None, None, srsly.msgpack_dumps(traceback.format_exc()))]
 | 
				
			||||||
            padding = [(None, None)] * (len(texts) - 1)
 | 
					            padding = [(None, None, None)] * (len(texts) - 1)
 | 
				
			||||||
            sender.send(error_msg + padding)
 | 
					            sender.send(error_msg + padding)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user