Make Language.pipe workers exit cleanly (#13321)

Also warn when any worker exited with a non-zero exit code and modify test to ensure that workers exit cleanly by default.
2025-08-08 22:24:55 +03:00 · 2024-02-12 14:39:38 +01:00 · 2024-02-12 14:39:38 +01:00 · fdfdbcd9f4
commit fdfdbcd9f4
parent 14bd9d89a3
3 changed files with 14 additions and 3 deletions
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -220,6 +220,7 @@ class Warnings(metaclass=ErrorsWithCodes):
            "key attribute for vectors, configure it through Vectors(attr=) or "
            "'spacy init vectors --attr'")
    W126 = ("These keys are unsupported: {unsupported}")
+    W127 = ("Not all `Language.pipe` worker processes completed successfully")


 class Errors(metaclass=ErrorsWithCodes):
--- a/spacy/language.py
+++ b/spacy/language.py
@ -1730,6 +1730,9 @@ class Language:
            for proc in procs:
                proc.join()

+            if not all(proc.exitcode == 0 for proc in procs):
+                warnings.warn(Warnings.W127)
+
    def _link_components(self) -> None:
        """Register 'listeners' within pipeline components, to allow them to
        effectively share weights.
@ -2350,6 +2353,7 @@ def _apply_pipes(
            if isinstance(texts_with_ctx, _WorkDoneSentinel):
                sender.close()
                receiver.close()
+                return

            docs = (
                ensure_doc(doc_like, context) for doc_like, context in texts_with_ctx
@ -2375,6 +2379,7 @@ def _apply_pipes(
            # stop processing.
            sender.close()
            receiver.close()
+            return


 class _Sender:
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@ -1,5 +1,6 @@
 import itertools
 import logging
+import warnings
 from unittest import mock

 import pytest
@ -738,9 +739,13 @@ def test_pass_doc_to_pipeline(nlp, n_process):
    assert doc.text == texts[0]
    assert len(doc.cats) > 0
    if isinstance(get_current_ops(), NumpyOps) or n_process < 2:
-        docs = nlp.pipe(docs, n_process=n_process)
-        assert [doc.text for doc in docs] == texts
-        assert all(len(doc.cats) for doc in docs)
+        # Catch warnings to ensure that all worker processes exited
+        # succesfully.
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            docs = nlp.pipe(docs, n_process=n_process)
+            assert [doc.text for doc in docs] == texts
+            assert all(len(doc.cats) for doc in docs)


 def test_invalid_arg_to_pipeline(nlp):