Update multi-processing example and add to docs

2025-12-22 17:43:13 +03:00 · 2017-10-27 01:58:55 +02:00 · 2017-10-27 01:58:55 +02:00 · 1d69a46cd4
commit 1d69a46cd4
parent 4eabaafd66
4 changed files with 31 additions and 3 deletions
--- a/examples/parallel_tag.py
+++ b/examples/parallel_tag.py
@@ -1,7 +1,9 @@
 """
-Example of multi-processing with joblib. Here, we're exporting
+Example of multi-processing with Joblib. Here, we're exporting
 part-of-speech-tagged, true-cased, (very roughly) sentence-separated text, with
-each "sentence" on a newline, and spaces between tokens.
+each "sentence" on a newline, and spaces between tokens. Data comes from the
+IMDB movie reviews dataset and is loaded automatically via Thinc's built-in
+dataset loader.

 Last updated for: spaCy 2.0.0a18
 """
--- a/website/usage/_data.json
+++ b/website/usage/_data.json
@@ -106,7 +106,7 @@
            "How Pipelines Work": "pipelines",
            "Custom Components": "custom-components",
            "Developing Extensions": "extensions",
-            "Multi-threading": "multithreading",
+            "Multi-Threading": "multithreading",
            "Serialization": "serialization"
        }
    },
--- a/website/usage/_processing-pipelines/_multithreading.jade
+++ b/website/usage/_processing-pipelines/_multithreading.jade
@@ -38,3 +38,16 @@ p
        |  the generator in two, and then #[code izip] the extra stream to the
        |  document stream. Here's
        |  #[+a(gh("spacy") + "/issues/172#issuecomment-183963403") an example].
+
++h(3, "multi-processing-example") Example: Multi-processing with Joblib
+
+p
+    |  This example shows how to use multiple cores to process text using
+    |  spaCy and #[+a("https://pythonhosted.org/joblib/") Joblib]. We're
+    |  exporting part-of-speech-tagged, true-cased, (very roughly)
+    |  sentence-separated text, with each "sentence" on a newline, and
+    |  spaces between tokens. Data comes from the IMDB movie reviews
+    |  dataset and is loaded automatically via Thinc's built-in dataset
+    |  loader.
+
++github("spacy", "examples/parallel_tag.py")
--- a/website/usage/examples.jade
+++ b/website/usage/examples.jade
@@ -71,6 +71,19 @@ include ../_includes/_mixins

    +github("spacy", "examples/pipeline/custom_attr_methods.py")

+    +h(3, "parallel-tag") Multi-processing with Joblib
+
+    p
+        |  This example shows how to use multiple cores to process text using
+        |  spaCy and #[+a("https://pythonhosted.org/joblib/") Joblib]. We're
+        |  exporting part-of-speech-tagged, true-cased, (very roughly)
+        |  sentence-separated text, with each "sentence" on a newline, and
+        |  spaces between tokens. Data comes from the IMDB movie reviews
+        |  dataset and is loaded automatically via Thinc's built-in dataset
+        |  loader.
+
+    +github("spacy", "examples/parallel_tag.py")
+
 +section("training")
    +h(3, "training-ner") Training spaCy's Named Entity Recognizer