From 43e59bb22a5fdeb4dadc0572a1f51d6fb672e557 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 8 Oct 2020 10:58:50 +0200 Subject: [PATCH] Update docs and install extras [ci skip] --- setup.cfg | 2 ++ website/docs/api/transformer.md | 24 ++++++++--------- website/docs/usage/embeddings-transformers.md | 3 +-- website/docs/usage/index.md | 27 ++++++++++--------- website/docs/usage/linguistic-features.md | 9 ++++--- website/docs/usage/models.md | 4 +-- website/docs/usage/projects.md | 4 +-- website/docs/usage/training.md | 2 +- website/docs/usage/v3.md | 4 +-- website/gatsby-config.js | 2 ++ website/src/widgets/changelog.js | 5 +++- website/src/widgets/landing.js | 9 ++++--- website/src/widgets/quickstart-install.js | 11 +++++--- 13 files changed, 62 insertions(+), 44 deletions(-) diff --git a/setup.cfg b/setup.cfg index 53171a346..424b1ff8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -68,6 +68,8 @@ lookups = spacy_lookups_data>=1.0.0rc0,<1.0.0 transformers = spacy_transformers>=1.0.0a17,<1.0.0 +ray = + spacy_ray>=0.0.1,<1.0.0 cuda = cupy>=5.0.0b4,<9.0.0 cuda80 = diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index abceeff4f..5754d2238 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -11,7 +11,7 @@ api_string_name: transformer > #### Installation > > ```bash -> $ pip install spacy-transformers +> $ pip install -U %%SPACY_PKG_NAME[transformers] %%SPACY_PKG_FLAGS > ``` @@ -385,12 +385,12 @@ are wrapped into the by this class. Instances of this class are typically assigned to the [`Doc._.trf_data`](/api/transformer#custom-attributes) extension attribute. -| Name | Description | -| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ | +| Name | Description | +| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ | | `tensors` | The activations for the `Doc` from the transformer. Usually the last tensor that is 3-dimensional will be the most important, as that will provide the final hidden state. Generally activations that are 2-dimensional will be attention weights. Details of this variable will differ depending on the underlying transformer model. ~~List[FloatsXd]~~ | -| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. 
This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | -| `width` | The width of the last hidden layer. ~~int~~ | +| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | +| `width` | The width of the last hidden layer. ~~int~~ | ### TransformerData.empty {#transformerdata-emoty tag="classmethod"} @@ -406,13 +406,13 @@ Holds a batch of input and output objects for a transformer model. The data can then be split to a list of [`TransformerData`](/api/transformer#transformerdata) objects to associate the outputs to each [`Doc`](/api/doc) in the batch. -| Name | Description | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Name | Description | +| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `spans` | The batch of input spans. The outer list refers to the Doc objects in the batch, and the inner list are the spans for that `Doc`. Note that spans are allowed to overlap or exclude tokens, but each `Span` can only refer to one `Doc` (by definition). This means that within a `Doc`, the regions of the output tensors that correspond to each `Span` may overlap or have gaps, but for each `Doc`, there is a non-overlapping contiguous slice of the outputs. ~~List[List[Span]]~~ | -| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ | -| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ | -| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | -| `doc_data` | The outputs, split per `Doc` object. ~~List[TransformerData]~~ | +| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ | +| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ | +| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | +| `doc_data` | The outputs, split per `Doc` object. 
~~List[TransformerData]~~ |

### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}

diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index c615097d6..c0611787b 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -216,8 +216,7 @@ in `/opt/nvidia/cuda`, you would run:

```bash
### Installation with CUDA
$ export CUDA_PATH="/opt/nvidia/cuda"
-$ pip install cupy-cuda102
-$ pip install spacy-transformers
+$ pip install -U %%SPACY_PKG_NAME[cuda102,transformers]%%SPACY_PKG_FLAGS
```

### Runtime usage {#transformers-runtime}

diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md
index e0a4fdb07..398f97bb4 100644
--- a/website/docs/usage/index.md
+++ b/website/docs/usage/index.md
@@ -47,7 +47,7 @@ Before you install spaCy and its dependencies, make sure that your `pip`,

```bash
$ pip install -U pip setuptools wheel
-$ pip install -U spacy
+$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
```

When using pip it is generally recommended to install packages in a virtual
@@ -57,7 +57,7 @@ environment to avoid modifying system state:
$ python -m venv .env
$ source .env/bin/activate
$ pip install -U pip setuptools wheel
-$ pip install spacy
+$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
```

spaCy also lets you install extra dependencies by specifying the following
@@ -68,15 +68,16 @@ spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.

> #### Example
>
> ```bash
-> $ pip install spacy[lookups,transformers]
+> $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
> ```

-| Name | Description |
-| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
-| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
-| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
-| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
+| Name | Description |
+| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
+| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). 
The package will be installed automatically when you install a transformer-based pipeline. | +| `ray` | Install [`spacy-ray`](https://github.com/explosion/spacy-ray) to add CLI commands for [parallel training](/usage/training#parallel-training). | +| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. | +| `ja`, `ko`, `th`, `zh` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). | ### conda {#conda} @@ -88,8 +89,8 @@ $ conda install -c conda-forge spacy ``` For the feedstock including the build recipe and configuration, check out -[this repository](https://github.com/conda-forge/spacy-feedstock). Improvements -and pull requests to the recipe and setup are always appreciated. +[this repository](https://github.com/conda-forge/spacy-feedstock). Note that we +currently don't publish any [pre-releases](#changelog-pre) on conda. ### Upgrading spaCy {#upgrading} @@ -116,7 +117,7 @@ are printed. It's recommended to run the command with `python -m` to make sure you're executing the correct version of spaCy. ```cli -$ pip install -U spacy +$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS $ python -m spacy validate ``` @@ -134,7 +135,7 @@ specifier allows cupy to be installed via wheel, saving some compilation time. The specifiers should install [`cupy`](https://cupy.chainer.org). ```bash -$ pip install -U spacy[cuda92] +$ pip install -U %%SPACY_PKG_NAME[cuda92]%%SPACY_PKG_FLAGS ``` Once you have a GPU-enabled installation, the best way to activate it is to call diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index 1964bac18..f669c0a84 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -166,7 +166,7 @@ lookup lemmatizer looks up the token surface form in the lookup table without reference to the token's part-of-speech or context. ```python -# pip install spacy-lookups-data +# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS import spacy nlp = spacy.blank("sv") @@ -181,7 +181,7 @@ rule-based lemmatizer can be added using rule tables from [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data): ```python -# pip install spacy-lookups-data +# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS import spacy nlp = spacy.blank("de") @@ -1801,7 +1801,10 @@ print(doc2[5].tag_, doc2[5].pos_) # WP PRON -The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details. +The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph +rules** in the v2.x format via its built-in methods or when the component is +initialized before training. See the +[migration guide](/usage/v3#migrating-training-mappings-exceptions) for details. diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md index fe3ee6e04..8c8875b9e 100644 --- a/website/docs/usage/models.md +++ b/website/docs/usage/models.md @@ -54,7 +54,7 @@ contribute to development. 
> separately in the same environment: > > ```bash -> $ pip install spacy[lookups] +> $ pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS > ``` import Languages from 'widgets/languages.js' @@ -287,7 +287,7 @@ The download command will [install the package](/usage/models#download-pip) via pip and place the package in your `site-packages` directory. ```cli -$ pip install -U spacy +$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS $ python -m spacy download en_core_web_sm ``` diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md index 5fced922d..409236fbc 100644 --- a/website/docs/usage/projects.md +++ b/website/docs/usage/projects.md @@ -813,7 +813,7 @@ full embedded visualizer, as well as individual components. > #### Installation > > ```bash -> $ pip install "spacy-streamlit>=1.0.0a0" +> $ pip install spacy-streamlit --pre > ``` ![](../images/spacy-streamlit.png) @@ -911,7 +911,7 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main. > #### Installation > > ```cli -> $ pip install spacy-ray +> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS > # Check that the CLI is registered > $ python -m spacy ray --help > ``` diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md index e63e25e52..04924a431 100644 --- a/website/docs/usage/training.md +++ b/website/docs/usage/training.md @@ -1249,7 +1249,7 @@ valid. > #### Installation > > ```cli -> $ pip install spacy-ray +> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS > # Check that the CLI is registered > $ python -m spacy ray --help > ``` diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md index 1024a2551..0f30029e7 100644 --- a/website/docs/usage/v3.md +++ b/website/docs/usage/v3.md @@ -236,7 +236,7 @@ treebank. > #### Example > > ```cli -> $ pip install spacy-ray +> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS > # Check that the CLI is registered > $ python -m spacy ray --help > # Train a pipeline @@ -272,7 +272,7 @@ add to your pipeline and customize for your use case: > #### Example > > ```python -> # pip install spacy-lookups-data +> # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS > nlp = spacy.blank("en") > nlp.add_pipe("lemmatizer") > ``` diff --git a/website/gatsby-config.js b/website/gatsby-config.js index 4650711ac..5b11f56bc 100644 --- a/website/gatsby-config.js +++ b/website/gatsby-config.js @@ -30,6 +30,8 @@ const branch = isNightly ? 'develop' : 'master' const replacements = { GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`, GITHUB_PROJECTS: `https://github.com/${site.projectsRepo}`, + SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy', + SPACY_PKG_FLAGS: isNightly ? ' --pre' : '', } /** diff --git a/website/src/widgets/changelog.js b/website/src/widgets/changelog.js index 73890d320..c5aca9b62 100644 --- a/website/src/widgets/changelog.js +++ b/website/src/widgets/changelog.js @@ -97,7 +97,10 @@ const Changelog = () => {

Pre-releases include alpha and beta versions, as well as release candidates. They are not intended for production use. You can download spaCy pre-releases via the{' '}
-<InlineCode>spacy-nightly</InlineCode> package on pip.
+<Link to="https://pypi.org/project/spacy-nightly/">
+    <InlineCode>spacy-nightly</InlineCode>
+</Link>{' '}
+package on pip.

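For context on the `%%...%%` placeholders used throughout the docs hunks above: the `replacements` map added to `gatsby-config.js` earlier in this patch drives a plain string substitution in the site's build. The sketch below shows how one placeholder resolves on the nightly vs. stable site; the `substitute` helper and the hard-coded `isNightly` flag are illustrative assumptions, since the actual wiring into the markdown pipeline is not part of this patch.

```js
// Minimal sketch of the %%VAR%% substitution, assuming plain string
// replacement. `replacements` mirrors the gatsby-config.js hunk; the
// `substitute` helper is hypothetical, not the site's real code.
const isNightly = true // stand-in for the site's isNightly flag

const replacements = {
    SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy',
    SPACY_PKG_FLAGS: isNightly ? ' --pre' : '',
}

function substitute(text) {
    // Replace every %%KEY occurrence with its configured value
    return Object.keys(replacements).reduce(
        (out, key) => out.split(`%%${key}`).join(replacements[key]),
        text
    )
}

// Nightly site: "$ pip install -U spacy-nightly[lookups,transformers] --pre"
// Stable site:  "$ pip install -U spacy[lookups,transformers]"
console.log(substitute('$ pip install -U %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS'))
```

This is why the install commands in the docs are written against placeholders rather than a hard-coded package name: one source renders correctly on both the stable and the nightly site.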
diff --git a/website/src/widgets/landing.js b/website/src/widgets/landing.js index 6fe7f4cdf..ac1d7c5c7 100644 --- a/website/src/widgets/landing.js +++ b/website/src/widgets/landing.js @@ -28,7 +28,8 @@ import irlBackground from '../images/spacy-irl.jpg' import Benchmarks from 'usage/_benchmarks-models.md' -const CODE_EXAMPLE = `# pip install spacy +function getCodeExample(nightly) { + return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'} # python -m spacy download en_core_web_sm import spacy @@ -52,9 +53,11 @@ print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"]) for entity in doc.ents: print(entity.text, entity.label_) ` +} const Landing = ({ data }) => { - const { counts } = data + const { counts, nightly } = data + const codeExample = getCodeExample(nightly) return ( <> @@ -91,7 +94,7 @@ const Landing = ({ data }) => { - {CODE_EXAMPLE} + {codeExample}

<H2>Features</H2>

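The landing.js hunk above replaces the static `CODE_EXAMPLE` constant with `getCodeExample(nightly)`, so the copy-paste snippet on the landing page shows the right install command for the channel being built. A quick check of the two branches, using the function as defined in the hunk, trimmed here to the first lines of the snippet:

```js
// Only the first comment line of the snippet depends on the channel;
// the rest of the example (imports, pipeline calls) is shared.
function getCodeExample(nightly) {
    return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'}
# python -m spacy download en_core_web_sm
import spacy
`
}

console.log(getCodeExample(true).split('\n')[0])  // # pip install -U spacy-nightly --pre
console.log(getCodeExample(false).split('\n')[0]) // # pip install -U spacy
```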
diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js
index ab91b8e30..37ae10da4 100644
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@@ -141,6 +141,11 @@ const QuickstartInstall = ({ id, title }) => {
setters={setters}
showDropdown={showDropdown}
>
+{nightly && (
+    <QS comment prompt={false}>
+        # 🚨 Nightly releases are currently only available via pip
+    </QS>
+)}
python -m venv .env
source .env/bin/activate
@@ -175,9 +180,9 @@ const QuickstartInstall = ({ id, title }) => {
pip install -r requirements.txt
python setup.py build_ext --inplace
-<QS package="source">
-    pip install -e '.[{pipExtras}]'
-</QS>
+{(train || hardware == 'gpu') && (
+    <QS package="source">pip install -e '.[{pipExtras}]'</QS>
+)}
conda install -c conda-forge spacy-transformers
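The quickstart hunk above gates the `pip install -e '.[{pipExtras}]'` command on the selected options, and the `setup.cfg` hunk at the top of this patch is what makes `ray` a valid entry in that bracket list. Below is a sketch of how such an extras string could be assembled; the `getPipExtras` helper and its option flags are illustrative assumptions, not the widget's actual code, though the extras names themselves match entries in spaCy's `setup.cfg`.

```js
// Hypothetical sketch: build the extras for `pip install -e '.[...]'`
// from quickstart selections. The extras names (`cuda102`, `transformers`,
// `ray`, `lookups`) correspond to entries defined in setup.cfg.
function getPipExtras({ hardware, cuda, transformers, ray, lookups }) {
    const extras = []
    if (hardware === 'gpu' && cuda) extras.push(cuda) // e.g. 'cuda102'
    if (transformers) extras.push('transformers')
    if (ray) extras.push('ray')
    if (lookups) extras.push('lookups')
    return extras.join(',')
}

// → "cuda102,transformers,ray", i.e. pip install -e '.[cuda102,transformers,ray]'
console.log(getPipExtras({ hardware: 'gpu', cuda: 'cuda102', transformers: true, ray: true }))
```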