From 43e59bb22a5fdeb4dadc0572a1f51d6fb672e557 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 8 Oct 2020 10:58:50 +0200 Subject: [PATCH] Update docs and install extras [ci skip] --- setup.cfg | 2 ++ website/docs/api/transformer.md | 24 ++++++++--------- website/docs/usage/embeddings-transformers.md | 3 +-- website/docs/usage/index.md | 27 ++++++++++--------- website/docs/usage/linguistic-features.md | 9 ++++--- website/docs/usage/models.md | 4 +-- website/docs/usage/projects.md | 4 +-- website/docs/usage/training.md | 2 +- website/docs/usage/v3.md | 4 +-- website/gatsby-config.js | 2 ++ website/src/widgets/changelog.js | 5 +++- website/src/widgets/landing.js | 9 ++++--- website/src/widgets/quickstart-install.js | 11 +++++--- 13 files changed, 62 insertions(+), 44 deletions(-) diff --git a/setup.cfg b/setup.cfg index 53171a346..424b1ff8e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -68,6 +68,8 @@ lookups = spacy_lookups_data>=1.0.0rc0,<1.0.0 transformers = spacy_transformers>=1.0.0a17,<1.0.0 +ray = + spacy_ray>=0.0.1,<1.0.0 cuda = cupy>=5.0.0b4,<9.0.0 cuda80 = diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index abceeff4f..5754d2238 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -11,7 +11,7 @@ api_string_name: transformer > #### Installation > > ```bash -> $ pip install spacy-transformers +> $ pip install -U %%SPACY_PKG_NAME[transformers] %%SPACY_PKG_FLAGS > ``` @@ -385,12 +385,12 @@ are wrapped into the by this class. Instances of this class are typically assigned to the [`Doc._.trf_data`](/api/transformer#custom-attributes) extension attribute. -| Name | Description | -| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ | +| Name | Description | +| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ | | `tensors` | The activations for the `Doc` from the transformer. Usually the last tensor that is 3-dimensional will be the most important, as that will provide the final hidden state. Generally activations that are 2-dimensional will be attention weights. Details of this variable will differ depending on the underlying transformer model. ~~List[FloatsXd]~~ | -| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. 
This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | -| `width` | The width of the last hidden layer. ~~int~~ | +| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | +| `width` | The width of the last hidden layer. ~~int~~ | ### TransformerData.empty {#transformerdata-emoty tag="classmethod"} @@ -406,13 +406,13 @@ Holds a batch of input and output objects for a transformer model. The data can then be split to a list of [`TransformerData`](/api/transformer#transformerdata) objects to associate the outputs to each [`Doc`](/api/doc) in the batch. -| Name | Description | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Name | Description | +| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `spans` | The batch of input spans. The outer list refers to the Doc objects in the batch, and the inner list are the spans for that `Doc`. Note that spans are allowed to overlap or exclude tokens, but each `Span` can only refer to one `Doc` (by definition). This means that within a `Doc`, the regions of the output tensors that correspond to each `Span` may overlap or have gaps, but for each `Doc`, there is a non-overlapping contiguous slice of the outputs. ~~List[List[Span]]~~ | -| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ | -| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ | -| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | -| `doc_data` | The outputs, split per `Doc` object. ~~List[TransformerData]~~ | +| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ | +| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ | +| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ | +| `doc_data` | The outputs, split per `Doc` object. 
~~List[TransformerData]~~ |

### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}

diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index c615097d6..c0611787b 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -216,8 +216,7 @@ in `/opt/nvidia/cuda`, you would run:

```bash
### Installation with CUDA
$ export CUDA_PATH="/opt/nvidia/cuda"
-$ pip install cupy-cuda102
-$ pip install spacy-transformers
+$ pip install -U %%SPACY_PKG_NAME[cuda102,transformers]%%SPACY_PKG_FLAGS
```

### Runtime usage {#transformers-runtime}

diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md
index e0a4fdb07..398f97bb4 100644
--- a/website/docs/usage/index.md
+++ b/website/docs/usage/index.md
@@ -47,7 +47,7 @@ Before you install spaCy and its dependencies, make sure that your `pip`,

```bash
$ pip install -U pip setuptools wheel
-$ pip install -U spacy
+$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
```

When using pip it is generally recommended to install packages in a virtual
@@ -57,7 +57,7 @@ environment to avoid modifying system state:
$ python -m venv .env
$ source .env/bin/activate
$ pip install -U pip setuptools wheel
-$ pip install spacy
+$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
```

spaCy also lets you install extra dependencies by specifying the following
@@ -68,15 +68,16 @@ spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.

> #### Example
>
> ```bash
-> $ pip install spacy[lookups,transformers]
+> $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
> ```

-| Name | Description |
-| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
-| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
-| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
-| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
+| Name | Description |
+| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
+| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). 
The package will be installed automatically when you install a transformer-based pipeline. | +| `ray` | Install [`spacy-ray`](https://github.com/explosion/spacy-ray) to add CLI commands for [parallel training](/usage/training#parallel-training). | +| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. | +| `ja`, `ko`, `th`, `zh` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). | ### conda {#conda} @@ -88,8 +89,8 @@ $ conda install -c conda-forge spacy ``` For the feedstock including the build recipe and configuration, check out -[this repository](https://github.com/conda-forge/spacy-feedstock). Improvements -and pull requests to the recipe and setup are always appreciated. +[this repository](https://github.com/conda-forge/spacy-feedstock). Note that we +currently don't publish any [pre-releases](#changelog-pre) on conda. ### Upgrading spaCy {#upgrading} @@ -116,7 +117,7 @@ are printed. It's recommended to run the command with `python -m` to make sure you're executing the correct version of spaCy. ```cli -$ pip install -U spacy +$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS $ python -m spacy validate ``` @@ -134,7 +135,7 @@ specifier allows cupy to be installed via wheel, saving some compilation time. The specifiers should install [`cupy`](https://cupy.chainer.org). ```bash -$ pip install -U spacy[cuda92] +$ pip install -U %%SPACY_PKG_NAME[cuda92]%%SPACY_PKG_FLAGS ``` Once you have a GPU-enabled installation, the best way to activate it is to call diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md index 1964bac18..f669c0a84 100644 --- a/website/docs/usage/linguistic-features.md +++ b/website/docs/usage/linguistic-features.md @@ -166,7 +166,7 @@ lookup lemmatizer looks up the token surface form in the lookup table without reference to the token's part-of-speech or context. ```python -# pip install spacy-lookups-data +# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS import spacy nlp = spacy.blank("sv") @@ -181,7 +181,7 @@ rule-based lemmatizer can be added using rule tables from [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data): ```python -# pip install spacy-lookups-data +# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS import spacy nlp = spacy.blank("de") @@ -1801,7 +1801,10 @@ print(doc2[5].tag_, doc2[5].pos_) # WP PRON -The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details. +The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph +rules** in the v2.x format via its built-in methods or when the component is +initialized before training. See the +[migration guide](/usage/v3#migrating-training-mappings-exceptions) for details. diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md index fe3ee6e04..8c8875b9e 100644 --- a/website/docs/usage/models.md +++ b/website/docs/usage/models.md @@ -54,7 +54,7 @@ contribute to development. 
> separately in the same environment: > > ```bash -> $ pip install spacy[lookups] +> $ pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS > ``` import Languages from 'widgets/languages.js' @@ -287,7 +287,7 @@ The download command will [install the package](/usage/models#download-pip) via pip and place the package in your `site-packages` directory. ```cli -$ pip install -U spacy +$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS $ python -m spacy download en_core_web_sm ``` diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md index 5fced922d..409236fbc 100644 --- a/website/docs/usage/projects.md +++ b/website/docs/usage/projects.md @@ -813,7 +813,7 @@ full embedded visualizer, as well as individual components. > #### Installation > > ```bash -> $ pip install "spacy-streamlit>=1.0.0a0" +> $ pip install spacy-streamlit --pre > ``` ![](../images/spacy-streamlit.png) @@ -911,7 +911,7 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main. > #### Installation > > ```cli -> $ pip install spacy-ray +> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS > # Check that the CLI is registered > $ python -m spacy ray --help > ``` diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md index e63e25e52..04924a431 100644 --- a/website/docs/usage/training.md +++ b/website/docs/usage/training.md @@ -1249,7 +1249,7 @@ valid. > #### Installation > > ```cli -> $ pip install spacy-ray +> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS > # Check that the CLI is registered > $ python -m spacy ray --help > ``` diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md index 1024a2551..0f30029e7 100644 --- a/website/docs/usage/v3.md +++ b/website/docs/usage/v3.md @@ -236,7 +236,7 @@ treebank. > #### Example > > ```cli -> $ pip install spacy-ray +> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS > # Check that the CLI is registered > $ python -m spacy ray --help > # Train a pipeline @@ -272,7 +272,7 @@ add to your pipeline and customize for your use case: > #### Example > > ```python -> # pip install spacy-lookups-data +> # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS > nlp = spacy.blank("en") > nlp.add_pipe("lemmatizer") > ``` diff --git a/website/gatsby-config.js b/website/gatsby-config.js index 4650711ac..5b11f56bc 100644 --- a/website/gatsby-config.js +++ b/website/gatsby-config.js @@ -30,6 +30,8 @@ const branch = isNightly ? 'develop' : 'master' const replacements = { GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`, GITHUB_PROJECTS: `https://github.com/${site.projectsRepo}`, + SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy', + SPACY_PKG_FLAGS: isNightly ? ' --pre' : '', } /** diff --git a/website/src/widgets/changelog.js b/website/src/widgets/changelog.js index 73890d320..c5aca9b62 100644 --- a/website/src/widgets/changelog.js +++ b/website/src/widgets/changelog.js @@ -97,7 +97,10 @@ const Changelog = () => {

Pre-releases include alpha and beta versions, as well as release candidates. They are not intended for production use. You can download spaCy pre-releases via the{' '}
-<InlineCode>spacy-nightly</InlineCode> package on pip.
+<Link to="https://pypi.org/project/spacy-nightly/">
+    <InlineCode>spacy-nightly</InlineCode>
+</Link>{' '}
+package on pip.

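For context on the `%%...%%` placeholders used throughout the docs hunks above: the `replacements` map added to `gatsby-config.js` earlier in this patch drives a plain string substitution in the site's build. The sketch below shows how one placeholder resolves on the nightly vs. stable site; the `substitute` helper and the hard-coded `isNightly` flag are illustrative assumptions, since the actual wiring into the markdown pipeline is not part of this patch.

```js
// Minimal sketch of the %%VAR%% substitution, assuming plain string
// replacement. `replacements` mirrors the gatsby-config.js hunk; the
// `substitute` helper is hypothetical, not the site's real code.
const isNightly = true // stand-in for the site's isNightly flag

const replacements = {
    SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy',
    SPACY_PKG_FLAGS: isNightly ? ' --pre' : '',
}

function substitute(text) {
    // Replace every %%KEY occurrence with its configured value
    return Object.keys(replacements).reduce(
        (out, key) => out.split(`%%${key}`).join(replacements[key]),
        text
    )
}

// Nightly site: "$ pip install -U spacy-nightly[lookups,transformers] --pre"
// Stable site:  "$ pip install -U spacy[lookups,transformers]"
console.log(substitute('$ pip install -U %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS'))
```

This is why the install commands in the docs are written against placeholders rather than a hard-coded package name: one source renders correctly on both the stable and the nightly site.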
diff --git a/website/src/widgets/landing.js b/website/src/widgets/landing.js index 6fe7f4cdf..ac1d7c5c7 100644 --- a/website/src/widgets/landing.js +++ b/website/src/widgets/landing.js @@ -28,7 +28,8 @@ import irlBackground from '../images/spacy-irl.jpg' import Benchmarks from 'usage/_benchmarks-models.md' -const CODE_EXAMPLE = `# pip install spacy +function getCodeExample(nightly) { + return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'} # python -m spacy download en_core_web_sm import spacy @@ -52,9 +53,11 @@ print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"]) for entity in doc.ents: print(entity.text, entity.label_) ` +} const Landing = ({ data }) => { - const { counts } = data + const { counts, nightly } = data + const codeExample = getCodeExample(nightly) return ( <> @@ -91,7 +94,7 @@ const Landing = ({ data }) => { - {CODE_EXAMPLE} + {codeExample}

<H2>Features</H2>

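The landing.js hunk above replaces the static `CODE_EXAMPLE` constant with `getCodeExample(nightly)`, so the copy-paste snippet on the landing page shows the right install command for the channel being built. A quick check of the two branches, using the function as defined in the hunk, trimmed here to the first lines of the snippet:

```js
// Only the first comment line of the snippet depends on the channel;
// the rest of the example (imports, pipeline calls) is shared.
function getCodeExample(nightly) {
    return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'}
# python -m spacy download en_core_web_sm
import spacy
`
}

console.log(getCodeExample(true).split('\n')[0])  // # pip install -U spacy-nightly --pre
console.log(getCodeExample(false).split('\n')[0]) // # pip install -U spacy
```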
diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js
index ab91b8e30..37ae10da4 100644
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@@ -141,6 +141,11 @@ const QuickstartInstall = ({ id, title }) => {
setters={setters}
showDropdown={showDropdown}
>
+{nightly && (
+    <QS comment prompt={false}>
+        # 🚨 Nightly releases are currently only available via pip
+    </QS>
+)}
python -m venv .env
source .env/bin/activate
@@ -175,9 +180,9 @@ const QuickstartInstall = ({ id, title }) => {
pip install -r requirements.txt
python setup.py build_ext --inplace
-<QS package="source">
-    pip install -e '.[{pipExtras}]'
-</QS>
+{(train || hardware == 'gpu') && (
+    <QS package="source">pip install -e '.[{pipExtras}]'</QS>
+)}
conda install -c conda-forge spacy-transformers
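The quickstart hunk above gates the `pip install -e '.[{pipExtras}]'` command on the selected options, and the `setup.cfg` hunk at the top of this patch is what makes `ray` a valid entry in that bracket list. Below is a sketch of how such an extras string could be assembled; the `getPipExtras` helper and its option flags are illustrative assumptions, not the widget's actual code, though the extras names themselves match entries in spaCy's `setup.cfg`.

```js
// Hypothetical sketch: build the extras for `pip install -e '.[...]'`
// from quickstart selections. The extras names (`cuda102`, `transformers`,
// `ray`, `lookups`) correspond to entries defined in setup.cfg.
function getPipExtras({ hardware, cuda, transformers, ray, lookups }) {
    const extras = []
    if (hardware === 'gpu' && cuda) extras.push(cuda) // e.g. 'cuda102'
    if (transformers) extras.push('transformers')
    if (ray) extras.push('ray')
    if (lookups) extras.push('lookups')
    return extras.join(',')
}

// → "cuda102,transformers,ray", i.e. pip install -e '.[cuda102,transformers,ray]'
console.log(getPipExtras({ hardware: 'gpu', cuda: 'cuda102', transformers: true, ray: true }))
```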