Update docs and install extras [ci skip]

This commit is contained in:
Ines Montani 2020-10-08 10:58:50 +02:00
parent eb28e8ce35
commit 43e59bb22a
13 changed files with 62 additions and 44 deletions

View File

@ -68,6 +68,8 @@ lookups =
spacy_lookups_data>=1.0.0rc0,<1.0.0
transformers =
spacy_transformers>=1.0.0a17,<1.0.0
ray =
spacy_ray>=0.0.1,<1.0.0
cuda =
cupy>=5.0.0b4,<9.0.0
cuda80 =

View File

@ -11,7 +11,7 @@ api_string_name: transformer
> #### Installation
>
> ```bash
> $ pip install spacy-transformers
> $ pip install -U %%SPACY_PKG_NAME[transformers]%%SPACY_PKG_FLAGS
> ```
<Infobox title="Important note" variant="warning">
@ -385,12 +385,12 @@ are wrapped into the
by this class. Instances of this class are typically assigned to the
[`Doc._.trf_data`](/api/transformer#custom-attributes) extension attribute.
| Name | Description |
| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ |
| Name | Description |
| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ |
| `tensors` | The activations for the `Doc` from the transformer. Usually the last tensor that is 3-dimensional will be the most important, as that will provide the final hidden state. Generally activations that are 2-dimensional will be attention weights. Details of this variable will differ depending on the underlying transformer model. ~~List[FloatsXd]~~ |
| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
| `width` | The width of the last hidden layer. ~~int~~ |
| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
| `width` | The width of the last hidden layer. ~~int~~ |
### TransformerData.empty {#transformerdata-empty tag="classmethod"}
@ -406,13 +406,13 @@ Holds a batch of input and output objects for a transformer model. The data can
then be split to a list of [`TransformerData`](/api/transformer#transformerdata)
objects to associate the outputs to each [`Doc`](/api/doc) in the batch.
| Name | Description |
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| Name | Description |
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `spans` | The batch of input spans. The outer list refers to the Doc objects in the batch, and the inner list are the spans for that `Doc`. Note that spans are allowed to overlap or exclude tokens, but each `Span` can only refer to one `Doc` (by definition). This means that within a `Doc`, the regions of the output tensors that correspond to each `Span` may overlap or have gaps, but for each `Doc`, there is a non-overlapping contiguous slice of the outputs. ~~List[List[Span]]~~ |
| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ |
| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ |
| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
| `doc_data` | The outputs, split per `Doc` object. ~~List[TransformerData]~~ |
| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ |
| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ |
| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
| `doc_data` | The outputs, split per `Doc` object. ~~List[TransformerData]~~ |
### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}

View File

@ -216,8 +216,7 @@ in `/opt/nvidia/cuda`, you would run:
```bash
### Installation with CUDA
$ export CUDA_PATH="/opt/nvidia/cuda"
$ pip install cupy-cuda102
$ pip install spacy-transformers
$ pip install -U %%SPACY_PKG_NAME[cuda102,transformers]%%SPACY_PKG_FLAGS
```
### Runtime usage {#transformers-runtime}

View File

@ -47,7 +47,7 @@ Before you install spaCy and its dependencies, make sure that your `pip`,
```bash
$ pip install -U pip setuptools wheel
$ pip install -U spacy
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
```
When using pip it is generally recommended to install packages in a virtual
@ -57,7 +57,7 @@ environment to avoid modifying system state:
$ python -m venv .env
$ source .env/bin/activate
$ pip install -U pip setuptools wheel
$ pip install spacy
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
```
spaCy also lets you install extra dependencies by specifying the following
@ -68,15 +68,16 @@ spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.
> #### Example
>
> ```bash
> $ pip install spacy[lookups,transformers]
> $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
> ```
| Name | Description |
| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
| Name | Description |
| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
| `ray` | Install [`spacy-ray`](https://github.com/explosion/spacy-ray) to add CLI commands for [parallel training](/usage/training#parallel-training). |
| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
| `ja`, `ko`, `th`, `zh` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
### conda {#conda}
@ -88,8 +89,8 @@ $ conda install -c conda-forge spacy
```
For the feedstock including the build recipe and configuration, check out
[this repository](https://github.com/conda-forge/spacy-feedstock). Improvements
and pull requests to the recipe and setup are always appreciated.
[this repository](https://github.com/conda-forge/spacy-feedstock). Note that we
currently don't publish any [pre-releases](#changelog-pre) on conda.
### Upgrading spaCy {#upgrading}
@ -116,7 +117,7 @@ are printed. It's recommended to run the command with `python -m` to make sure
you're executing the correct version of spaCy.
```cli
$ pip install -U spacy
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
$ python -m spacy validate
```
@ -134,7 +135,7 @@ specifier allows cupy to be installed via wheel, saving some compilation time.
The specifiers should install [`cupy`](https://cupy.chainer.org).
```bash
$ pip install -U spacy[cuda92]
$ pip install -U %%SPACY_PKG_NAME[cuda92]%%SPACY_PKG_FLAGS
```
Once you have a GPU-enabled installation, the best way to activate it is to call

View File

@ -166,7 +166,7 @@ lookup lemmatizer looks up the token surface form in the lookup table without
reference to the token's part-of-speech or context.
```python
# pip install spacy-lookups-data
# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
import spacy
nlp = spacy.blank("sv")
@ -181,7 +181,7 @@ rule-based lemmatizer can be added using rule tables from
[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data):
```python
# pip install spacy-lookups-data
# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
import spacy
nlp = spacy.blank("de")
@ -1801,7 +1801,10 @@ print(doc2[5].tag_, doc2[5].pos_) # WP PRON
<Infobox variant="warning" title="Migrating from spaCy v2.x">
The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.
The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph
rules** in the v2.x format via its built-in methods or when the component is
initialized before training. See the
[migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.
</Infobox>

View File

@ -54,7 +54,7 @@ contribute to development.
> separately in the same environment:
>
> ```bash
> $ pip install spacy[lookups]
> $ pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
> ```
import Languages from 'widgets/languages.js'
@ -287,7 +287,7 @@ The download command will [install the package](/usage/models#download-pip) via
pip and place the package in your `site-packages` directory.
```cli
$ pip install -U spacy
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
$ python -m spacy download en_core_web_sm
```

View File

@ -813,7 +813,7 @@ full embedded visualizer, as well as individual components.
> #### Installation
>
> ```bash
> $ pip install "spacy-streamlit>=1.0.0a0"
> $ pip install spacy-streamlit --pre
> ```
![](../images/spacy-streamlit.png)
@ -911,7 +911,7 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.
> #### Installation
>
> ```cli
> $ pip install spacy-ray
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered
> $ python -m spacy ray --help
> ```

View File

@ -1249,7 +1249,7 @@ valid.
> #### Installation
>
> ```cli
> $ pip install spacy-ray
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered
> $ python -m spacy ray --help
> ```

View File

@ -236,7 +236,7 @@ treebank.
> #### Example
>
> ```cli
> $ pip install spacy-ray
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
> # Check that the CLI is registered
> $ python -m spacy ray --help
> # Train a pipeline
@ -272,7 +272,7 @@ add to your pipeline and customize for your use case:
> #### Example
>
> ```python
> # pip install spacy-lookups-data
> # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
> nlp = spacy.blank("en")
> nlp.add_pipe("lemmatizer")
> ```

View File

@ -30,6 +30,8 @@ const branch = isNightly ? 'develop' : 'master'
// Named text replacements, keyed by placeholder name.
// NOTE(review): presumably these are substituted for `%%KEY` placeholders in
// the Markdown docs (e.g. `%%SPACY_PKG_NAME`) — confirm against the code that
// consumes this object, which is not visible here.
const replacements = {
// Link to the spaCy source tree on the branch selected above.
GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`,
// Link to the projects repository configured in `site.projectsRepo`.
GITHUB_PROJECTS: `https://github.com/${site.projectsRepo}`,
// Package name to install: the nightly package when `isNightly` is truthy.
SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy',
// Extra pip flags. The value carries its own leading space, so it is meant to
// be appended directly after the package name with no separator
// (`%%SPACY_PKG_NAME%%SPACY_PKG_FLAGS`), and is empty for non-nightly builds.
SPACY_PKG_FLAGS: isNightly ? ' --pre' : '',
}
/**

View File

@ -97,7 +97,10 @@ const Changelog = () => {
<p>
Pre-releases include alpha and beta versions, as well as release candidates. They
are not intended for production use. You can download spaCy pre-releases via the{' '}
<InlineCode>spacy-nightly</InlineCode> package on pip.
<Link to="https://pypi.org/packages/spacy-nightly">
<InlineCode>spacy-nightly</InlineCode>
</Link>{' '}
package on pip.
</p>
<p>

View File

@ -28,7 +28,8 @@ import irlBackground from '../images/spacy-irl.jpg'
import Benchmarks from 'usage/_benchmarks-models.md'
const CODE_EXAMPLE = `# pip install spacy
function getCodeExample(nightly) {
return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'}
# python -m spacy download en_core_web_sm
import spacy
@ -52,9 +53,11 @@ print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])
for entity in doc.ents:
print(entity.text, entity.label_)
`
}
const Landing = ({ data }) => {
const { counts } = data
const { counts, nightly } = data
const codeExample = getCodeExample(nightly)
return (
<>
<LandingHeader nightly={data.nightly}>
@ -91,7 +94,7 @@ const Landing = ({ data }) => {
</LandingGrid>
<LandingGrid>
<LandingDemo title="Edit the code & try spaCy">{CODE_EXAMPLE}</LandingDemo>
<LandingDemo title="Edit the code &amp; try spaCy">{codeExample}</LandingDemo>
<LandingCol>
<H2>Features</H2>

View File

@ -141,6 +141,11 @@ const QuickstartInstall = ({ id, title }) => {
setters={setters}
showDropdown={showDropdown}
>
{nightly && (
<QS package="conda" comment prompt={false}>
# 🚨 Nightly releases are currently only available via pip
</QS>
)}
<QS config="venv">python -m venv .env</QS>
<QS config="venv" os="mac">
source .env/bin/activate
@ -175,9 +180,9 @@ const QuickstartInstall = ({ id, title }) => {
</QS>
<QS package="source">pip install -r requirements.txt</QS>
<QS package="source">python setup.py build_ext --inplace</QS>
<QS package="source" config="train">
pip install -e '.[{pipExtras}]'
</QS>
{(train || hardware == 'gpu') && (
<QS package="source">pip install -e '.[{pipExtras}]'</QS>
)}
<QS config="train" package="conda">
conda install -c conda-forge spacy-transformers