mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Update docs and install extras [ci skip]
This commit is contained in:
parent
eb28e8ce35
commit
43e59bb22a
|
@ -68,6 +68,8 @@ lookups =
|
|||
spacy_lookups_data>=1.0.0rc0,<1.0.0
|
||||
transformers =
|
||||
spacy_transformers>=1.0.0a17,<1.0.0
|
||||
ray =
|
||||
spacy_ray>=0.0.1,<1.0.0
|
||||
cuda =
|
||||
cupy>=5.0.0b4,<9.0.0
|
||||
cuda80 =
|
||||
|
|
|
@ -11,7 +11,7 @@ api_string_name: transformer
|
|||
> #### Installation
|
||||
>
|
||||
> ```bash
|
||||
> $ pip install spacy-transformers
|
||||
> $ pip install -U %%SPACY_PKG_NAME[transformers]%%SPACY_PKG_FLAGS
|
||||
> ```
|
||||
|
||||
<Infobox title="Important note" variant="warning">
|
||||
|
@ -385,12 +385,12 @@ are wrapped into the
|
|||
by this class. Instances of this class are typically assigned to the
|
||||
[`Doc._.trf_data`](/api/transformer#custom-attributes) extension attribute.
|
||||
|
||||
| Name | Description |
|
||||
| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ |
|
||||
| Name | Description |
|
||||
| --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `tokens` | A slice of the tokens data produced by the tokenizer. This may have several fields, including the token IDs, the texts and the attention mask. See the [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) object for details. ~~dict~~ |
|
||||
| `tensors` | The activations for the `Doc` from the transformer. Usually the last tensor that is 3-dimensional will be the most important, as that will provide the final hidden state. Generally activations that are 2-dimensional will be attention weights. Details of this variable will differ depending on the underlying transformer model. ~~List[FloatsXd]~~ |
|
||||
| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
|
||||
| `width` | The width of the last hidden layer. ~~int~~ |
|
||||
| `align` | Alignment from the `Doc`'s tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
|
||||
| `width` | The width of the last hidden layer. ~~int~~ |
|
||||
|
||||
### TransformerData.empty {#transformerdata-empty tag="classmethod"}
|
||||
|
||||
|
@ -406,13 +406,13 @@ Holds a batch of input and output objects for a transformer model. The data can
|
|||
then be split to a list of [`TransformerData`](/api/transformer#transformerdata)
|
||||
objects to associate the outputs to each [`Doc`](/api/doc) in the batch.
|
||||
|
||||
| Name | Description |
|
||||
| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| Name | Description |
|
||||
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `spans` | The batch of input spans. The outer list refers to the Doc objects in the batch, and the inner list are the spans for that `Doc`. Note that spans are allowed to overlap or exclude tokens, but each `Span` can only refer to one `Doc` (by definition). This means that within a `Doc`, the regions of the output tensors that correspond to each `Span` may overlap or have gaps, but for each `Doc`, there is a non-overlapping contiguous slice of the outputs. ~~List[List[Span]]~~ |
|
||||
| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ |
|
||||
| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ |
|
||||
| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
|
||||
| `doc_data` | The outputs, split per `Doc` object. ~~List[TransformerData]~~ |
|
||||
| `tokens` | The output of the tokenizer. ~~transformers.BatchEncoding~~ |
|
||||
| `tensors` | The output of the transformer model. ~~List[torch.Tensor]~~ |
|
||||
| `align` | Alignment from the spaCy tokenization to the wordpieces. This is a ragged array, where `align.lengths[i]` indicates the number of wordpiece tokens that token `i` aligns against. The actual indices are provided at `align[i].dataXd`. ~~Ragged~~ |
|
||||
| `doc_data` | The outputs, split per `Doc` object. ~~List[TransformerData]~~ |
|
||||
|
||||
### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}
|
||||
|
||||
|
|
|
@ -216,8 +216,7 @@ in `/opt/nvidia/cuda`, you would run:
|
|||
```bash
|
||||
### Installation with CUDA
|
||||
$ export CUDA_PATH="/opt/nvidia/cuda"
|
||||
$ pip install cupy-cuda102
|
||||
$ pip install spacy-transformers
|
||||
$ pip install -U %%SPACY_PKG_NAME[cuda102,transformers]%%SPACY_PKG_FLAGS
|
||||
```
|
||||
|
||||
### Runtime usage {#transformers-runtime}
|
||||
|
|
|
@ -47,7 +47,7 @@ Before you install spaCy and its dependencies, make sure that your `pip`,
|
|||
|
||||
```bash
|
||||
$ pip install -U pip setuptools wheel
|
||||
$ pip install -U spacy
|
||||
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
|
||||
```
|
||||
|
||||
When using pip it is generally recommended to install packages in a virtual
|
||||
|
@ -57,7 +57,7 @@ environment to avoid modifying system state:
|
|||
$ python -m venv .env
|
||||
$ source .env/bin/activate
|
||||
$ pip install -U pip setuptools wheel
|
||||
$ pip install spacy
|
||||
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
|
||||
```
|
||||
|
||||
spaCy also lets you install extra dependencies by specifying the following
|
||||
|
@ -68,15 +68,16 @@ spaCy's [`setup.cfg`](%%GITHUB_SPACY/setup.cfg) for details on what's included.
|
|||
> #### Example
|
||||
>
|
||||
> ```bash
|
||||
> $ pip install spacy[lookups,transformers]
|
||||
> $ pip install %%SPACY_PKG_NAME[lookups,transformers]%%SPACY_PKG_FLAGS
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
|
||||
| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
|
||||
| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
|
||||
| `ja`, `ko`, `th` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
|
||||
| Name | Description |
|
||||
| ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `lookups` | Install [`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data) for data tables for lemmatization and lexeme normalization. The data is serialized with trained pipelines, so you only need this package if you want to train your own models. |
|
||||
| `transformers` | Install [`spacy-transformers`](https://github.com/explosion/spacy-transformers). The package will be installed automatically when you install a transformer-based pipeline. |
|
||||
| `ray` | Install [`spacy-ray`](https://github.com/explosion/spacy-ray) to add CLI commands for [parallel training](/usage/training#parallel-training). |
|
||||
| `cuda`, ... | Install spaCy with GPU support provided by [CuPy](https://cupy.chainer.org) for your given CUDA version. See the GPU [installation instructions](#gpu) for details and options. |
|
||||
| `ja`, `ko`, `th`, `zh` | Install additional dependencies required for tokenization for the [languages](/usage/models#languages). |
|
||||
|
||||
### conda {#conda}
|
||||
|
||||
|
@ -88,8 +89,8 @@ $ conda install -c conda-forge spacy
|
|||
```
|
||||
|
||||
For the feedstock including the build recipe and configuration, check out
|
||||
[this repository](https://github.com/conda-forge/spacy-feedstock). Improvements
|
||||
and pull requests to the recipe and setup are always appreciated.
|
||||
[this repository](https://github.com/conda-forge/spacy-feedstock). Note that we
|
||||
currently don't publish any [pre-releases](#changelog-pre) on conda.
|
||||
|
||||
### Upgrading spaCy {#upgrading}
|
||||
|
||||
|
@ -116,7 +117,7 @@ are printed. It's recommended to run the command with `python -m` to make sure
|
|||
you're executing the correct version of spaCy.
|
||||
|
||||
```cli
|
||||
$ pip install -U spacy
|
||||
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
|
||||
$ python -m spacy validate
|
||||
```
|
||||
|
||||
|
@ -134,7 +135,7 @@ specifier allows cupy to be installed via wheel, saving some compilation time.
|
|||
The specifiers should install [`cupy`](https://cupy.chainer.org).
|
||||
|
||||
```bash
|
||||
$ pip install -U spacy[cuda92]
|
||||
$ pip install -U %%SPACY_PKG_NAME[cuda92]%%SPACY_PKG_FLAGS
|
||||
```
|
||||
|
||||
Once you have a GPU-enabled installation, the best way to activate it is to call
|
||||
|
|
|
@ -166,7 +166,7 @@ lookup lemmatizer looks up the token surface form in the lookup table without
|
|||
reference to the token's part-of-speech or context.
|
||||
|
||||
```python
|
||||
# pip install spacy-lookups-data
|
||||
# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
|
||||
import spacy
|
||||
|
||||
nlp = spacy.blank("sv")
|
||||
|
@ -181,7 +181,7 @@ rule-based lemmatizer can be added using rule tables from
|
|||
[`spacy-lookups-data`](https://github.com/explosion/spacy-lookups-data):
|
||||
|
||||
```python
|
||||
# pip install spacy-lookups-data
|
||||
# pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
|
||||
import spacy
|
||||
|
||||
nlp = spacy.blank("de")
|
||||
|
@ -1801,7 +1801,10 @@ print(doc2[5].tag_, doc2[5].pos_) # WP PRON
|
|||
|
||||
<Infobox variant="warning" title="Migrating from spaCy v2.x">
|
||||
|
||||
The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph rules** in the v2.x format via its built-in methods or when the component is initialized before training. See the [migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.
|
||||
The [`AttributeRuler`](/api/attributeruler) can import a **tag map and morph
|
||||
rules** in the v2.x format via its built-in methods or when the component is
|
||||
initialized before training. See the
|
||||
[migration guide](/usage/v3#migrating-training-mappings-exceptions) for details.
|
||||
|
||||
</Infobox>
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ contribute to development.
|
|||
> separately in the same environment:
|
||||
>
|
||||
> ```bash
|
||||
> $ pip install spacy[lookups]
|
||||
> $ pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
|
||||
> ```
|
||||
|
||||
import Languages from 'widgets/languages.js'
|
||||
|
@ -287,7 +287,7 @@ The download command will [install the package](/usage/models#download-pip) via
|
|||
pip and place the package in your `site-packages` directory.
|
||||
|
||||
```cli
|
||||
$ pip install -U spacy
|
||||
$ pip install -U %%SPACY_PKG_NAME%%SPACY_PKG_FLAGS
|
||||
$ python -m spacy download en_core_web_sm
|
||||
```
|
||||
|
||||
|
|
|
@ -813,7 +813,7 @@ full embedded visualizer, as well as individual components.
|
|||
> #### Installation
|
||||
>
|
||||
> ```bash
|
||||
> $ pip install "spacy-streamlit>=1.0.0a0"
|
||||
> $ pip install spacy-streamlit --pre
|
||||
> ```
|
||||
|
||||
![](../images/spacy-streamlit.png)
|
||||
|
@ -911,7 +911,7 @@ https://github.com/explosion/projects/blob/v3/integrations/fastapi/scripts/main.
|
|||
> #### Installation
|
||||
>
|
||||
> ```cli
|
||||
> $ pip install spacy-ray
|
||||
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
|
||||
> # Check that the CLI is registered
|
||||
> $ python -m spacy ray --help
|
||||
> ```
|
||||
|
|
|
@ -1249,7 +1249,7 @@ valid.
|
|||
> #### Installation
|
||||
>
|
||||
> ```cli
|
||||
> $ pip install spacy-ray
|
||||
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
|
||||
> # Check that the CLI is registered
|
||||
> $ python -m spacy ray --help
|
||||
> ```
|
||||
|
|
|
@ -236,7 +236,7 @@ treebank.
|
|||
> #### Example
|
||||
>
|
||||
> ```cli
|
||||
> $ pip install spacy-ray
|
||||
> $ pip install -U %%SPACY_PKG_NAME[ray]%%SPACY_PKG_FLAGS
|
||||
> # Check that the CLI is registered
|
||||
> $ python -m spacy ray --help
|
||||
> # Train a pipeline
|
||||
|
@ -272,7 +272,7 @@ add to your pipeline and customize for your use case:
|
|||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> # pip install spacy-lookups-data
|
||||
> # pip install -U %%SPACY_PKG_NAME[lookups]%%SPACY_PKG_FLAGS
|
||||
> nlp = spacy.blank("en")
|
||||
> nlp.add_pipe("lemmatizer")
|
||||
> ```
|
||||
|
|
|
@ -30,6 +30,8 @@ const branch = isNightly ? 'develop' : 'master'
|
|||
const replacements = {
|
||||
GITHUB_SPACY: `https://github.com/explosion/spaCy/tree/${branch}`,
|
||||
GITHUB_PROJECTS: `https://github.com/${site.projectsRepo}`,
|
||||
SPACY_PKG_NAME: isNightly ? 'spacy-nightly' : 'spacy',
|
||||
SPACY_PKG_FLAGS: isNightly ? ' --pre' : '',
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -97,7 +97,10 @@ const Changelog = () => {
|
|||
<p>
|
||||
Pre-releases include alpha and beta versions, as well as release candidates. They
|
||||
are not intended for production use. You can download spaCy pre-releases via the{' '}
|
||||
<InlineCode>spacy-nightly</InlineCode> package on pip.
|
||||
<Link to="https://pypi.org/packages/spacy-nightly">
|
||||
<InlineCode>spacy-nightly</InlineCode>
|
||||
</Link>{' '}
|
||||
package on pip.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
|
|
|
@ -28,7 +28,8 @@ import irlBackground from '../images/spacy-irl.jpg'
|
|||
|
||||
import Benchmarks from 'usage/_benchmarks-models.md'
|
||||
|
||||
const CODE_EXAMPLE = `# pip install spacy
|
||||
function getCodeExample(nightly) {
|
||||
return `# pip install -U ${nightly ? 'spacy-nightly --pre' : 'spacy'}
|
||||
# python -m spacy download en_core_web_sm
|
||||
import spacy
|
||||
|
||||
|
@ -52,9 +53,11 @@ print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])
|
|||
for entity in doc.ents:
|
||||
print(entity.text, entity.label_)
|
||||
`
|
||||
}
|
||||
|
||||
const Landing = ({ data }) => {
|
||||
const { counts } = data
|
||||
const { counts, nightly } = data
|
||||
const codeExample = getCodeExample(nightly)
|
||||
return (
|
||||
<>
|
||||
<LandingHeader nightly={data.nightly}>
|
||||
|
@ -91,7 +94,7 @@ const Landing = ({ data }) => {
|
|||
</LandingGrid>
|
||||
|
||||
<LandingGrid>
|
||||
<LandingDemo title="Edit the code & try spaCy">{CODE_EXAMPLE}</LandingDemo>
|
||||
<LandingDemo title="Edit the code & try spaCy">{codeExample}</LandingDemo>
|
||||
|
||||
<LandingCol>
|
||||
<H2>Features</H2>
|
||||
|
|
|
@ -141,6 +141,11 @@ const QuickstartInstall = ({ id, title }) => {
|
|||
setters={setters}
|
||||
showDropdown={showDropdown}
|
||||
>
|
||||
{nightly && (
|
||||
<QS package="conda" comment prompt={false}>
|
||||
# 🚨 Nightly releases are currently only available via pip
|
||||
</QS>
|
||||
)}
|
||||
<QS config="venv">python -m venv .env</QS>
|
||||
<QS config="venv" os="mac">
|
||||
source .env/bin/activate
|
||||
|
@ -175,9 +180,9 @@ const QuickstartInstall = ({ id, title }) => {
|
|||
</QS>
|
||||
<QS package="source">pip install -r requirements.txt</QS>
|
||||
<QS package="source">python setup.py build_ext --inplace</QS>
|
||||
<QS package="source" config="train">
|
||||
pip install -e '.[{pipExtras}]'
|
||||
</QS>
|
||||
{(train || hardware == 'gpu') && (
|
||||
<QS package="source">pip install -e '.[{pipExtras}]'</QS>
|
||||
)}
|
||||
|
||||
<QS config="train" package="conda">
|
||||
conda install -c conda-forge spacy-transformers
|
||||
|
|
Loading…
Reference in New Issue
Block a user