mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 15:23:41 +03:00
Update docs [ci skip]
This commit is contained in:
parent
9f69afdd1e
commit
9c80cb673d
|
@ -24,10 +24,55 @@ TODO: intro and how architectures work, link to
|
||||||
|
|
||||||
## Transformer architectures {#transformers source="github.com/explosion/spacy-transformers/blob/master/spacy_transformers/architectures.py"}
|
## Transformer architectures {#transformers source="github.com/explosion/spacy-transformers/blob/master/spacy_transformers/architectures.py"}
|
||||||
|
|
||||||
|
The following architectures are provided by the package
|
||||||
|
[`spacy-transformers`](https://github.com/explosion/spacy-transformers). See the
|
||||||
|
[usage documentation](/usage/transformers) for how to integrate the
|
||||||
|
architectures into your training config.
|
||||||
|
|
||||||
### spacy-transformers.TransformerModel.v1 {#TransformerModel}
|
### spacy-transformers.TransformerModel.v1 {#TransformerModel}
|
||||||
|
|
||||||
|
<!-- TODO: description -->
|
||||||
|
|
||||||
|
> #### Example Config
|
||||||
|
>
|
||||||
|
> ```ini
|
||||||
|
> [model]
|
||||||
|
> @architectures = "spacy-transformers.TransformerModel.v1"
|
||||||
|
> name = "roberta-base"
|
||||||
|
> tokenizer_config = {"use_fast": true}
|
||||||
|
>
|
||||||
|
> [model.get_spans]
|
||||||
|
> @span_getters = "strided_spans.v1"
|
||||||
|
> window = 128
|
||||||
|
> stride = 96
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ------------------ | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| `name` | str | Any model name that can be loaded by [`transformers.AutoModel`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoModel). |
|
||||||
|
| `get_spans` | `Callable` | Function that takes a batch of [`Doc`](/api/doc) objects and returns lists of [`Span`](/api/span) objects to be processed by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. |
|
||||||
|
| `tokenizer_config` | `Dict[str, Any]` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). |
|
||||||
|
|
||||||
### spacy-transformers.Tok2VecListener.v1 {#Tok2VecListener}
|
### spacy-transformers.Tok2VecListener.v1 {#Tok2VecListener}
|
||||||
|
|
||||||
|
<!-- TODO: description -->
|
||||||
|
|
||||||
|
> #### Example Config
|
||||||
|
>
|
||||||
|
> ```ini
|
||||||
|
> [model]
|
||||||
|
> @architectures = "spacy-transformers.Tok2VecListener.v1"
|
||||||
|
> grad_factor = 1.0
|
||||||
|
>
|
||||||
|
> [model.pooling]
|
||||||
|
> @layers = "reduce_mean.v1"
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
| ------------- | ------------------------- | ---------------------------------------------------------------------------------------------- |
|
||||||
|
| `grad_factor` | float | Factor for weighting the gradient if multiple components listen to the same transformer model. |
|
||||||
|
| `pooling` | `Model[Ragged, Floats2d]` | Pooling layer to determine how the vector for each spaCy token will be computed. |
|
||||||
|
|
||||||
## Parser & NER architectures {#parser source="spacy/ml/models/parser.py"}
|
## Parser & NER architectures {#parser source="spacy/ml/models/parser.py"}
|
||||||
|
|
||||||
### spacy.TransitionBasedParser.v1 {#TransitionBasedParser}
|
### spacy.TransitionBasedParser.v1 {#TransitionBasedParser}
|
||||||
|
|
|
@ -366,13 +366,13 @@ Transformer tokens and outputs for one `Doc` object.
|
||||||
|
|
||||||
<!-- TODO: -->
|
<!-- TODO: -->
|
||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- |
|
| ---------- | -------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- |
|
||||||
| `spans` | `List[List[Span]]` | <!-- TODO: --> |
|
| `spans` | `List[List[Span]]` | <!-- TODO: --> |
|
||||||
| `tokens` | [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html?highlight=batchencoding#transformers.BatchEncoding) | <!-- TODO: --> |
|
| `tokens` | [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html#transformers.BatchEncoding) | <!-- TODO: --> |
|
||||||
| `tensors` | `List[torch.Tensor]` | <!-- TODO: --> |
|
| `tensors` | `List[torch.Tensor]` | <!-- TODO: --> |
|
||||||
| `align` | [`Ragged`](https://thinc.ai/docs/api-types#ragged) | <!-- TODO: --> |
|
| `align` | [`Ragged`](https://thinc.ai/docs/api-types#ragged) | <!-- TODO: --> |
|
||||||
| `doc_data` | `List[TransformerData]` | <!-- TODO: also mention it's a property --> |
|
| `doc_data` | `List[TransformerData]` | <!-- TODO: also mention it's a property --> |
|
||||||
|
|
||||||
### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}
|
### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}
|
||||||
|
|
||||||
|
|
|
@ -220,15 +220,19 @@ available pipeline components and component functions.
|
||||||
> ruler = nlp.add_pipe("entity_ruler")
|
> ruler = nlp.add_pipe("entity_ruler")
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| String name | Component | Description |
|
| String name | Component | Description |
|
||||||
| --------------- | ------------------------------------------- | ----------------------------------------------------------------------------------------- |
|
| --------------- | ----------------------------------------------- | ----------------------------------------------------------------------------------------- |
|
||||||
| `tagger` | [`Tagger`](/api/tagger) | Assign part-of-speech tags. |
|
| `tagger` | [`Tagger`](/api/tagger) | Assign part-of-speech tags. |
|
||||||
| `parser` | [`DependencyParser`](/api/dependencyparser) | Assign dependency labels. |
|
| `parser` | [`DependencyParser`](/api/dependencyparser) | Assign dependency labels. |
|
||||||
| `ner` | [`EntityRecognizer`](/api/entityrecognizer) | Assign named entities. |
|
| `ner` | [`EntityRecognizer`](/api/entityrecognizer) | Assign named entities. |
|
||||||
| `entity_linker` | [`EntityLinker`](/api/entitylinker) | Assign knowledge base IDs to named entities. Should be added after the entity recognizer. |
|
| `entity_linker` | [`EntityLinker`](/api/entitylinker) | Assign knowledge base IDs to named entities. Should be added after the entity recognizer. |
|
||||||
| `textcat` | [`TextCategorizer`](/api/textcategorizer) | Assign text categories. |
|
| `entity_ruler` | [`EntityRuler`](/api/entityruler) | Assign named entities based on pattern rules and dictionaries. |
|
||||||
| `entity_ruler` | [`EntityRuler`](/api/entityruler) | Assign named entities based on pattern rules. |
|
| `textcat` | [`TextCategorizer`](/api/textcategorizer) | Assign text categories. |
|
||||||
| `sentencizer` | [`Sentencizer`](/api/sentencizer) | Add rule-based sentence segmentation without the dependency parse. |
|
| `morphologizer` | [`Morphologizer`](/api/morphologizer) | Assign morphological features and coarse-grained POS tags. |
|
||||||
|
| `senter` | [`SentenceRecognizer`](/api/sentencerecognizer) | Assign sentence boundaries. |
|
||||||
|
| `sentencizer` | [`Sentencizer`](/api/sentencizer) | Add rule-based sentence segmentation without the dependency parse. |
|
||||||
|
| `tok2vec` | [`Tok2Vec`](/api/tok2vec) | <!-- TODO: --> |
|
||||||
|
| `transformer` | [`Transformer`](/api/transformer) | Assign the tokens and outputs of a transformer model. |
|
||||||
|
|
||||||
<!-- TODO: update with more components -->
|
<!-- TODO: update with more components -->
|
||||||
|
|
||||||
|
|
|
@ -101,7 +101,9 @@ evaluate, package and visualize your model.
|
||||||
The `[components]` section in the [`config.cfg`](#TODO:) describes the pipeline
|
The `[components]` section in the [`config.cfg`](#TODO:) describes the pipeline
|
||||||
components and the settings used to construct them, including their model
|
components and the settings used to construct them, including their model
|
||||||
implementation. Here's a config snippet for the
|
implementation. Here's a config snippet for the
|
||||||
[`Transformer`](/api/transformer) component, along with matching Python code:
|
[`Transformer`](/api/transformer) component, along with matching Python code. In
|
||||||
|
this case, the `[components.transformer]` block describes the `transformer`
|
||||||
|
component:
|
||||||
|
|
||||||
> #### Python equivalent
|
> #### Python equivalent
|
||||||
>
|
>
|
||||||
|
@ -257,10 +259,10 @@ grad_factor = 1.0
|
||||||
```
|
```
|
||||||
|
|
||||||
The [Tok2VecListener](/api/architectures#Tok2VecListener) layer expects a
|
The [Tok2VecListener](/api/architectures#Tok2VecListener) layer expects a
|
||||||
[pooling layer](https://thinc.ai/docs/api-layers#reduction-ops), which needs to
|
[pooling layer](https://thinc.ai/docs/api-layers#reduction-ops) as the argument
|
||||||
be of type `Model[Ragged, Floats2d]`. This layer determines how the vector for
|
`pooling`, which needs to be of type `Model[Ragged, Floats2d]`. This layer
|
||||||
each spaCy token will be computed from the zero or more source rows the token is
|
determines how the vector for each spaCy token will be computed from the zero or
|
||||||
aligned against. Here we use the
|
more source rows the token is aligned against. Here we use the
|
||||||
[`reduce_mean`](https://thinc.ai/docs/api-layers#reduce_mean) layer, which
|
[`reduce_mean`](https://thinc.ai/docs/api-layers#reduce_mean) layer, which
|
||||||
averages the wordpiece rows. We could instead use `reduce_last`,
|
averages the wordpiece rows. We could instead use `reduce_last`,
|
||||||
[`reduce_max`](https://thinc.ai/docs/api-layers#reduce_max), or a custom
|
[`reduce_max`](https://thinc.ai/docs/api-layers#reduce_max), or a custom
|
||||||
|
|
|
@ -36,13 +36,18 @@ const DATA = [
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
id: 'data',
|
id: 'addition',
|
||||||
title: 'Additional data',
|
title: 'Additions',
|
||||||
multiple: true,
|
multiple: true,
|
||||||
options: [
|
options: [
|
||||||
|
{
|
||||||
|
id: 'transformers',
|
||||||
|
title: 'Transformers',
|
||||||
|
help: 'Use transformers like BERT to train your spaCy models',
|
||||||
|
},
|
||||||
{
|
{
|
||||||
id: 'lookups',
|
id: 'lookups',
|
||||||
title: 'Lemmatization',
|
title: 'Lemmatizer data',
|
||||||
help: 'Install additional lookup tables and rules for lemmatization',
|
help: 'Install additional lookup tables and rules for lemmatization',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
|
@ -86,13 +91,22 @@ const QuickstartInstall = ({ id, title }) => (
|
||||||
set PYTHONPATH=C:\path\to\spaCy
|
set PYTHONPATH=C:\path\to\spaCy
|
||||||
</QS>
|
</QS>
|
||||||
<QS package="source">pip install -r requirements.txt</QS>
|
<QS package="source">pip install -r requirements.txt</QS>
|
||||||
<QS data="lookups" package="pip">
|
<QS addition="transformers" package="pip">
|
||||||
|
pip install -U spacy-transformers
|
||||||
|
</QS>
|
||||||
|
<QS addition="transformers" package="source">
|
||||||
|
pip install -U spacy-transformers
|
||||||
|
</QS>
|
||||||
|
<QS addition="transformers" package="conda">
|
||||||
|
conda install -c conda-forge spacy-transformers
|
||||||
|
</QS>
|
||||||
|
<QS addition="lookups" package="pip">
|
||||||
pip install -U spacy-lookups-data
|
pip install -U spacy-lookups-data
|
||||||
</QS>
|
</QS>
|
||||||
<QS data="lookups" package="source">
|
<QS addition="lookups" package="source">
|
||||||
pip install -U spacy-lookups-data
|
pip install -U spacy-lookups-data
|
||||||
</QS>
|
</QS>
|
||||||
<QS data="lookups" package="conda">
|
<QS addition="lookups" package="conda">
|
||||||
conda install -c conda-forge spacy-lookups-data
|
conda install -c conda-forge spacy-lookups-data
|
||||||
</QS>
|
</QS>
|
||||||
<QS package="source">python setup.py build_ext --inplace</QS>
|
<QS package="source">python setup.py build_ext --inplace</QS>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user