diff --git a/setup.cfg b/setup.cfg index ff12d511a..2b70e565d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,7 @@ console_scripts = lookups = spacy_lookups_data>=1.0.2,<1.1.0 transformers = - spacy_transformers>=1.0.1,<1.1.0 + spacy_transformers>=1.0.1,<1.2.0 ray = spacy_ray>=0.1.0,<1.0.0 cuda = diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index 339fb1e96..bb4061177 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -32,7 +32,7 @@ batch_size = {{ 128 if hardware == "gpu" else 1000 }} factory = "transformer" [components.transformer.model] -@architectures = "spacy-transformers.TransformerModel.v1" +@architectures = "spacy-transformers.TransformerModel.v3" name = "{{ transformer["name"] }}" tokenizer_config = {"use_fast": true} diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md index ceeb388ab..047de0164 100644 --- a/website/docs/api/architectures.md +++ b/website/docs/api/architectures.md @@ -332,15 +332,18 @@ for details and system requirements. -### spacy-transformers.TransformerModel.v1 {#TransformerModel} +### spacy-transformers.TransformerModel.v3 {#TransformerModel} > #### Example Config > > ```ini > [model] -> @architectures = "spacy-transformers.TransformerModel.v1" +> @architectures = "spacy-transformers.TransformerModel.v3" > name = "roberta-base" > tokenizer_config = {"use_fast": true} +> transformer_config = {} +> mixed_precision = true +> grad_scaler_config = {"init_scale": 32768} > > [model.get_spans] > @span_getters = "spacy-transformers.strided_spans.v1" @@ -366,12 +369,31 @@ transformer weights across your pipeline. For a layer that's configured for use in other components, see [Tok2VecTransformer](/api/architectures#Tok2VecTransformer). 
-| Name | Description | -| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `name` | Any model name that can be loaded by [`transformers.AutoModel`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoModel). ~~str~~ | -| `get_spans` | Function that takes a batch of [`Doc`](/api/doc) object and returns lists of [`Span`](/api) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~ | -| `tokenizer_config` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~ | -| **CREATES** | The model using the architecture. ~~Model[List[Doc], FullTransformerBatch]~~ | +| Name | Description | +|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `name` | Any model name that can be loaded by [`transformers.AutoModel`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoModel). ~~str~~ | +| `get_spans` | Function that takes a batch of [`Doc`](/api/doc) object and returns lists of [`Span`](/api) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~ | +| `tokenizer_config` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). 
~~Dict[str, Any]~~ | +| `transformer_config` | Settings to pass to the transformers forward pass. ~~Dict[str, Any]~~ | +| `mixed_precision` | Replace whitelisted ops with half-precision counterparts. Speeds up training and prediction on GPUs with [Tensor Cores](https://developer.nvidia.com/tensor-cores) and reduces GPU memory use. ~~bool~~ | +| `grad_scaler_config` | Configuration to pass to `thinc.api.PyTorchGradScaler` during training when `mixed_precision` is enabled. ~~Dict[str, Any]~~ | +| **CREATES** | The model using the architecture. ~~Model[List[Doc], FullTransformerBatch]~~ | +| | | + + +Mixed-precision support is currently an experimental feature. + + + + +* The `transformer_config` argument was added in +`spacy-transformers.TransformerModel.v2`. +* The `mixed_precision` and `grad_scaler_config` arguments were added in +`spacy-transformers.TransformerModel.v3`. + +The other arguments are shared between all versions. + + ### spacy-transformers.TransformerListener.v1 {#TransformerListener} @@ -403,16 +425,19 @@ a single token vector given zero or more wordpiece vectors. | `upstream` | A string to identify the "upstream" `Transformer` component to communicate with. By default, the upstream name is the wildcard string `"*"`, but you could also specify the name of the `Transformer` component. You'll almost never have multiple upstream `Transformer` components, so the wildcard string will almost always be fine. ~~str~~ | | **CREATES** | The model using the architecture. 
~~Model[List[Doc], List[Floats2d]]~~ | -### spacy-transformers.Tok2VecTransformer.v1 {#Tok2VecTransformer} +### spacy-transformers.Tok2VecTransformer.v3 {#Tok2VecTransformer} > #### Example Config > > ```ini > [model] -> @architectures = "spacy-transformers.Tok2VecTransformer.v1" +> @architectures = "spacy-transformers.Tok2VecTransformer.v3" > name = "albert-base-v2" > tokenizer_config = {"use_fast": false} +> transformer_config = {} > grad_factor = 1.0 +> mixed_precision = true +> grad_scaler_config = {"init_scale": 32768} > ``` Use a transformer as a [`Tok2Vec`](/api/tok2vec) layer directly. This does @@ -421,13 +446,32 @@ Use a transformer as a [`Tok2Vec`](/api/tok2vec) layer directly. This does object, but it's a **simpler solution** if you only need the transformer within one component. -| Name | Description | -| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `get_spans` | Function that takes a batch of [`Doc`](/api/doc) object and returns lists of [`Span`](/api) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~ | -| `tokenizer_config` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~ | -| `pooling` | A reduction layer used to calculate the token vectors based on zero or more wordpiece vectors. If in doubt, mean pooling (see [`reduce_mean`](https://thinc.ai/docs/api-layers#reduce_mean)) is usually a good choice. ~~Model[Ragged, Floats2d]~~ | -| `grad_factor` | Reweight gradients from the component before passing them upstream. 
You can set this to `0` to "freeze" the transformer weights with respect to the component, or use it to make some components more significant than others. Leaving it at `1.0` is usually fine. ~~float~~ | -| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | +| Name | Description | +|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `get_spans` | Function that takes a batch of [`Doc`](/api/doc) object and returns lists of [`Span`](/api) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~ | +| `tokenizer_config` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~ | +| `transformer_config` | Settings to pass to the transformers forward pass. ~~Dict[str, Any]~~ | +| `pooling` | A reduction layer used to calculate the token vectors based on zero or more wordpiece vectors. If in doubt, mean pooling (see [`reduce_mean`](https://thinc.ai/docs/api-layers#reduce_mean)) is usually a good choice. ~~Model[Ragged, Floats2d]~~ | +| `grad_factor` | Reweight gradients from the component before passing them upstream. You can set this to `0` to "freeze" the transformer weights with respect to the component, or use it to make some components more significant than others. Leaving it at `1.0` is usually fine. ~~float~~ | +| `mixed_precision` | Replace whitelisted ops by half-precision counterparts. Speeds up training and prediction on GPUs with [Tensor Cores](https://developer.nvidia.com/tensor-cores) and reduces GPU memory use. 
~~bool~~ | +| `grad_scaler_config` | Configuration to pass to `thinc.api.PyTorchGradScaler` during training when `mixed_precision` is enabled. ~~Dict[str, Any]~~ | +| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~ | + + + +Mixed-precision support is currently an experimental feature. + + + + +* The `transformer_config` argument was added in +`spacy-transformers.Tok2VecTransformer.v2`. +* The `mixed_precision` and `grad_scaler_config` arguments were added in +`spacy-transformers.Tok2VecTransformer.v3`. + +The other arguments are shared between all versions. + + ## Pretraining architectures {#pretrain source="spacy/ml/models/multi_task.py"} diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md index 6e68ac599..571fb8d80 100644 --- a/website/docs/api/transformer.md +++ b/website/docs/api/transformer.md @@ -92,9 +92,12 @@ https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/p > # Construction via add_pipe with custom config > config = { > "model": { -> "@architectures": "spacy-transformers.TransformerModel.v1", +> "@architectures": "spacy-transformers.TransformerModel.v3", > "name": "bert-base-uncased", -> "tokenizer_config": {"use_fast": True} +> "tokenizer_config": {"use_fast": True}, +> "transformer_config": {"output_attentions": True}, +> "mixed_precision": True, +> "grad_scaler_config": {"init_scale": 32768} > } > } > trf = nlp.add_pipe("transformer", config=config) diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md index 88fb39f61..253b3d0b5 100644 --- a/website/docs/usage/embeddings-transformers.md +++ b/website/docs/usage/embeddings-transformers.md @@ -351,7 +351,7 @@ factory = "transformer" max_batch_items = 4096 [components.transformer.model] -@architectures = "spacy-transformers.TransformerModel.v1" +@architectures = "spacy-transformers.TransformerModel.v3" name = "bert-base-cased" tokenizer_config = {"use_fast": 
true} @@ -367,7 +367,7 @@ The `[components.transformer.model]` block describes the `model` argument passed to the transformer component. It's a Thinc [`Model`](https://thinc.ai/docs/api-model) object that will be passed into the component. Here, it references the function -[spacy-transformers.TransformerModel.v1](/api/architectures#TransformerModel) +[spacy-transformers.TransformerModel.v3](/api/architectures#TransformerModel) registered in the [`architectures` registry](/api/top-level#registry). If a key in a block starts with `@`, it's **resolved to a function** and all other settings are passed to the function as arguments. In this case, `name`,