diff --git a/setup.cfg b/setup.cfg
index 5dd0227f2..e101a2eb6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -70,7 +70,7 @@ console_scripts =
 lookups =
     spacy_lookups_data>=1.0.2,<1.1.0
 transformers =
-    spacy_transformers>=1.0.1,<1.1.0
+    spacy_transformers>=1.0.1,<1.2.0
 ray =
     spacy_ray>=0.1.0,<1.0.0
 cuda =
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 339fb1e96..bb4061177 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -32,7 +32,7 @@ batch_size = {{ 128 if hardware == "gpu" else 1000 }}
 factory = "transformer"
 
 [components.transformer.model]
-@architectures = "spacy-transformers.TransformerModel.v1"
+@architectures = "spacy-transformers.TransformerModel.v3"
 name = "{{ transformer["name"] }}"
 tokenizer_config = {"use_fast": true}
 
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index ceeb388ab..047de0164 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -332,15 +332,18 @@ for details and system requirements.
 
 
 
-### spacy-transformers.TransformerModel.v1 {#TransformerModel}
+### spacy-transformers.TransformerModel.v3 {#TransformerModel}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy-transformers.TransformerModel.v1"
+> @architectures = "spacy-transformers.TransformerModel.v3"
 > name = "roberta-base"
 > tokenizer_config = {"use_fast": true}
+> transformer_config = {}
+> mixed_precision = true
+> grad_scaler_config = {"init_scale": 32768}
 >
 > [model.get_spans]
 > @span_getters = "spacy-transformers.strided_spans.v1"
@@ -366,12 +369,31 @@ transformer weights across your pipeline. For a layer that's configured for use
 in other components, see
 [Tok2VecTransformer](/api/architectures#Tok2VecTransformer).
 
-| Name               | Description                                                                                                                                                                                                                                           |
-| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `name`             | Any model name that can be loaded by [`transformers.AutoModel`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoModel). ~~str~~                                                                                              |
-| `get_spans`        | Function that takes a batch of [`Doc`](/api/doc) object and returns lists of [`Span`](/api) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~ |
-| `tokenizer_config` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~                                                                                   |
-| **CREATES**        | The model using the architecture. ~~Model[List[Doc], FullTransformerBatch]~~                                                                                                                                                                          |
+| Name                 | Description                                                                                                                                                                                                                                           |
+|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `name`               | Any model name that can be loaded by [`transformers.AutoModel`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoModel). ~~str~~                                                                                              |
+| `get_spans`          | Function that takes a batch of [`Doc`](/api/doc) objects and returns lists of [`Span`](/api/span) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~ |
+| `tokenizer_config`   | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~                                                                                   |
+| `transformer_config` | Settings to pass to the transformers forward pass. ~~Dict[str, Any]~~                                                                                                                                                                                 |
+| `mixed_precision`    | Replace whitelisted ops with half-precision counterparts. Speeds up training and prediction on GPUs with [Tensor Cores](https://developer.nvidia.com/tensor-cores) and reduces GPU memory use. ~~bool~~                                               |
+| `grad_scaler_config` | Configuration to pass to `thinc.api.PyTorchGradScaler` during training when `mixed_precision` is enabled. ~~Dict[str, Any]~~                                                                                                                          |
+| **CREATES**          | The model using the architecture. ~~Model[List[Doc], FullTransformerBatch]~~                                                                                                                                                                          |
+
+
+
+Mixed-precision support is currently an experimental feature.
+
+
+
+
+* The `transformer_config` argument was added in
+`spacy-transformers.TransformerModel.v2`.
+* The `mixed_precision` and `grad_scaler_config` arguments were added in
+`spacy-transformers.TransformerModel.v3`.
+
+The other arguments are shared between all versions.
+
+
 
 ### spacy-transformers.TransformerListener.v1 {#TransformerListener}
 
@@ -403,16 +425,19 @@ a single token vector given zero or more wordpiece vectors.
 | `upstream`    | A string to identify the "upstream" `Transformer` component to communicate with. By default, the upstream name is the wildcard string `"*"`, but you could also specify the name of the `Transformer` component. You'll almost never have multiple upstream `Transformer` components, so the wildcard string will almost always be fine. ~~str~~ |
 | **CREATES**   | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                                                                                           |
 
-### spacy-transformers.Tok2VecTransformer.v1 {#Tok2VecTransformer}
+### spacy-transformers.Tok2VecTransformer.v3 {#Tok2VecTransformer}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy-transformers.Tok2VecTransformer.v1"
+> @architectures = "spacy-transformers.Tok2VecTransformer.v3"
 > name = "albert-base-v2"
 > tokenizer_config = {"use_fast": false}
+> transformer_config = {}
 > grad_factor = 1.0
+> mixed_precision = true
+> grad_scaler_config = {"init_scale": 32768}
 > ```
 
 Use a transformer as a [`Tok2Vec`](/api/tok2vec) layer directly. This does
@@ -421,13 +446,32 @@ Use a transformer as a [`Tok2Vec`](/api/tok2vec) layer directly. This does
 object, but it's a **simpler solution** if you only need the transformer within
 one component.
 
-| Name               | Description                                                                                                                                                                                                                                                                   |
-| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `get_spans`        | Function that takes a batch of [`Doc`](/api/doc) object and returns lists of [`Span`](/api) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~                         |
-| `tokenizer_config` | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~                                                                                                           |
-| `pooling`          | A reduction layer used to calculate the token vectors based on zero or more wordpiece vectors. If in doubt, mean pooling (see [`reduce_mean`](https://thinc.ai/docs/api-layers#reduce_mean)) is usually a good choice. ~~Model[Ragged, Floats2d]~~                            |
-| `grad_factor`      | Reweight gradients from the component before passing them upstream. You can set this to `0` to "freeze" the transformer weights with respect to the component, or use it to make some components more significant than others. Leaving it at `1.0` is usually fine. ~~float~~ |
-| **CREATES**        | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                        |
+| Name                 | Description                                                                                                                                                                                                                                                                   |
+|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `get_spans`          | Function that takes a batch of [`Doc`](/api/doc) objects and returns lists of [`Span`](/api/span) objects to process by the transformer. [See here](/api/transformer#span_getters) for built-in options and examples. ~~Callable[[List[Doc]], List[Span]]~~                   |
+| `tokenizer_config`   | Tokenizer settings passed to [`transformers.AutoTokenizer`](https://huggingface.co/transformers/model_doc/auto.html#transformers.AutoTokenizer). ~~Dict[str, Any]~~                                                                                                           |
+| `transformer_config` | Settings to pass to the transformers forward pass. ~~Dict[str, Any]~~                                                                                                                                                                                                         |
+| `pooling`            | A reduction layer used to calculate the token vectors based on zero or more wordpiece vectors. If in doubt, mean pooling (see [`reduce_mean`](https://thinc.ai/docs/api-layers#reduce_mean)) is usually a good choice. ~~Model[Ragged, Floats2d]~~                            |
+| `grad_factor`        | Reweight gradients from the component before passing them upstream. You can set this to `0` to "freeze" the transformer weights with respect to the component, or use it to make some components more significant than others. Leaving it at `1.0` is usually fine. ~~float~~ |
+| `mixed_precision`    | Replace whitelisted ops with half-precision counterparts. Speeds up training and prediction on GPUs with [Tensor Cores](https://developer.nvidia.com/tensor-cores) and reduces GPU memory use. ~~bool~~                                                                       |
+| `grad_scaler_config` | Configuration to pass to `thinc.api.PyTorchGradScaler` during training when `mixed_precision` is enabled. ~~Dict[str, Any]~~                                                                                                                                                  |
+| **CREATES**          | The model using the architecture. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                        |
+
+
+
+Mixed-precision support is currently an experimental feature.
+
+
+
+
+* The `transformer_config` argument was added in
+`spacy-transformers.Tok2VecTransformer.v2`.
+* The `mixed_precision` and `grad_scaler_config` arguments were added in
+`spacy-transformers.Tok2VecTransformer.v3`.
+
+The other arguments are shared between all versions.
+
+
 
 ## Pretraining architectures {#pretrain source="spacy/ml/models/multi_task.py"}
 
diff --git a/website/docs/api/transformer.md b/website/docs/api/transformer.md
index 6e68ac599..571fb8d80 100644
--- a/website/docs/api/transformer.md
+++ b/website/docs/api/transformer.md
@@ -92,9 +92,12 @@ https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/p
 > # Construction via add_pipe with custom config
 > config = {
 >     "model": {
->         "@architectures": "spacy-transformers.TransformerModel.v1",
+>         "@architectures": "spacy-transformers.TransformerModel.v3",
 >         "name": "bert-base-uncased",
->         "tokenizer_config": {"use_fast": True}
+>         "tokenizer_config": {"use_fast": True},
+>         "transformer_config": {"output_attentions": True},
+>         "mixed_precision": True,
+>         "grad_scaler_config": {"init_scale": 32768}
 >     }
 > }
 > trf = nlp.add_pipe("transformer", config=config)
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index 88fb39f61..253b3d0b5 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -351,7 +351,7 @@ factory = "transformer"
 max_batch_items = 4096
 
 [components.transformer.model]
-@architectures = "spacy-transformers.TransformerModel.v1"
+@architectures = "spacy-transformers.TransformerModel.v3"
 name = "bert-base-cased"
 tokenizer_config = {"use_fast": true}
 
@@ -367,7 +367,7 @@ The `[components.transformer.model]` block describes the `model` argument passed
 to the transformer component. It's a Thinc
 [`Model`](https://thinc.ai/docs/api-model) object that will be passed into the
 component. Here, it references the function
-[spacy-transformers.TransformerModel.v1](/api/architectures#TransformerModel)
+[spacy-transformers.TransformerModel.v3](/api/architectures#TransformerModel)
 registered in the [`architectures` registry](/api/top-level#registry). If a key
 in a block starts with `@`, it's **resolved to a function** and all other
 settings are passed to the function as arguments. In this case, `name`,