diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index 80751a695..97137313d 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -56,7 +56,7 @@ def build_tb_parser_model(
         non-linearity if use_upper=False.
     use_upper (bool): Whether to use an additional hidden layer after the state
         vector in order to predict the action scores. It is recommended to set
-        this to False for large pretrained models such as transformers, and False
+        this to False for large pretrained models such as transformers, and True
         for smaller networks. The upper layer is computed on CPU, which becomes
         a bottleneck on larger GPU-based models, where it's also less necessary.
     nO (int or None): The number of actions the model will predict between.