From e656189ec35b15ea1fedbbafc115b91fea9f5957 Mon Sep 17 00:00:00 2001
From: Paul O'Leary McCann <polm@dampfkraft.com>
Date: Wed, 8 Mar 2023 01:47:45 +0900
Subject: [PATCH 01/18] Change GPU efficient textcat to use CNN, not BOW in
 generated configs (#11900)

* Change GPU efficient textcat to use CNN, not BOW

If you generate a config with a textcat component using GPU
(transformers), the default option (efficiency) uses a BOW architecture,
which does not use tok2vec features. While that can make sense as part
of a larger pipeline, in the case of just a transformer and a textcat,
that means the transformer is doing a lot of work for no purpose.

This changes it so that the CNN architecture is used instead. It could
also be changed to be the same as the accuracy config, which uses the
ensemble architecture.

* Add the transformer when using a textcat with GPU

* Switch ubuntu-latest to ubuntu-20.04 in main tests (#11928)

* Switch ubuntu-latest to ubuntu-20.04 in main tests

* Only use 20.04 for 3.6

* Require thinc v8.1.7

* Require thinc v8.1.8

* Break up longer expression

---------

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 pyproject.toml                                |  2 +-
 requirements.txt                              |  2 +-
 setup.cfg                                     |  4 +--
 spacy/cli/templates/quickstart_training.jinja | 31 ++++++++++++++-----
 4 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 7abd7a96f..9cd96ac2d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.1.0,<8.2.0",
+    "thinc>=8.1.8,<8.2.0",
     "numpy>=1.15.0",
 ]
 build-backend = "setuptools.build_meta"
diff --git a/requirements.txt b/requirements.txt
index bc9fc183c..63e03d558 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,7 +3,7 @@ spacy-legacy>=3.0.11,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.1.0,<8.2.0
+thinc>=8.1.8,<8.2.0
 ml_datasets>=0.2.0,<0.3.0
 murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
diff --git a/setup.cfg b/setup.cfg
index cddc5148c..27499805b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -39,7 +39,7 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.1.0,<8.2.0
+    thinc>=8.1.8,<8.2.0
 install_requires =
     # Our libraries
     spacy-legacy>=3.0.11,<3.1.0
@@ -47,7 +47,7 @@ install_requires =
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.1.0,<8.2.0
+    thinc>=8.1.8,<8.2.0
     wasabi>=0.9.1,<1.2.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index b961ac892..441189341 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -24,8 +24,11 @@ gpu_allocator = null
 lang = "{{ lang }}"
 {%- set has_textcat = ("textcat" in components or "textcat_multilabel" in components) -%}
 {%- set with_accuracy = optimize == "accuracy" -%}
-{%- set has_accurate_textcat = has_textcat and with_accuracy -%}
-{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "trainable_lemmatizer" in components or "entity_linker" in components or has_accurate_textcat) -%}
+{# The BOW textcat doesn't need a source of features, so it can omit the
+tok2vec/transformer. #}
+{%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
+{%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
+{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
 {%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
 {%- else -%}
 {%- set full_pipeline = components -%}
@@ -221,10 +224,16 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = true
-ngram_size = 1
-no_output_layer = false
+nO = null
+
+[components.textcat.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.textcat.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
 {%- endif %}
 {%- endif %}
 
@@ -252,10 +261,16 @@ no_output_layer = false
 
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = false
-ngram_size = 1
-no_output_layer = false
+nO = null
+
+[components.textcat_multilabel.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.textcat_multilabel.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
 {%- endif %}
 {%- endif %}
 

From b3093367125294d57c2d34593f8dfb06b32de9f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcus=20Bl=C3=A4ttermann?= <marcus@essenmitsosse.de>
Date: Wed, 8 Mar 2023 11:59:10 +0100
Subject: [PATCH 02/18] Make sure to run Python setup before NPM dev mode
 (#12384)

---
 website/package.json | 1 +
 1 file changed, 1 insertion(+)

diff --git a/website/package.json b/website/package.json
index eeefe32df..5f8bae47e 100644
--- a/website/package.json
+++ b/website/package.json
@@ -6,6 +6,7 @@
     "dev": "next dev",
     "build": "next build && npm run sitemap && next export",
     "prebuild": "pip install -r setup/requirements.txt && sh setup/setup.sh",
+    "predev": "npm run prebuild",
     "sitemap": "next-sitemap --config next-sitemap.config.mjs",
     "start": "next start",
     "lint": "next lint",

From 4fdf356b29160af7a77649c76b8c75413e9c029c Mon Sep 17 00:00:00 2001
From: Victoria <80417010+victorialslocum@users.noreply.github.com>
Date: Thu, 9 Mar 2023 10:01:18 +0100
Subject: [PATCH 03/18] Add links in website and readme for survey (#12385)

---
 README.md                      | 3 +++
 website/src/templates/index.js | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 49aa6796e..36a015caf 100644
--- a/README.md
+++ b/README.md
@@ -16,6 +16,9 @@ production-ready [**training system**](https://spacy.io/usage/training) and easy
 model packaging, deployment and workflow management. spaCy is commercial
 open-source software, released under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
 
+💥 **We'd love to hear more about your experience with spaCy!**
+[Fill out our survey here.](https://form.typeform.com/to/aMel9q9f)
+
 💫 **Version 3.5 out now!**
 [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
 
diff --git a/website/src/templates/index.js b/website/src/templates/index.js
index 227b25be8..2ee29a9e9 100644
--- a/website/src/templates/index.js
+++ b/website/src/templates/index.js
@@ -25,6 +25,11 @@ const AlertSpace = ({ nightly, legacy }) => {
     const isOnline = useOnlineStatus()
     return (
         <>
+            {isOnline && (
+                <Alert title="💥 We'd love to learn more about your experience with spaCy!">
+                    <Link to="https://form.typeform.com/to/aMel9q9f">Take our survey here.</Link>
+                </Alert>
+            )}
             {nightly && (
                 <Alert
                     title="You're viewing the pre-release docs."

From 913d74f5092f7a1b286724412c975b590da64fca Mon Sep 17 00:00:00 2001
From: Lj Miranda <12949683+ljvmiranda921@users.noreply.github.com>
Date: Thu, 9 Mar 2023 17:30:59 +0800
Subject: [PATCH 04/18] Add spancat_singlelabel pipeline for multiclass and
 non-overlapping span labelling tasks (#11365)

* [wip] Update

* [wip] Update

* Add initial port

* [wip] Update

* Fix all imports

* Add spancat_exclusive to pipeline

* [WIP] Update

* [ci skip] Add breakpoint for debugging

* Use spacy.SpanCategorizer.v1 as default archi

* Update spacy/pipeline/spancat_exclusive.py

Co-authored-by: kadarakos <kadar.akos@gmail.com>

* [ci skip] Small updates

* Use Softmax v2 directly from thinc

* Cache the label map

* Fix mypy errors

However, I ignored line 370 because it opened up a bunch of type errors
that might be trickier to solve and might lead to a more complicated
codebase.

* avoid multiplication with 1.0

Co-authored-by: kadarakos <kadar.akos@gmail.com>

* Update spacy/pipeline/spancat_exclusive.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Update component versions to v2

* Add scorer to docstring

* Add _n_labels property to SpanCategorizer

Instead of using len(self.labels) in initialize() I am using a private
property self._n_labels. This achieves implementation parity and allows
me to delete the whole initialize() method for spancat_exclusive (since
it's now the same with spancat).

* Inherit from SpanCat instead of TrainablePipe

This commit changes the inheritance structure of Exclusive_Spancat:
it now inherits from SpanCategorizer rather than TrainablePipe. This
allows me to remove duplicate methods that are already present in
the parent class.

* Revert documentation link to spancat

* Fix init call for exclusive spancat

* Update spacy/pipeline/spancat_exclusive.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Import Suggester from spancat

* Include zero_init.v1 for spancat

* Implement _allow_extra_label to use _n_labels

To ensure that spancat / spancat_exclusive cannot be resized after
initialization, I inherited the _allow_extra_label() method from
spacy/pipeline/trainable_pipe.pyx and used self._n_labels instead
of len(self.labels) for checking.

I think that changing it locally is a better solution rather than
forcing each class that inherits TrainablePipe to use the self._n_labels
attribute.

Also note that I turned-off black formatting in this block of code
because it reads better without the overhang.

* Extend existing tests to spancat_exclusive

In this commit, I extended the existing tests for spancat to include
spancat_exclusive. I parametrized the test functions with 'name'
(similar var name with textcat and textcat_multilabel) for each
applicable test.

TODO: Add overfitting tests for spancat_exclusive

* Update documentation for spancat

* Turn on formatting for allow_extra_label

* Remove initializers in default config

* Use DEFAULT_EXCL_SPANCAT_MODEL

I also renamed spancat_exclusive_default_config to
spancat_excl_default_config because black otherwise makes some
unsightly formatting changes.

* Update documentation

Update grammar and usage

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Clarify docstring for Exclusive_SpanCategorizer

* Remove mypy ignore and typecast labels to list

* Fix documentation API

* Use a single variable for tests

* Update defaults for number of rows

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Put back initializers in spancat config

Whenever I remove model.scorer.init_w and model.scorer.init_b,
I encounter an error in the test:

    SystemError: <method '__getitem__' of 'dict' objects> returned a result
    with an error set.

My Thinc version is 8.1.5, but I can't seem to check what's causing the
error.

* Update spancat_exclusive docstring

* Remove init_W and init_B parameters

This commit is expected to fail until the new Thinc release.

* Require thinc>=8.1.6 for serializable Softmax defaults

* Handle zero suggestions to make tests pass

I'm not sure if this is the most elegant solution. But what should
happen is that the _make_span_group function MUST return an empty
SpanGroup if there are no suggestions.

The error happens when the 'scores' variable is empty. We cannot
get the 'predicted' and other downstream vars.

* Better approach for handling zero suggestions

* Update website/docs/api/spancategorizer.md

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spancategorizer headers

* Apply suggestions from code review

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Add default value in negative_weight in docs

* Add default value in allow_overlap in docs

* Update how spancat_exclusive is constructed

In this commit, I added the following:
- Put the default values of negative_weight and allow_overlap
    in the default_config dictionary.
- Rename make_spancat -> make_exclusive_spancat

* Run prettier on spancategorizer.mdx

* Change exactly one -> at most one

* Add suggester documentation in Exclusive_SpanCategorizer

* Add suggester to spancat docstrings

* merge multilabel and singlelabel spancat

* rename spancat_exclusive to singlelabel

* wire up different make_spangroups for single and multilabel

* black

* black

* add docstrings

* more docstring and fix negative_label

* don't rely on default arguments

* black

* remove spancat exclusive

* replace single_label with add_negative_label and adjust inference

* mypy

* logical bug in configuration check

* add spans.attrs[scores]

* single label make_spangroup test

* bugfix

* black

* tests for make_span_group with negative labels

* refactor make_span_group

* black

* Update spacy/tests/pipeline/test_spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* remove duplicate declaration

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* raise error instead of just print

* make label mapper private

* update docs

* run prettier

* Update website/docs/api/spancategorizer.mdx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update website/docs/api/spancategorizer.mdx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* don't keep recomputing self._label_map for each span

* typo in docs

* Intervals to private and document 'name' param

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/pipeline/spancat.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* add Tag to new features

* replace tags

* revert

* revert

* revert

* revert

* Update website/docs/api/spancategorizer.mdx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update website/docs/api/spancategorizer.mdx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* prettier

* Fix merge

* Update website/docs/api/spancategorizer.mdx

* remove references to 'single_label'

* remove old paragraph

* Add spancat_singlelabel to config template

* Format

* Extend init config tests

---------

Co-authored-by: kadarakos <kadar.akos@gmail.com>
Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 spacy/cli/templates/quickstart_training.jinja |  61 +++-
 spacy/errors.py                               |   1 +
 spacy/pipeline/spancat.py                     | 323 ++++++++++++++++--
 spacy/tests/pipeline/test_spancat.py          | 157 ++++++++-
 spacy/tests/test_cli.py                       |   9 +-
 website/docs/api/spancategorizer.mdx          |  68 ++--
 6 files changed, 552 insertions(+), 67 deletions(-)

diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 441189341..c5e8c6c43 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -3,7 +3,7 @@ the docs and the init config command. It encodes various best practices and
 can help generate the best possible configuration, given a user's requirements. #}
 {%- set use_transformer = hardware != "cpu" and transformer_data -%}
 {%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
-{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "trainable_lemmatizer"] -%}
+{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
 [paths]
 train = null
 dev = null
@@ -28,7 +28,7 @@ lang = "{{ lang }}"
 tok2vec/transformer. #}
 {%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
 {%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
-{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
+{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
 {%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
 {%- else -%}
 {%- set full_pipeline = components -%}
@@ -159,6 +159,36 @@ grad_factor = 1.0
 sizes = [1,2,3]
 {% endif -%}
 
+{% if "spancat_singlelabel" in components %}
+[components.spancat_singlelabel]
+factory = "spancat_singlelabel"
+negative_weight = 1.0
+allow_overlap = true
+scorer = {"@scorers":"spacy.spancat_scorer.v1"}
+spans_key = "sc"
+
+[components.spancat_singlelabel.model]
+@architectures = "spacy.SpanCategorizer.v1"
+
+[components.spancat_singlelabel.model.reducer]
+@layers = "spacy.mean_max_reducer.v1"
+hidden_size = 128
+
+[components.spancat_singlelabel.model.scorer]
+@layers = "Softmax.v2"
+
+[components.spancat_singlelabel.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.spancat_singlelabel.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+
+[components.spancat_singlelabel.suggester]
+@misc = "spacy.ngram_suggester.v1"
+sizes = [1,2,3]
+{% endif %}
+
 {% if "trainable_lemmatizer" in components -%}
 [components.trainable_lemmatizer]
 factory = "trainable_lemmatizer"
@@ -389,6 +419,33 @@ width = ${components.tok2vec.model.encode.width}
 sizes = [1,2,3]
 {% endif %}
 
+{% if "spancat_singlelabel" in components %}
+[components.spancat_singlelabel]
+factory = "spancat_singlelabel"
+negative_weight = 1.0
+allow_overlap = true
+scorer = {"@scorers":"spacy.spancat_scorer.v1"}
+spans_key = "sc"
+
+[components.spancat_singlelabel.model]
+@architectures = "spacy.SpanCategorizer.v1"
+
+[components.spancat_singlelabel.model.reducer]
+@layers = "spacy.mean_max_reducer.v1"
+hidden_size = 128
+
+[components.spancat_singlelabel.model.scorer]
+@layers = "Softmax.v2"
+
+[components.spancat_singlelabel.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+
+[components.spancat_singlelabel.suggester]
+@misc = "spacy.ngram_suggester.v1"
+sizes = [1,2,3]
+{% endif %}
+
 {% if "trainable_lemmatizer" in components -%}
 [components.trainable_lemmatizer]
 factory = "trainable_lemmatizer"
diff --git a/spacy/errors.py b/spacy/errors.py
index 1047ed21a..c897c29ff 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -969,6 +969,7 @@ class Errors(metaclass=ErrorsWithCodes):
              "with `displacy.serve(doc, port=port)`")
     E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` "
              "or use `auto_select_port=True` to pick an available port automatically.")
+    E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index a3388e81a..983e1fba9 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -1,4 +1,5 @@
-from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any
+from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union
+from dataclasses import dataclass
 from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops
 from thinc.api import Optimizer
 from thinc.types import Ragged, Ints2d, Floats2d
@@ -43,7 +44,36 @@ maxout_pieces = 3
 depth = 4
 """
 
+spancat_singlelabel_default_config = """
+[model]
+@architectures = "spacy.SpanCategorizer.v1"
+scorer = {"@layers": "Softmax.v2"}
+
+[model.reducer]
+@layers = spacy.mean_max_reducer.v1
+hidden_size = 128
+
+[model.tok2vec]
+@architectures = "spacy.Tok2Vec.v2"
+[model.tok2vec.embed]
+@architectures = "spacy.MultiHashEmbed.v1"
+width = 96
+rows = [5000, 1000, 2500, 1000]
+attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"]
+include_static_vectors = false
+
+[model.tok2vec.encode]
+@architectures = "spacy.MaxoutWindowEncoder.v2"
+width = ${model.tok2vec.embed.width}
+window_size = 1
+maxout_pieces = 3
+depth = 4
+"""
+
 DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"]
+DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str(
+    spancat_singlelabel_default_config
+)["model"]
 
 
 @runtime_checkable
@@ -119,10 +149,14 @@ def make_spancat(
     threshold: float,
     max_positive: Optional[int],
 ) -> "SpanCategorizer":
-    """Create a SpanCategorizer component. The span categorizer consists of two
+    """Create a SpanCategorizer component and configure it for multi-label
+    classification to be able to assign multiple labels for each span.
+    The span categorizer consists of two
     parts: a suggester function that proposes candidate spans, and a labeller
     model that predicts one or more labels for each span.
 
+    name (str): The component instance name, used to add entries to the
+        losses during training.
     suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
         Spans are returned as a ragged array with two integer columns, for the
         start and end positions.
@@ -144,12 +178,80 @@ def make_spancat(
     """
     return SpanCategorizer(
         nlp.vocab,
-        suggester=suggester,
         model=model,
-        spans_key=spans_key,
-        threshold=threshold,
-        max_positive=max_positive,
+        suggester=suggester,
         name=name,
+        spans_key=spans_key,
+        negative_weight=None,
+        allow_overlap=True,
+        max_positive=max_positive,
+        threshold=threshold,
+        scorer=scorer,
+        add_negative_label=False,
+    )
+
+
+@Language.factory(
+    "spancat_singlelabel",
+    assigns=["doc.spans"],
+    default_config={
+        "spans_key": "sc",
+        "model": DEFAULT_SPANCAT_SINGLELABEL_MODEL,
+        "negative_weight": 1.0,
+        "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
+        "scorer": {"@scorers": "spacy.spancat_scorer.v1"},
+        "allow_overlap": True,
+    },
+    default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
+)
+def make_spancat_singlelabel(
+    nlp: Language,
+    name: str,
+    suggester: Suggester,
+    model: Model[Tuple[List[Doc], Ragged], Floats2d],
+    spans_key: str,
+    negative_weight: float,
+    allow_overlap: bool,
+    scorer: Optional[Callable],
+) -> "SpanCategorizer":
+    """Create a SpanCategorizer component and configure it for multi-class
+    classification. With this configuration each span can get at most one
+    label. The span categorizer consists of two
+    parts: a suggester function that proposes candidate spans, and a labeller
+    model that predicts at most one label for each span.
+
+    name (str): The component instance name, used to add entries to the
+        losses during training.
+    suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
+        Spans are returned as a ragged array with two integer columns, for the
+        start and end positions.
+    model (Model[Tuple[List[Doc], Ragged], Floats2d]): A model instance that
+        is given a list of documents and (start, end) indices representing
+        candidate span offsets. The model predicts a probability for each category
+        for each span.
+    spans_key (str): Key of the doc.spans dict to save the spans under. During
+        initialization and training, the component will look for spans on the
+        reference document under the same key.
+    scorer (Optional[Callable]): The scoring method. Defaults to
+        Scorer.score_spans for the Doc.spans[spans_key] with overlapping
+        spans allowed.
+    negative_weight (float): Multiplier for the loss terms.
+        Can be used to downweight the negative samples if there are too many.
+    allow_overlap (bool): If True the data is assumed to contain overlapping spans.
+        Otherwise it produces non-overlapping spans greedily prioritizing
+        higher assigned label scores.
+    """
+    return SpanCategorizer(
+        nlp.vocab,
+        model=model,
+        suggester=suggester,
+        name=name,
+        spans_key=spans_key,
+        negative_weight=negative_weight,
+        allow_overlap=allow_overlap,
+        max_positive=1,
+        add_negative_label=True,
+        threshold=None,
         scorer=scorer,
     )
 
@@ -172,6 +274,27 @@ def make_spancat_scorer():
     return spancat_score
 
 
+@dataclass  # NOTE(review): no fields are declared; confirm the decorator is needed
+class _Intervals:
+    """Helper class to avoid storing overlapping spans: add() records each
+    index in [i, j), and `(i, j) in obj` is True if any of them was recorded.
+    """
+
+    def __init__(self):
+        self.ranges = set()
+
+    def add(self, i, j):
+        for e in range(i, j):
+            self.ranges.add(e)
+
+    def __contains__(self, rang):
+        i, j = rang
+        for e in range(i, j):
+            if e in self.ranges:
+                return True
+        return False
+
+
 class SpanCategorizer(TrainablePipe):
     """Pipeline component to label spans of text.
 
@@ -185,25 +308,43 @@ class SpanCategorizer(TrainablePipe):
         suggester: Suggester,
         name: str = "spancat",
         *,
+        add_negative_label: bool = False,
         spans_key: str = "spans",
-        threshold: float = 0.5,
+        negative_weight: Optional[float] = 1.0,
+        allow_overlap: Optional[bool] = True,
         max_positive: Optional[int] = None,
+        threshold: Optional[float] = 0.5,
         scorer: Optional[Callable] = spancat_score,
     ) -> None:
-        """Initialize the span categorizer.
+        """Initialize the multi-label or multi-class span categorizer.
+
         vocab (Vocab): The shared vocabulary.
         model (thinc.api.Model): The Thinc Model powering the pipeline component.
+            For multi-class classification (single label per span) we recommend
+            using a Softmax classifier as the final layer, while for multi-label
+            classification (multiple possible labels per span) we recommend Logistic.
+        suggester (Callable[[Iterable[Doc], Optional[Ops]], Ragged]): A function that suggests spans.
+            Spans are returned as a ragged array with two integer columns, for the
+            start and end positions.
         name (str): The component instance name, used to add entries to the
             losses during training.
         spans_key (str): Key of the Doc.spans dict to save the spans under.
             During initialization and training, the component will look for
             spans on the reference document under the same key. Defaults to
             `"spans"`.
-        threshold (float): Minimum probability to consider a prediction
-            positive. Spans with a positive prediction will be saved on the Doc.
-            Defaults to 0.5.
+        add_negative_label (bool): Learn to predict a special 'negative_label'
+            when a Span is not annotated.
+        threshold (Optional[float]): Minimum probability to consider a prediction
+            positive. Defaults to 0.5. Spans with a positive prediction will be saved
+            on the Doc.
         max_positive (Optional[int]): Maximum number of labels to consider
             positive per span. Defaults to None, indicating no limit.
+        negative_weight (float): Multiplier for the loss terms.
+            Can be used to downweight the negative samples if there are too many
+            when add_negative_label is True. Otherwise it's unused.
+        allow_overlap (bool): If True the data is assumed to contain overlapping spans.
+            Otherwise it produces non-overlapping spans greedily prioritizing
+            higher assigned label scores. Only used when max_positive is 1.
         scorer (Optional[Callable]): The scoring method. Defaults to
             Scorer.score_spans for the Doc.spans[spans_key] with overlapping
             spans allowed.
@@ -215,12 +356,17 @@ class SpanCategorizer(TrainablePipe):
             "spans_key": spans_key,
             "threshold": threshold,
             "max_positive": max_positive,
+            "negative_weight": negative_weight,
+            "allow_overlap": allow_overlap,
         }
         self.vocab = vocab
         self.suggester = suggester
         self.model = model
         self.name = name
         self.scorer = scorer
+        self.add_negative_label = add_negative_label
+        if not allow_overlap and max_positive is not None and max_positive > 1:
+            raise ValueError(Errors.E1051.format(max_positive=max_positive))
 
     @property
     def key(self) -> str:
@@ -230,6 +376,21 @@ class SpanCategorizer(TrainablePipe):
         """
         return str(self.cfg["spans_key"])
 
+    def _allow_extra_label(self) -> None:
+        """Raise a ValueError if the component can not add any more labels."""
+        nO = None
+        if self.model.has_dim("nO"):
+            nO = self.model.get_dim("nO")
+        elif self.model.has_ref("output_layer") and self.model.get_ref(
+            "output_layer"
+        ).has_dim("nO"):
+            nO = self.model.get_ref("output_layer").get_dim("nO")
+        if nO is not None and nO == self._n_labels:
+            if not self.is_resizable:
+                # Report the nO resolved above: it may come from the
+                # "output_layer" ref when the model itself has no nO dim.
+                raise ValueError(Errors.E922.format(name=self.name, nO=nO))
+
     def add_label(self, label: str) -> int:
         """Add a new label to the pipe.
 
@@ -263,6 +424,27 @@ class SpanCategorizer(TrainablePipe):
         """
         return list(self.labels)
 
+    @property
+    def _label_map(self) -> Dict[str, int]:
+        """RETURNS (Dict[str, int]): Map of each label to its index in self.labels."""
+        return {label: i for i, label in enumerate(self.labels)}
+
+    @property
+    def _n_labels(self) -> int:
+        """RETURNS (int): Number of labels, plus one if add_negative_label is set."""
+        if self.add_negative_label:
+            return len(self.labels) + 1
+        else:
+            return len(self.labels)
+
+    @property
+    def _negative_label_i(self) -> Union[int, None]:
+        """RETURNS (Union[int, None]): Index of the negative label, if there is one."""
+        if self.add_negative_label:
+            return len(self.label_data)
+        else:
+            return None
+
     def predict(self, docs: Iterable[Doc]):
         """Apply the pipeline's model to a batch of docs, without modifying them.
 
@@ -304,14 +486,24 @@ class SpanCategorizer(TrainablePipe):
 
         DOCS: https://spacy.io/api/spancategorizer#set_annotations
         """
-        labels = self.labels
         indices, scores = indices_scores
         offset = 0
         for i, doc in enumerate(docs):
             indices_i = indices[i].dataXd
-            doc.spans[self.key] = self._make_span_group(
-                doc, indices_i, scores[offset : offset + indices.lengths[i]], labels  # type: ignore[arg-type]
-            )
+            allow_overlap = cast(bool, self.cfg["allow_overlap"])
+            if self.cfg["max_positive"] == 1:
+                doc.spans[self.key] = self._make_span_group_singlelabel(
+                    doc,
+                    indices_i,
+                    scores[offset : offset + indices.lengths[i]],
+                    allow_overlap,
+                )
+            else:
+                doc.spans[self.key] = self._make_span_group_multilabel(
+                    doc,
+                    indices_i,
+                    scores[offset : offset + indices.lengths[i]],
+                )
             offset += indices.lengths[i]
 
     def update(
@@ -371,9 +563,11 @@ class SpanCategorizer(TrainablePipe):
         spans = Ragged(
             self.model.ops.to_numpy(spans.data), self.model.ops.to_numpy(spans.lengths)
         )
-        label_map = {label: i for i, label in enumerate(self.labels)}
         target = numpy.zeros(scores.shape, dtype=scores.dtype)
+        if self.add_negative_label:
+            negative_spans = numpy.ones((scores.shape[0]))
         offset = 0
+        label_map = self._label_map
         for i, eg in enumerate(examples):
             # Map (start, end) offset of spans to the row in the d_scores array,
             # so that we can adjust the gradient for predictions that were
@@ -390,10 +584,16 @@ class SpanCategorizer(TrainablePipe):
                     row = spans_index[key]
                     k = label_map[gold_span.label_]
                     target[row, k] = 1.0
+                    if self.add_negative_label:
+                        # delete negative label target.
+                        negative_spans[row] = 0.0
             # The target is a flat array for all docs. Track the position
             # we're at within the flat array.
             offset += spans.lengths[i]
         target = self.model.ops.asarray(target, dtype="f")  # type: ignore
+        if self.add_negative_label:
+            negative_samples = numpy.nonzero(negative_spans)[0]
+            target[negative_samples, self._negative_label_i] = 1.0  # type: ignore
         # The target will have the values 0 (for untrue predictions) or 1
         # (for true predictions).
         # The scores should be in the range [0, 1].
@@ -402,6 +602,10 @@ class SpanCategorizer(TrainablePipe):
         # If the prediction is 0.9 and it's false, the gradient will be
         # 0.9 (0.9 - 0.0)
         d_scores = scores - target
+        if self.add_negative_label:
+            neg_weight = cast(float, self.cfg["negative_weight"])
+            if neg_weight != 1.0:
+                d_scores[negative_samples] *= neg_weight
         loss = float((d_scores**2).sum())
         return loss, d_scores
 
@@ -438,7 +642,7 @@ class SpanCategorizer(TrainablePipe):
         if subbatch:
             docs = [eg.x for eg in subbatch]
             spans = build_ngram_suggester(sizes=[1])(docs)
-            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], len(self.labels))
+            Y = self.model.ops.alloc2f(spans.dataXd.shape[0], self._n_labels)
             self.model.initialize(X=(docs, spans), Y=Y)
         else:
             self.model.initialize()
@@ -452,31 +656,96 @@ class SpanCategorizer(TrainablePipe):
             eg.reference.spans.get(self.key, []), allow_overlap=True
         )
 
-    def _make_span_group(
-        self, doc: Doc, indices: Ints2d, scores: Floats2d, labels: List[str]
+    def _make_span_group_multilabel(
+        self,
+        doc: Doc,
+        indices: Ints2d,
+        scores: Floats2d,
     ) -> SpanGroup:
+        """Find the top-k labels for each span (k=max_positive)."""
         spans = SpanGroup(doc, name=self.key)
-        max_positive = self.cfg["max_positive"]
+        if scores.size == 0:
+            return spans
+        scores = self.model.ops.to_numpy(scores)
+        indices = self.model.ops.to_numpy(indices)
         threshold = self.cfg["threshold"]
+        max_positive = self.cfg["max_positive"]
 
         keeps = scores >= threshold
-        ranked = (scores * -1).argsort()  # type: ignore
         if max_positive is not None:
             assert isinstance(max_positive, int)
+            if self.add_negative_label:
+                negative_scores = numpy.copy(scores[:, self._negative_label_i])
+                scores[:, self._negative_label_i] = -numpy.inf
+                ranked = (scores * -1).argsort()  # type: ignore
+                scores[:, self._negative_label_i] = negative_scores
+            else:
+                ranked = (scores * -1).argsort()  # type: ignore
             span_filter = ranked[:, max_positive:]
             for i, row in enumerate(span_filter):
                 keeps[i, row] = False
-        spans.attrs["scores"] = scores[keeps].flatten()
-
-        indices = self.model.ops.to_numpy(indices)
-        keeps = self.model.ops.to_numpy(keeps)
 
+        attrs_scores = []
         for i in range(indices.shape[0]):
             start = indices[i, 0]
             end = indices[i, 1]
-
             for j, keep in enumerate(keeps[i]):
                 if keep:
-                    spans.append(Span(doc, start, end, label=labels[j]))
+                    if j != self._negative_label_i:
+                        spans.append(Span(doc, start, end, label=self.labels[j]))
+                        attrs_scores.append(scores[i, j])
+        spans.attrs["scores"] = numpy.array(attrs_scores)
+        return spans
+
+    def _make_span_group_singlelabel(
+        self,
+        doc: Doc,
+        indices: Ints2d,
+        scores: Floats2d,
+        allow_overlap: bool = True,
+    ) -> SpanGroup:
+        """Find the argmax label for each span.
+
+        RETURNS (SpanGroup): The kept spans; attrs["scores"] holds their scores.
+        """
+        spans = SpanGroup(doc, name=self.key)
+        # Handle cases when there are zero suggestions
+        if scores.size == 0:
+            return spans
+        scores = self.model.ops.to_numpy(scores)
+        indices = self.model.ops.to_numpy(indices)
+        predicted = scores.argmax(axis=1)
+        argmax_scores = scores.max(axis=1)
+        keeps = numpy.ones(predicted.shape, dtype=bool)
+        # Remove samples where the negative label is the argmax.
+        if self.add_negative_label:
+            keeps = numpy.logical_and(keeps, predicted != self._negative_label_i)
+        # Filter samples according to threshold.
+        threshold = self.cfg["threshold"]
+        if threshold is not None:
+            keeps = numpy.logical_and(keeps, argmax_scores >= threshold)
+        if not allow_overlap:
+            # Visit spans from highest to lowest score, so the `seen` check
+            # favours the best ones; reorder the scores too to stay aligned.
+            sort_idx = (argmax_scores * -1).argsort()
+            argmax_scores = argmax_scores[sort_idx]
+            predicted = predicted[sort_idx]
+            indices = indices[sort_idx]
+            keeps = keeps[sort_idx]
+        seen = _Intervals()
+        attrs_scores = []
+        for i in range(indices.shape[0]):
+            if not keeps[i]:
+                continue
+            label = predicted[i]
+            start = indices[i, 0]
+            end = indices[i, 1]
+            if not allow_overlap:
+                if (start, end) in seen:
+                    continue
+                seen.add(start, end)
+            attrs_scores.append(argmax_scores[i])
+            spans.append(Span(doc, start, end, label=self.labels[label]))
+        spans.attrs["scores"] = numpy.array(attrs_scores)
 
         return spans
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index e9db983d3..cf6304042 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -15,6 +15,8 @@ OPS = get_current_ops()
 
 SPAN_KEY = "labeled_spans"
 
+SPANCAT_COMPONENTS = ["spancat", "spancat_singlelabel"]
+
 TRAIN_DATA = [
     ("Who is Shaka Khan?", {"spans": {SPAN_KEY: [(7, 17, "PERSON")]}}),
     (
@@ -41,38 +43,42 @@ def make_examples(nlp, data=TRAIN_DATA):
     return train_examples
 
 
-def test_no_label():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_no_label(name):
     nlp = Language()
-    nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     with pytest.raises(ValueError):
         nlp.initialize()
 
 
-def test_no_resize():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_no_resize(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     spancat.add_label("Thing")
     spancat.add_label("Phrase")
     assert spancat.labels == ("Thing", "Phrase")
     nlp.initialize()
-    assert spancat.model.get_dim("nO") == 2
+    assert spancat.model.get_dim("nO") == spancat._n_labels
     # this throws an error because the spancat can't be resized after initialization
     with pytest.raises(ValueError):
         spancat.add_label("Stuff")
 
 
-def test_implicit_labels():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_implicit_labels(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     assert len(spancat.labels) == 0
     train_examples = make_examples(nlp)
     nlp.initialize(get_examples=lambda: train_examples)
     assert spancat.labels == ("PERSON", "LOC")
 
 
-def test_explicit_labels():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_explicit_labels(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     assert len(spancat.labels) == 0
     spancat.add_label("PERSON")
     spancat.add_label("LOC")
@@ -102,13 +108,13 @@ def test_doc_gc():
             # XXX This fails with length 0 sometimes
             assert len(spangroup) > 0
             with pytest.raises(RuntimeError):
-                span = spangroup[0]
+                spangroup[0]
 
 
 @pytest.mark.parametrize(
     "max_positive,nr_results", [(None, 4), (1, 2), (2, 3), (3, 4), (4, 4)]
 )
-def test_make_spangroup(max_positive, nr_results):
+def test_make_spangroup_multilabel(max_positive, nr_results):
     fix_random_seed(0)
     nlp = Language()
     spancat = nlp.add_pipe(
@@ -120,10 +126,12 @@ def test_make_spangroup(max_positive, nr_results):
     indices = ngram_suggester([doc])[0].dataXd
     assert_array_equal(OPS.to_numpy(indices), numpy.asarray([[0, 1], [1, 2], [0, 2]]))
     labels = ["Thing", "City", "Person", "GreatCity"]
+    for label in labels:
+        spancat.add_label(label)
     scores = numpy.asarray(
         [[0.2, 0.4, 0.3, 0.1], [0.1, 0.6, 0.2, 0.4], [0.8, 0.7, 0.3, 0.9]], dtype="f"
     )
-    spangroup = spancat._make_span_group(doc, indices, scores, labels)
+    spangroup = spancat._make_span_group_multilabel(doc, indices, scores)
     assert len(spangroup) == nr_results
 
     # first span is always the second token "London"
@@ -154,6 +162,118 @@ def test_make_spangroup(max_positive, nr_results):
     assert_almost_equal(0.9, spangroup.attrs["scores"][-1], 5)
 
 
+@pytest.mark.parametrize(
+    "threshold,allow_overlap,nr_results",
+    [(0.05, True, 3), (0.05, False, 1), (0.5, True, 2), (0.5, False, 1)],
+)
+def test_make_spangroup_singlelabel(threshold, allow_overlap, nr_results):
+    fix_random_seed(0)
+    nlp = Language()
+    spancat = nlp.add_pipe(
+        "spancat",
+        config={
+            "spans_key": SPAN_KEY,
+            "threshold": threshold,
+            "max_positive": 1,
+        },
+    )
+    doc = nlp.make_doc("Greater London")
+    ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2])
+    indices = ngram_suggester([doc])[0].dataXd
+    assert_array_equal(OPS.to_numpy(indices), numpy.asarray([[0, 1], [1, 2], [0, 2]]))
+    labels = ["Thing", "City", "Person", "GreatCity"]
+    for label in labels:
+        spancat.add_label(label)
+    scores = numpy.asarray(
+        [[0.2, 0.4, 0.3, 0.1], [0.1, 0.6, 0.2, 0.4], [0.8, 0.7, 0.3, 0.9]], dtype="f"
+    )
+    spangroup = spancat._make_span_group_singlelabel(
+        doc, indices, scores, allow_overlap
+    )
+    assert len(spangroup) == nr_results
+    if threshold > 0.4:
+        if allow_overlap:
+            assert spangroup[0].text == "London"
+            assert spangroup[0].label_ == "City"
+            assert spangroup[1].text == "Greater London"
+            assert spangroup[1].label_ == "GreatCity"
+
+        else:
+            assert spangroup[0].text == "Greater London"
+            assert spangroup[0].label_ == "GreatCity"
+    else:
+        if allow_overlap:
+            assert spangroup[0].text == "Greater"
+            assert spangroup[0].label_ == "City"
+            assert spangroup[1].text == "London"
+            assert spangroup[1].label_ == "City"
+            assert spangroup[2].text == "Greater London"
+            assert spangroup[2].label_ == "GreatCity"
+        else:
+            assert spangroup[0].text == "Greater London"
+
+
+def test_make_spangroup_negative_label():
+    fix_random_seed(0)
+    nlp_single = Language()
+    nlp_multi = Language()
+    spancat_single = nlp_single.add_pipe(
+        "spancat",
+        config={
+            "spans_key": SPAN_KEY,
+            "threshold": 0.1,
+            "max_positive": 1,
+        },
+    )
+    spancat_multi = nlp_multi.add_pipe(
+        "spancat",
+        config={
+            "spans_key": SPAN_KEY,
+            "threshold": 0.1,
+            "max_positive": 2,
+        },
+    )
+    spancat_single.add_negative_label = True
+    spancat_multi.add_negative_label = True
+    doc = nlp_single.make_doc("Greater London")
+    labels = ["Thing", "City", "Person", "GreatCity"]
+    for label in labels:
+        spancat_multi.add_label(label)
+        spancat_single.add_label(label)
+    ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2])
+    indices = ngram_suggester([doc])[0].dataXd
+    assert_array_equal(OPS.to_numpy(indices), numpy.asarray([[0, 1], [1, 2], [0, 2]]))
+    scores = numpy.asarray(
+        [
+            [0.2, 0.4, 0.3, 0.1, 0.1],
+            [0.1, 0.6, 0.2, 0.4, 0.9],
+            [0.8, 0.7, 0.3, 0.9, 0.1],
+        ],
+        dtype="f",
+    )
+    spangroup_multi = spancat_multi._make_span_group_multilabel(doc, indices, scores)
+    spangroup_single = spancat_single._make_span_group_singlelabel(doc, indices, scores)
+    assert len(spangroup_single) == 2
+    assert spangroup_single[0].text == "Greater"
+    assert spangroup_single[0].label_ == "City"
+    assert spangroup_single[1].text == "Greater London"
+    assert spangroup_single[1].label_ == "GreatCity"
+
+    assert len(spangroup_multi) == 6
+    assert spangroup_multi[0].text == "Greater"
+    assert spangroup_multi[0].label_ == "City"
+    assert spangroup_multi[1].text == "Greater"
+    assert spangroup_multi[1].label_ == "Person"
+    assert spangroup_multi[2].text == "London"
+    assert spangroup_multi[2].label_ == "City"
+    assert spangroup_multi[3].text == "London"
+    assert spangroup_multi[3].label_ == "GreatCity"
+    assert spangroup_multi[4].text == "Greater London"
+    assert spangroup_multi[4].label_ == "Thing"
+    assert spangroup_multi[5].text == "Greater London"
+    assert spangroup_multi[5].label_ == "GreatCity"
+
+
 def test_ngram_suggester(en_tokenizer):
     # test different n-gram lengths
     for size in [1, 2, 3]:
@@ -371,9 +491,9 @@ def test_overfitting_IO_overlapping():
         assert set([span.label_ for span in spans2]) == {"LOC", "DOUBLE_LOC"}
 
 
-def test_zero_suggestions():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_zero_suggestions(name):
     # Test with a suggester that can return 0 suggestions
-
     @registry.misc("test_mixed_zero_suggester")
     def make_mixed_zero_suggester():
         def mixed_zero_suggester(docs, *, ops=None):
@@ -400,7 +520,7 @@ def test_zero_suggestions():
     fix_random_seed(0)
     nlp = English()
     spancat = nlp.add_pipe(
-        "spancat",
+        name,
         config={
             "suggester": {"@misc": "test_mixed_zero_suggester"},
             "spans_key": SPAN_KEY,
@@ -408,7 +528,7 @@ def test_zero_suggestions():
     )
     train_examples = make_examples(nlp)
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
-    assert spancat.model.get_dim("nO") == 2
+    assert spancat.model.get_dim("nO") == spancat._n_labels
     assert set(spancat.labels) == {"LOC", "PERSON"}
 
     nlp.update(train_examples, sgd=optimizer)
@@ -424,9 +544,10 @@ def test_zero_suggestions():
     list(nlp.pipe(["", "one", "three three three"]))
 
 
-def test_set_candidates():
+@pytest.mark.parametrize("name", SPANCAT_COMPONENTS)
+def test_set_candidates(name):
     nlp = Language()
-    spancat = nlp.add_pipe("spancat", config={"spans_key": SPAN_KEY})
+    spancat = nlp.add_pipe(name, config={"spans_key": SPAN_KEY})
     train_examples = make_examples(nlp)
     nlp.initialize(get_examples=lambda: train_examples)
     texts = [
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index f5bcdfd23..1fdf059b3 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -552,7 +552,14 @@ def test_parse_cli_overrides():
 
 @pytest.mark.parametrize("lang", ["en", "nl"])
 @pytest.mark.parametrize(
-    "pipeline", [["tagger", "parser", "ner"], [], ["ner", "textcat", "sentencizer"]]
+    "pipeline",
+    [
+        ["tagger", "parser", "ner"],
+        [],
+        ["ner", "textcat", "sentencizer"],
+        ["morphologizer", "spancat", "entity_linker"],
+        ["spancat_singlelabel", "textcat_multilabel"],
+    ],
 )
 @pytest.mark.parametrize("optimize", ["efficiency", "accuracy"])
 @pytest.mark.parametrize("pretraining", [True, False])
diff --git a/website/docs/api/spancategorizer.mdx b/website/docs/api/spancategorizer.mdx
index f39c0aff9..c7de2324b 100644
--- a/website/docs/api/spancategorizer.mdx
+++ b/website/docs/api/spancategorizer.mdx
@@ -13,6 +13,13 @@ A span categorizer consists of two parts: a [suggester function](#suggesters)
 that proposes candidate spans, which may or may not overlap, and a labeler model
 that predicts zero or more labels for each candidate.
 
+This component comes in two forms: `spancat` and `spancat_singlelabel` (added in
+spaCy v3.5.1). When you need to perform multi-label classification on your
+spans, use `spancat`. The `spancat` component uses a `Logistic` layer where the
+output class probabilities are independent for each class. However, if you need
+to predict at most one true class for a span, then use `spancat_singlelabel`. It
+uses a `Softmax` layer and treats the task as a multi-class problem.
+
 Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the doc.
 Individual span scores can be found in `spangroup.attrs["scores"]`.
 
@@ -38,7 +45,7 @@ how the component should be configured. You can override its settings via the
 [model architectures](/api/architectures) documentation for details on the
 architectures and their arguments and hyperparameters.
 
-> #### Example
+> #### Example (spancat)
 >
 > ```python
 > from spacy.pipeline.spancat import DEFAULT_SPANCAT_MODEL
@@ -52,14 +59,33 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("spancat", config=config)
 > ```
 
-| Setting        | Description                                                                                                                                                                                                                                                                                             |
-| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `suggester`    | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                                  |
-| `model`        | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
-| `spans_key`    | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                                                                                  |
-| `threshold`    | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                                          |
-| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                                      |
-| `scorer`       | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                                                                                                       |
+> #### Example (spancat_singlelabel)
+>
+> ```python
+> from spacy.pipeline.spancat import DEFAULT_SPANCAT_SINGLELABEL_MODEL
+> config = {
+>     "threshold": 0.5,
+>     "spans_key": "labeled_spans",
+>     "model": DEFAULT_SPANCAT_SINGLELABEL_MODEL,
+>     "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
+>     # Additional spancat_singlelabel parameters
+>     "negative_weight": 0.8,
+>     "allow_overlap": True,
+> }
+> nlp.add_pipe("spancat_singlelabel", config=config)
+> ```
+
+| Setting                                             | Description                                                                                                                                                                                                                                                                                             |
+| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `suggester`                                         | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. Defaults to [`ngram_suggester`](#ngram_suggester). ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                                  |
+| `model`                                             | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. Defaults to [SpanCategorizer](/api/architectures#SpanCategorizer). ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~   |
+| `spans_key`                                         | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                                                                                  |
+| `threshold`                                         | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Meant to be used in combination with the multi-label `spancat` component with a `Logistic` scoring layer. Defaults to `0.5`. ~~float~~                                                |
+| `max_positive`                                      | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. Meant to be used together with the `spancat` component and is set to 1 with `spancat_singlelabel`. ~~Optional[int]~~                                                                                   |
+| `scorer`                                            | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~                                                                                                                                       |
+| `add_negative_label` <Tag variant="new">3.5.1</Tag> | Whether to learn to predict a special negative label for each unannotated `Span`. This should be `True` when using a `Softmax` classifier layer and so it's `True` by default for `spancat_singlelabel`. Spans with negative labels and their scores are not stored as annotations. ~~bool~~            |
+| `negative_weight` <Tag variant="new">3.5.1</Tag>    | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~                                                                                                               |
+| `allow_overlap` <Tag variant="new">3.5.1</Tag>      | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~                                                                                                                                                        |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/spancat.py
@@ -71,6 +97,7 @@ architectures and their arguments and hyperparameters.
 >
 > ```python
 > # Construction via add_pipe with default model
+> # Replace 'spancat' with 'spancat_singlelabel' for exclusive classes
 > spancat = nlp.add_pipe("spancat")
 >
 > # Construction via add_pipe with custom model
@@ -86,16 +113,19 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#create_pipe).
 
-| Name           | Description                                                                                                                                                                                                                          |
-| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `vocab`        | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                     |
-| `model`        | A model instance that is given a a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~ |
-| `suggester`    | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                  |
-| `name`         | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                  |
-| _keyword-only_ |                                                                                                                                                                                                                                      |
-| `spans_key`    | Key of the [`Doc.spans`](/api/doc#sans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                |
-| `threshold`    | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                       |
-| `max_positive` | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                   |
+| Name                                                | Description                                                                                                                                                                                                                                                                                  |
+| --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vocab`                                             | The shared vocabulary. ~~Vocab~~                                                                                                                                                                                                                                                             |
+| `model`                                             | A model instance that is given a list of documents and `(start, end)` indices representing candidate span offsets. The model predicts a probability for each category for each span. ~~Model[Tuple[List[Doc], Ragged], Floats2d]~~                                                           |
+| `suggester`                                         | A function that [suggests spans](#suggesters). Spans are returned as a ragged array with two integer columns, for the start and end positions. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~                                                                                          |
+| `name`                                              | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~                                                                                                                                                                                          |
+| _keyword-only_                                      |                                                                                                                                                                                                                                                                                              |
+| `spans_key`                                         | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~                                                                       |
+| `threshold`                                         | Minimum probability to consider a prediction positive. Spans with a positive prediction will be saved on the Doc. Defaults to `0.5`. ~~float~~                                                                                                                                               |
+| `max_positive`                                      | Maximum number of labels to consider positive per span. Defaults to `None`, indicating no limit. ~~Optional[int]~~                                                                                                                                                                           |
+| `allow_overlap` <Tag variant="new">3.5.1</Tag>      | If `True`, the data is assumed to contain overlapping spans. It is only available when `max_positive` is exactly 1. Defaults to `True`. ~~bool~~                                                                                                                                             |
+| `add_negative_label` <Tag variant="new">3.5.1</Tag> | Whether to learn to predict a special negative label for each unannotated `Span`. This should be `True` when using a `Softmax` classifier layer and so it's `True` by default for `spancat_singlelabel`. Spans with negative labels and their scores are not stored as annotations. ~~bool~~ |
+| `negative_weight` <Tag variant="new">3.5.1</Tag>    | Multiplier for the loss terms. It can be used to downweight the negative samples if there are too many. It is only used when `add_negative_label` is `True`. Defaults to `1.0`. ~~float~~                                                                                                   |
 
 ## SpanCategorizer.\_\_call\_\_ {id="call",tag="method"}
 

From f27bce67fdfcd5ea7e5fd011273012e8b8c2ed54 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Thu, 9 Mar 2023 16:41:21 +0100
Subject: [PATCH 05/18] Skip project clone tests if git is not available
 (#12394)

---
 spacy/tests/test_cli_app.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py
index 8aaadf686..9ba4f0e5c 100644
--- a/spacy/tests/test_cli_app.py
+++ b/spacy/tests/test_cli_app.py
@@ -5,10 +5,18 @@ import srsly
 from typer.testing import CliRunner
 from spacy.tokens import DocBin, Doc
 
-from spacy.cli._util import app
+from spacy.cli._util import app, get_git_version
 from .util import make_tempdir, normalize_whitespace
 
 
+def has_git():
+    try:
+        get_git_version()
+        return True
+    except RuntimeError:
+        return False
+
+
 def test_convert_auto():
     with make_tempdir() as d_in, make_tempdir() as d_out:
         for f in ["data1.iob", "data2.iob", "data3.iob"]:
@@ -181,6 +189,7 @@ def test_project_run(project_dir):
     assert "okokok" in result.stdout
 
 
+@pytest.mark.skipif(not has_git(), reason="git not installed")
 @pytest.mark.parametrize(
     "options",
     [

From 3c999f052e1655f93fce4e768a5cfa747768abce Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 13 Mar 2023 13:13:47 +0100
Subject: [PATCH 06/18] Add GHA for CI tests (#12403)

* Add GHA for CI tests

* Reorder paths
---
 .github/workflows/tests.yml | 195 ++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 .github/workflows/tests.yml

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 000000000..ad380d39a
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,195 @@
+name: tests
+
+on:
+  push:
+    branches-ignore:
+      - "spacy.io"
+      - "nightly.spacy.io"
+      - "v2.spacy.io"
+    paths-ignore:
+      - "*.md"
+      - "*.mdx"
+      - "website/docs/**"
+      - "website/src/**"
+      - "website/meta/*.tsx"
+      - "website/meta/*.mjs"
+      - "website/meta/languages.json"
+      - "website/meta/site.json"
+      - "website/meta/sidebars.json"
+      - "website/meta/type-annotations.json"
+      - "website/pages/**"
+      - ".github/workflows/**"
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+    paths:
+      - "!*.md"
+      - "!*.mdx"
+      - "!website/docs/**"
+      - "!website/src/**"
+      - "!website/meta/*.tsx"
+      - "!website/meta/*.mjs"
+      - "!website/meta/languages.json"
+      - "!website/meta/site.json"
+      - "!website/meta/sidebars.json"
+      - "!website/meta/type-annotations.json"
+      - "!website/pages/**"
+      - "!.github/workflows/**"
+      - ".github/workflows/tests.yml"
+
+jobs:
+  validate:
+    name: Validate
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+          architecture: x64
+
+      - name: black
+        run: |
+          python -m pip install black -c requirements.txt
+          python -m black spacy --check
+      - name: flake8
+        run: |
+          python -m pip install flake8==5.0.4
+          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
+  tests:
+    name: Test
+    needs: Validate
+    strategy:
+      fail-fast: true
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python_version: ["3.11"]
+        include:
+          - os: ubuntu-20.04
+            python_version: "3.6"
+          - os: windows-latest
+            python_version: "3.7"
+          - os: macos-latest
+            python_version: "3.8"
+          - os: ubuntu-latest
+            python_version: "3.9"
+          - os: windows-latest
+            python_version: "3.10"
+
+    runs-on: ${{ matrix.os }}
+
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python_version }}
+          architecture: x64
+
+      - name: Install dependencies
+        run: |
+          python -m pip install -U build pip setuptools
+          python -m pip install -U -r requirements.txt
+
+      - name: Build sdist
+        run: |
+          python -m build --sdist
+
+      - name: Run mypy
+        run: |
+          python -m mypy spacy
+        if: matrix.python_version != '3.6'
+
+      - name: Delete source directory and .egg-info
+        run: |
+          rm -rf spacy *.egg-info
+        shell: bash
+
+      - name: Uninstall all packages
+        run: |
+          python -m pip freeze
+          python -m pip freeze --exclude pywin32 > installed.txt
+          python -m pip uninstall -y -r installed.txt
+
+      - name: Install from sdist
+        run: |
+          SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+          SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
+        shell: bash
+
+      - name: Test import
+        run: python -W error -c "import spacy"
+
+      - name: "Test download CLI"
+        run: |
+          python -m spacy download ca_core_news_sm
+          python -m spacy download ca_core_news_md
+          python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
+        if: matrix.python_version == '3.8'
+
+      - name: "Test download_url in info CLI"
+        run: |
+          python -W error -m spacy info ca_core_news_sm | grep -q download_url
+        if: matrix.python_version == '3.8'
+
+      - name: "Test no warnings on load (#11713)"
+        run: |
+          python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
+        if: matrix.python_version == '3.8'
+
+      - name: "Test convert CLI"
+        run: |
+          python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
+        if: matrix.python_version == '3.8'
+
+      - name: "Test debug config CLI"
+        run: |
+          python -m spacy init config -p ner -l ca ner.cfg
+          python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
+        if: matrix.python_version == '3.8'
+
+      - name: "Test debug data CLI"
+        run: |
+          # will have errors due to sparse data, check for summary in output
+          python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
+        if: matrix.python_version == '3.8'
+
+      - name: "Test train CLI"
+        run: |
+          python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
+        if: matrix.python_version == '3.8'
+
+      - name: "Test assemble CLI"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
+          PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
+        if: matrix.python_version == '3.8'
+
+      - name: "Test assemble CLI vectors warning"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
+          python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
+        if: matrix.python_version == '3.8'
+
+      - name: "Install test requirements"
+        run: |
+          python -m pip install -U -r requirements.txt
+
+      - name: "Run CPU tests"
+        run: |
+          python -m pytest --pyargs spacy -W error
+
+      - name: "Run CPU tests with thinc-apple-ops"
+        run: |
+          python -m pip install 'spacy[apple]'
+          python -m pytest --pyargs spacy
+        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.8'
+
+      - run: |
+          python .github/validate_universe_json.py website/meta/universe.json
+        name: "Test website/meta/universe.json"
+        if: matrix.python_version == '3.8'

From 8ff90731610d63fbf53251f9730ad25e9d25799c Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 13 Mar 2023 14:21:17 +0100
Subject: [PATCH 07/18] CI: Move universe validation to validate job (#12406)

* CI: Move universe validation to validate job

* Fix indentation

* Update step name
---
 .github/azure-steps.yml     | 6 ------
 .github/workflows/tests.yml | 8 +++-----
 azure-pipelines.yml         | 3 +++
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index b2ccf3d81..1b8d81521 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -116,9 +116,3 @@ steps:
       python -m pytest --pyargs spacy
     displayName: "Run CPU tests with thinc-apple-ops"
     condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
-
-  - script: |
-      python .github/validate_universe_json.py website/meta/universe.json
-    displayName: 'Test website/meta/universe.json'
-    condition: eq(variables['python_version'], '3.8')
-
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index ad380d39a..b04e2a8c0 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -58,6 +58,9 @@ jobs:
         run: |
           python -m pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
+      - name: Validate website/meta/universe.json
+        run: |
+          python .github/validate_universe_json.py website/meta/universe.json
   tests:
     name: Test
     needs: Validate
@@ -188,8 +191,3 @@ jobs:
           python -m pip install 'spacy[apple]'
           python -m pytest --pyargs spacy
         if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.8'
-
-      - run: |
-          python .github/validate_universe_json.py website/meta/universe.json
-        name: "Test website/meta/universe.json"
-        if: matrix.python_version == '3.8'
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index dba11bd1a..83c57a164 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -48,6 +48,9 @@ jobs:
           pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
         displayName: "flake8"
+      - script: |
+          python .github/validate_universe_json.py website/meta/universe.json
+        displayName: 'Validate website/meta/universe.json'
 
   - job: "Test"
     dependsOn: "Validate"

From 8f1280a514ea3014ae3f8b334608028a11caca57 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 13 Mar 2023 15:10:04 +0100
Subject: [PATCH 08/18] Fix thinc-apple-ops test to run for python 3.11
 (#12408)

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index b04e2a8c0..880c09128 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -190,4 +190,4 @@ jobs:
         run: |
           python -m pip install 'spacy[apple]'
           python -m pytest --pyargs spacy
-        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.8'
+        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
\ No newline at end of file

From ea6de64596076c2fb35c322c6b8a821e05b2de1c Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Mon, 13 Mar 2023 15:14:46 +0100
Subject: [PATCH 09/18] CI: Move CLI tests to ubuntu for speed (#12409)

---
 .github/azure-steps.yml     | 18 +++++++++---------
 .github/workflows/tests.yml | 18 +++++++++---------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index 1b8d81521..20d4582cb 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -57,51 +57,51 @@ steps:
       python -m spacy download ca_core_news_md
       python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
     displayName: 'Test download CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -W error -m spacy info ca_core_news_sm | grep -q download_url
     displayName: 'Test download_url in info CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
     displayName: 'Test no warnings on load (#11713)'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
     displayName: 'Test convert CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m spacy init config -p ner -l ca ner.cfg
       python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
     displayName: 'Test debug config CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       # will have errors due to sparse data, check for summary in output
       python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
     displayName: 'Test debug data CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
     displayName: 'Test train CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
       PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
     displayName: 'Test assemble CLI'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
       python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
     displayName: 'Test assemble CLI vectors warning'
-    condition: eq(variables['python_version'], '3.8')
+    condition: eq(variables['python_version'], '3.9')
 
   - script: |
       python -m pip install -U -r requirements.txt
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 880c09128..e51bb6c17 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -132,51 +132,51 @@ jobs:
           python -m spacy download ca_core_news_sm
           python -m spacy download ca_core_news_md
           python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test download_url in info CLI"
         run: |
           python -W error -m spacy info ca_core_news_sm | grep -q download_url
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test no warnings on load (#11713)"
         run: |
           python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test convert CLI"
         run: |
           python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test debug config CLI"
         run: |
           python -m spacy init config -p ner -l ca ner.cfg
           python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test debug data CLI"
         run: |
           # will have errors due to sparse data, check for summary in output
           python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test train CLI"
         run: |
           python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test assemble CLI"
         run: |
           python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
           PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Test assemble CLI vectors warning"
         run: |
           python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
           python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
-        if: matrix.python_version == '3.8'
+        if: matrix.python_version == '3.9'
 
       - name: "Install test requirements"
         run: |

From e8cab4625c12666ef599f19eb60403500af2a385 Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 14 Mar 2023 10:21:53 +0100
Subject: [PATCH 10/18] Fix sentence indexing bug in `Span.sents` (#12405)

* Add test for partial sentences in ent.sents.

* Removed unneeded import.

* Format. Simplify code.
---
 spacy/tests/doc/test_span.py | 16 ++++++++++++++++
 spacy/tokens/span.pyx        |  5 ++---
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/spacy/tests/doc/test_span.py b/spacy/tests/doc/test_span.py
index b4631037a..adef5922f 100644
--- a/spacy/tests/doc/test_span.py
+++ b/spacy/tests/doc/test_span.py
@@ -700,3 +700,19 @@ def test_span_group_copy(doc):
     assert len(doc.spans["test"]) == 3
     # check that the copy spans were not modified and this is an isolated doc
     assert len(doc_copy.spans["test"]) == 2
+
+
+def test_for_partial_ent_sents():
+    """Spans may be associated with multiple sentences. These .sents should always be complete, not partial, sentences,
+    which this tests for.
+    """
+    doc = Doc(
+        English().vocab,
+        words=["Mahler's", "Symphony", "No.", "8", "was", "beautiful."],
+        sent_starts=[1, 0, 0, 1, 0, 0],
+    )
+    doc.set_ents([Span(doc, 1, 4, "WORK")])
+    # The specified entity is associated with both sentences in this doc, so we expect all sentences in the doc to be
+    # equal to the sentences referenced in ent.sents.
+    for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
+        assert doc_sent == ent_sent
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index cfe1236df..7750b16ed 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -460,9 +460,8 @@ cdef class Span:
                     start = i
                     if start >= self.end:
                         break
-            if start < self.end:
-                yield Span(self.doc, start, self.end)
-
+                elif i == self.doc.length - 1:
+                    yield Span(self.doc, start, self.doc.length)
 
     @property
     def ents(self):

From 377f601bff321519149bec361801b5965e0d0c7a Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 14 Mar 2023 16:06:08 +0100
Subject: [PATCH 11/18] CI: Add all paths before excluding patterns (#12419)

---
 .github/workflows/tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index e51bb6c17..c18f9cd23 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -22,6 +22,7 @@ on:
   pull_request:
     types: [opened, synchronize, reopened, edited]
     paths:
+      - "**"
       - "!*.md"
       - "!*.mdx"
       - "!website/docs/**"
@@ -190,4 +191,4 @@ jobs:
         run: |
           python -m pip install 'spacy[apple]'
           python -m pytest --pyargs spacy
-        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
\ No newline at end of file
+        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'

From 2ce9a220dbd30d3a79c2a232230204a102fb3f1d Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Tue, 14 Mar 2023 17:16:49 +0100
Subject: [PATCH 12/18] Fix --verbose for spacy find-threshold (#12418)

---
 spacy/cli/find_threshold.py |  2 +-
 website/docs/api/cli.mdx    | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py
index efa664832..6d591053d 100644
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@@ -35,7 +35,7 @@ def find_threshold_cli(
     code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
     use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
     gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
-    verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
+    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
     # fmt: on
 ):
     """
diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx
index 3f31bef95..2bb0199fc 100644
--- a/website/docs/api/cli.mdx
+++ b/website/docs/api/cli.mdx
@@ -1254,19 +1254,19 @@ be provided.
 > $ python -m spacy find-threshold my_nlp data.spacy spancat threshold spans_sc_f
 > ```
 
-| Name                    | Description                                                                                                                                                                          |
-| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                 | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
-| `data_path`             | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
-| `pipe_name`             | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
-| `threshold_key`         | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
-| `scores_key`            | Name of score to metric to optimize. ~~str (positional)~~                                                                                                                            |
-| `--n_trials`, `-n`      | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
-| `--code`, `-c`          | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
-| `--gpu-id`, `-g`        | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--gold-preproc`, `-G`  | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
-| `--silent`, `-V`, `-VV` | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
-| `--help`, `-h`          | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
+| Name                     | Description                                                                                                                                                                          |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                  | Pipeline to evaluate. Can be a package or a path to a data directory. ~~str (positional)~~                                                                                           |
+| `data_path`              | Path to file with DocBin with docs to use for threshold search. ~~Path (positional)~~                                                                                                |
+| `pipe_name`              | Name of pipe to examine thresholds for. ~~str (positional)~~                                                                                                                         |
+| `threshold_key`          | Key of threshold attribute in component's configuration. ~~str (positional)~~                                                                                                        |
+| `scores_key`             | Name of score metric to optimize. ~~str (positional)~~                                                                                                                               |
+| `--n_trials`, `-n`       | Number of trials to determine optimal thresholds. ~~int (option)~~                                                                                                                   |
+| `--code`, `-c`           | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
+| `--gpu-id`, `-g`         | GPU to use, if any. Defaults to `-1` for CPU. ~~int (option)~~                                                                                                                       |
+| `--gold-preproc`, `-G`   | Use gold preprocessing. ~~bool (flag)~~                                                                                                                                              |
+| `--verbose`, `-V`, `-VV` | Display more information for debugging purposes. ~~bool (flag)~~                                                                                                                     |
+| `--help`, `-h`           | Show help message and available arguments. ~~bool (flag)~~                                                                                                                           |
 
 ## assemble {id="assemble",tag="command"}
 

From 96b61d06712e8de7dd59a6ab8ae153893e5d7d2c Mon Sep 17 00:00:00 2001
From: Raphael Mitsch <r.mitsch@outlook.com>
Date: Tue, 14 Mar 2023 22:02:49 +0100
Subject: [PATCH 13/18] Fix EL failure with sentence-crossing entities (#12398)

* Add test reproducing EL failure in sentence-crossing entities.

* Format.

* Draft fix.

* Format.

* Fix case for len(ent.sents) == 1.

* Format.

* Format.

* Format.

* Fix mypy error.

* Merge EL sentence crossing tests.

* Remove unneeded sentencizer component.

* Fix or ignore mypy issues in test.

* Simplify ent.sents handling.

* Format. Update assert in ent.sents handling.

* Small rewrite

---------

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 spacy/pipeline/entity_linker.py            | 14 ++++--
 spacy/tests/pipeline/test_entity_linker.py | 50 ++++++++--------------
 2 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py
index f2dae0529..76ccc3247 100644
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@@ -474,18 +474,24 @@ class EntityLinker(TrainablePipe):
 
                 # Looping through each entity in batch (TODO: rewrite)
                 for j, ent in enumerate(ent_batch):
-                    sent_index = sentences.index(ent.sent)
-                    assert sent_index >= 0
+                    assert hasattr(ent, "sents")
+                    sents = list(ent.sents)
+                    sent_indices = (
+                        sentences.index(sents[0]),
+                        sentences.index(sents[-1]),
+                    )
+                    assert sent_indices[1] >= sent_indices[0] >= 0
 
                     if self.incl_context:
                         # get n_neighbour sentences, clipped to the length of the document
-                        start_sentence = max(0, sent_index - self.n_sents)
+                        start_sentence = max(0, sent_indices[0] - self.n_sents)
                         end_sentence = min(
-                            len(sentences) - 1, sent_index + self.n_sents
+                            len(sentences) - 1, sent_indices[1] + self.n_sents
                         )
                         start_token = sentences[start_sentence].start
                         end_token = sentences[end_sentence].end
                         sent_doc = doc[start_token:end_token].as_doc()
+
                         # currently, the context is the same for each entity in a sentence (should be refined)
                         sentence_encoding = self.model.predict([sent_doc])[0]
                         sentence_encoding_t = sentence_encoding.T
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 2a6258386..fc960cb01 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1,9 +1,9 @@
-from typing import Callable, Iterable, Dict, Any
+from typing import Callable, Iterable, Dict, Any, Tuple
 
 import pytest
 from numpy.testing import assert_equal
 
-from spacy import registry, util
+from spacy import registry, util, Language
 from spacy.attrs import ENT_KB_ID
 from spacy.compat import pickle
 from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
@@ -108,18 +108,23 @@ def test_issue7065():
 
 
 @pytest.mark.issue(7065)
-def test_issue7065_b():
+@pytest.mark.parametrize("entity_in_first_sentence", [True, False])
+def test_sentence_crossing_ents(entity_in_first_sentence: bool):
+    """Tests if NEL crashes if entities cross sentence boundaries and the first associated sentence doesn't have an
+    entity.
+    entity_in_first_sentence (bool): Whether to include an entity in the first sentence associated with the
+    sentence-crossing entity.
+    """
     # Test that the NEL doesn't crash when an entity crosses a sentence boundary
     nlp = English()
     vector_length = 3
-    nlp.add_pipe("sentencizer")
     text = "Mahler 's Symphony No. 8 was beautiful."
-    entities = [(0, 6, "PERSON"), (10, 24, "WORK")]
-    links = {
-        (0, 6): {"Q7304": 1.0, "Q270853": 0.0},
-        (10, 24): {"Q7304": 0.0, "Q270853": 1.0},
-    }
-    sent_starts = [1, -1, 0, 0, 0, 0, 0, 0, 0]
+    entities = [(10, 24, "WORK")]
+    links = {(10, 24): {"Q7304": 0.0, "Q270853": 1.0}}
+    if entity_in_first_sentence:
+        entities.append((0, 6, "PERSON"))
+        links[(0, 6)] = {"Q7304": 1.0, "Q270853": 0.0}
+    sent_starts = [1, -1, 0, 0, 0, 1, 0, 0, 0]
     doc = nlp(text)
     example = Example.from_dict(
         doc, {"entities": entities, "links": links, "sent_starts": sent_starts}
@@ -145,31 +150,14 @@ def test_issue7065_b():
 
     # Create the Entity Linker component and add it to the pipeline
     entity_linker = nlp.add_pipe("entity_linker", last=True)
-    entity_linker.set_kb(create_kb)
+    entity_linker.set_kb(create_kb)  # type: ignore
     # train the NEL pipe
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
     for i in range(2):
-        losses = {}
-        nlp.update(train_examples, sgd=optimizer, losses=losses)
+        nlp.update(train_examples, sgd=optimizer)
 
-    # Add a custom rule-based component to mimick NER
-    patterns = [
-        {"label": "PERSON", "pattern": [{"LOWER": "mahler"}]},
-        {
-            "label": "WORK",
-            "pattern": [
-                {"LOWER": "symphony"},
-                {"LOWER": "no"},
-                {"LOWER": "."},
-                {"LOWER": "8"},
-            ],
-        },
-    ]
-    ruler = nlp.add_pipe("entity_ruler", before="entity_linker")
-    ruler.add_patterns(patterns)
-    # test the trained model - this should not throw E148
-    doc = nlp(text)
-    assert doc
+    # This shouldn't crash.
+    entity_linker.predict([example.reference])  # type: ignore
 
 
 def test_no_entities():

From 4c5a3a2a7ba19ce71023e9d34ad66d005ab74000 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 17 Mar 2023 09:35:00 +0100
Subject: [PATCH 14/18] Remove autoblack workflow (#12437)

Now that all PRs have `black` formatting validation, we no longer need the
autoblack workflow.
---
 .github/workflows/autoblack.yml | 45 ---------------------------------
 1 file changed, 45 deletions(-)
 delete mode 100644 .github/workflows/autoblack.yml

diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml
deleted file mode 100644
index 555322782..000000000
--- a/.github/workflows/autoblack.yml
+++ /dev/null
@@ -1,45 +0,0 @@
-# GitHub Action that uses Black to reformat all Python code and submits a PR
-# in regular intervals. Inspired by: https://github.com/cclauss/autoblack
-
-name: autoblack
-on:
-  workflow_dispatch:  # allow manual trigger
-  schedule:
-    - cron: '0 8 * * 5'  # every Friday at 8am UTC
-
-jobs:
-  autoblack:
-    if: github.repository_owner == 'explosion'
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-        with:
-            ref: ${{ github.head_ref }}
-      - uses: actions/setup-python@v4
-      - run: pip install black -c requirements.txt
-      - name: Auto-format code if needed
-        run: black spacy
-      # We can't run black --check here because that returns a non-zero excit
-      # code and makes GitHub think the action failed
-      - name: Check for modified files
-        id: git-check
-        run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT
-
-      - name: Create Pull Request
-        if: steps.git-check.outputs.modified == 'true'
-        uses: peter-evans/create-pull-request@v4
-        with:
-            title: Auto-format code with black
-            labels: meta
-            commit-message: Auto-format code with black
-            committer: GitHub <noreply@github.com>
-            author: explosion-bot <explosion-bot@users.noreply.github.com>
-            body: _This PR is auto-generated._
-            branch: autoblack
-            delete-branch: true
-            draft: false
-      - name: Check outputs
-        if: steps.git-check.outputs.modified == 'true'
-        run: |
-          echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
-          echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"

From 5f72d6c83605d391550c3a992d7e772440501b07 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 17 Mar 2023 10:01:49 +0100
Subject: [PATCH 15/18] CI: Switch PR back to paths-ignore (#12438)

Switch PR tests back to paths-ignore but include changes to `.github`
for all PRs rather than trying to figure out complicated
includes+excludes.  Changes to `.github` are relatively rare and should
not be a huge burden for the CI.
---
 .github/workflows/tests.yml | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c18f9cd23..eef24ff33 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -21,21 +21,18 @@ on:
       - ".github/workflows/**"
   pull_request:
     types: [opened, synchronize, reopened, edited]
-    paths:
-      - "**"
-      - "!*.md"
-      - "!*.mdx"
-      - "!website/docs/**"
-      - "!website/src/**"
-      - "!website/meta/*.tsx"
-      - "!website/meta/*.mjs"
-      - "!website/meta/languages.json"
-      - "!website/meta/site.json"
-      - "!website/meta/sidebars.json"
-      - "!website/meta/type-annotations.json"
-      - "!website/pages/**"
-      - "!.github/workflows/**"
-      - ".github/workflows/tests.yml"
+    paths-ignore:
+      - "*.md"
+      - "*.mdx"
+      - "website/docs/**"
+      - "website/src/**"
+      - "website/meta/*.tsx"
+      - "website/meta/*.mjs"
+      - "website/meta/languages.json"
+      - "website/meta/site.json"
+      - "website/meta/sidebars.json"
+      - "website/meta/type-annotations.json"
+      - "website/pages/**"
 
 jobs:
   validate:

From 54c614e116cb6d97f6fedab4bbbb49e43aa0696f Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Fri, 17 Mar 2023 10:59:53 +0100
Subject: [PATCH 16/18] CI: Separate spacy universe validation into a separate
 workflow (#12440)

* Separate spacy universe validation into a separate workflow

* Fix new workflow name
---
 .github/workflows/tests.yml               | 23 ++--------------
 .github/workflows/universe_validation.yml | 32 +++++++++++++++++++++++
 2 files changed, 34 insertions(+), 21 deletions(-)
 create mode 100644 .github/workflows/universe_validation.yml

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index eef24ff33..41ea6ce50 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,30 +9,14 @@ on:
     paths-ignore:
       - "*.md"
       - "*.mdx"
-      - "website/docs/**"
-      - "website/src/**"
-      - "website/meta/*.tsx"
-      - "website/meta/*.mjs"
-      - "website/meta/languages.json"
-      - "website/meta/site.json"
-      - "website/meta/sidebars.json"
-      - "website/meta/type-annotations.json"
-      - "website/pages/**"
+      - "website/**"
       - ".github/workflows/**"
   pull_request:
     types: [opened, synchronize, reopened, edited]
     paths-ignore:
       - "*.md"
       - "*.mdx"
-      - "website/docs/**"
-      - "website/src/**"
-      - "website/meta/*.tsx"
-      - "website/meta/*.mjs"
-      - "website/meta/languages.json"
-      - "website/meta/site.json"
-      - "website/meta/sidebars.json"
-      - "website/meta/type-annotations.json"
-      - "website/pages/**"
+      - "website/**"
 
 jobs:
   validate:
@@ -56,9 +40,6 @@ jobs:
         run: |
           python -m pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
-      - name: Validate website/meta/universe.json
-        run: |
-          python .github/validate_universe_json.py website/meta/universe.json
   tests:
     name: Test
     needs: Validate
diff --git a/.github/workflows/universe_validation.yml b/.github/workflows/universe_validation.yml
new file mode 100644
index 000000000..f9e317aaa
--- /dev/null
+++ b/.github/workflows/universe_validation.yml
@@ -0,0 +1,32 @@
+name: universe validation
+
+on:
+  push:
+    branches-ignore:
+      - "spacy.io"
+      - "nightly.spacy.io"
+      - "v2.spacy.io"
+    paths:
+      - "website/meta/universe.json"
+  pull_request:
+    types: [opened, synchronize, reopened, edited]
+    paths:
+      - "website/meta/universe.json"
+
+jobs:
+  validate:
+    name: Validate
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v3
+
+      - name: Configure Python version
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.7"
+          architecture: x64
+
+      - name: Validate website/meta/universe.json
+        run: |
+          python .github/validate_universe_json.py website/meta/universe.json

From b479f8bfa59bc6f3c398c0a3ff89eb672cd92b8b Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Wed, 22 Mar 2023 11:09:37 +0100
Subject: [PATCH 17/18] Add user survey alert to the top (#12452)

* Add user survey alert to the top

* Shorter

---------

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
---
 website/src/styles/navigation.module.sass |  7 ++++---
 website/src/templates/index.js            | 15 ++++++++-------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/website/src/styles/navigation.module.sass b/website/src/styles/navigation.module.sass
index da5c18b6f..3adc5cd03 100644
--- a/website/src/styles/navigation.module.sass
+++ b/website/src/styles/navigation.module.sass
@@ -111,11 +111,12 @@
     line-height: var(--line-height-xs)
     text-align: center
 
-@include breakpoint(max, xs)
-    .list
+@include breakpoint(max, md)
+    .alert
         display: none
 
-    .alert
+@include breakpoint(max, xs)
+    .list
         display: none
 
     .has-alert
diff --git a/website/src/templates/index.js b/website/src/templates/index.js
index 2ee29a9e9..4c10e09c5 100644
--- a/website/src/templates/index.js
+++ b/website/src/templates/index.js
@@ -25,11 +25,6 @@ const AlertSpace = ({ nightly, legacy }) => {
     const isOnline = useOnlineStatus()
     return (
         <>
-            {isOnline && (
-                <Alert title="💥 We'd love to learn more about your experience with spaCy!">
-                    <Link to="https://form.typeform.com/to/aMel9q9f">Take our survey here.</Link>
-                </Alert>
-            )}
             {nightly && (
                 <Alert
                     title="You're viewing the pre-release docs."
@@ -62,9 +57,15 @@ const AlertSpace = ({ nightly, legacy }) => {
     )
 }
 
+// const navAlert = (
+//     <Link to="/usage/v3-5" noLinkLayout>
+//         <strong>💥 Out now:</strong> spaCy v3.5
+//     </Link>
+// )
+
 const navAlert = (
-    <Link to="/usage/v3-5" noLinkLayout>
-        <strong>💥 Out now:</strong> spaCy v3.5
+    <Link to="https://form.typeform.com/to/aMel9q9f" noLinkLayout>
+        <strong>💥 Take the user survey!</strong>
     </Link>
 )
 

From 28de85737fa2e1c6f04c830d3bc41961f712ce77 Mon Sep 17 00:00:00 2001
From: Vinit Ravishankar <vinit.ravishankar@gmail.com>
Date: Wed, 22 Mar 2023 12:17:56 +0100
Subject: [PATCH 18/18] Tagger label smoothing (#12293)

* add label smoothing

* use True/False instead of floats

* add entropy to debug data

* formatting

* docs

* change test to check difference in distributions

* Update website/docs/api/tagger.mdx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* Update spacy/pipeline/tagger.pyx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* bool -> float

* update docs

* fix seed

* black

* update tests to use label_smoothing = 0.0

* set default to 0.0, update quickstart

* Update spacy/pipeline/tagger.pyx

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>

* update morphologizer, tagger test

* fix morph docs

* add url to docs

---------

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
---
 spacy/cli/debug_data.py                       |  9 +++++--
 spacy/cli/templates/quickstart_training.jinja |  2 ++
 spacy/pipeline/morphologizer.pyx              | 11 +++++---
 spacy/pipeline/tagger.pyx                     | 10 ++++---
 spacy/tests/pipeline/test_morphologizer.py    | 27 ++++++++++++++++++-
 spacy/tests/pipeline/test_tagger.py           | 25 ++++++++++++++++-
 website/docs/api/morphologizer.mdx            | 13 ++++-----
 website/docs/api/tagger.mdx                   | 13 ++++-----
 8 files changed, 87 insertions(+), 23 deletions(-)

diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py
index f20673f25..97b4db285 100644
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@@ -7,6 +7,7 @@ import srsly
 from wasabi import Printer, MESSAGES, msg
 import typer
 import math
+import numpy
 
 from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
 from ._util import import_code, debug_cli, _format_number
@@ -521,9 +522,13 @@ def debug_data(
 
     if "tagger" in factory_names:
         msg.divider("Part-of-speech Tagging")
-        label_list = [label for label in gold_train_data["tags"]]
-        model_labels = _get_labels_from_model(nlp, "tagger")
+        label_list, counts = zip(*gold_train_data["tags"].items())
         msg.info(f"{len(label_list)} label(s) in train data")
+        p = numpy.array(counts)
+        p = p / p.sum()
+        norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(label_list))
+        msg.info(f"{norm_entropy} is the normalised label entropy")
+        model_labels = _get_labels_from_model(nlp, "tagger")
         labels = set(label_list)
         missing_labels = model_labels - labels
         if missing_labels:
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index c5e8c6c43..9481e53be 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -331,6 +331,7 @@ maxout_pieces = 3
 {% if "morphologizer" in components %}
 [components.morphologizer]
 factory = "morphologizer"
+label_smoothing = 0.05
 
 [components.morphologizer.model]
 @architectures = "spacy.Tagger.v2"
@@ -344,6 +345,7 @@ width = ${components.tok2vec.model.encode.width}
 {% if "tagger" in components %}
 [components.tagger]
 factory = "tagger"
+label_smoothing = 0.05
 
 [components.tagger.model]
 @architectures = "spacy.Tagger.v2"
diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx
index 24f98508f..be8f82212 100644
--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@@ -52,7 +52,8 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "morphologizer",
     assigns=["token.morph", "token.pos"],
-    default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False, "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}},
+    default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False,
+                    "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}, "label_smoothing": 0.0},
     default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
 )
 def make_morphologizer(
@@ -61,9 +62,10 @@ def make_morphologizer(
     name: str,
     overwrite: bool,
     extend: bool,
+    label_smoothing: float,
     scorer: Optional[Callable],
 ):
-    return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer)
+    return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, label_smoothing=label_smoothing, scorer=scorer)
 
 
 def morphologizer_score(examples, **kwargs):
@@ -94,6 +96,7 @@ class Morphologizer(Tagger):
         *,
         overwrite: bool = BACKWARD_OVERWRITE,
         extend: bool = BACKWARD_EXTEND,
+        label_smoothing: float = 0.0,
         scorer: Optional[Callable] = morphologizer_score,
     ):
         """Initialize a morphologizer.
@@ -121,6 +124,7 @@ class Morphologizer(Tagger):
             "labels_pos": {},
             "overwrite": overwrite,
             "extend": extend,
+            "label_smoothing": label_smoothing,
         }
         self.cfg = dict(sorted(cfg.items()))
         self.scorer = scorer
@@ -270,7 +274,8 @@ class Morphologizer(Tagger):
         DOCS: https://spacy.io/api/morphologizer#get_loss
         """
         validate_examples(examples, "Morphologizer.get_loss")
-        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
+        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False,
+                                                    label_smoothing=self.cfg["label_smoothing"])
         truths = []
         for eg in examples:
             eg_truths = []
diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx
index d6ecbf084..4d5d78035 100644
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@@ -45,7 +45,7 @@ DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]
 @Language.factory(
     "tagger",
     assigns=["token.tag"],
-    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!"},
+    default_config={"model": DEFAULT_TAGGER_MODEL, "overwrite": False, "scorer": {"@scorers": "spacy.tagger_scorer.v1"}, "neg_prefix": "!", "label_smoothing": 0.0},
     default_score_weights={"tag_acc": 1.0},
 )
 def make_tagger(
@@ -55,6 +55,7 @@ def make_tagger(
     overwrite: bool,
     scorer: Optional[Callable],
     neg_prefix: str,
+    label_smoothing: float,
 ):
     """Construct a part-of-speech tagger component.
 
@@ -63,7 +64,7 @@ def make_tagger(
         in size, and be normalized as probabilities (all scores between 0 and 1,
         with the rows summing to 1).
     """
-    return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix)
+    return Tagger(nlp.vocab, model, name, overwrite=overwrite, scorer=scorer, neg_prefix=neg_prefix, label_smoothing=label_smoothing)
 
 
 def tagger_score(examples, **kwargs):
@@ -89,6 +90,7 @@ class Tagger(TrainablePipe):
         overwrite=BACKWARD_OVERWRITE,
         scorer=tagger_score,
         neg_prefix="!",
+        label_smoothing=0.0,
     ):
         """Initialize a part-of-speech tagger.
 
@@ -105,7 +107,7 @@ class Tagger(TrainablePipe):
         self.model = model
         self.name = name
         self._rehearsal_model = None
-        cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix}
+        cfg = {"labels": [], "overwrite": overwrite, "neg_prefix": neg_prefix, "label_smoothing": label_smoothing}
         self.cfg = dict(sorted(cfg.items()))
         self.scorer = scorer
 
@@ -256,7 +258,7 @@ class Tagger(TrainablePipe):
         DOCS: https://spacy.io/api/tagger#get_loss
         """
         validate_examples(examples, "Tagger.get_loss")
-        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"])
+        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False, neg_prefix=self.cfg["neg_prefix"], label_smoothing=self.cfg["label_smoothing"])
         # Convert empty tag "" to missing value None so that both misaligned
         # tokens and tokens with missing annotation have the default missing
         # value None.
diff --git a/spacy/tests/pipeline/test_morphologizer.py b/spacy/tests/pipeline/test_morphologizer.py
index 33696bfd8..8ce74ccfa 100644
--- a/spacy/tests/pipeline/test_morphologizer.py
+++ b/spacy/tests/pipeline/test_morphologizer.py
@@ -1,5 +1,5 @@
 import pytest
-from numpy.testing import assert_equal
+from numpy.testing import assert_equal, assert_almost_equal
 
 from spacy import util
 from spacy.training import Example
@@ -19,6 +19,8 @@ def test_label_types():
         morphologizer.add_label(9)
 
 
+TAGS = ["Feat=N", "Feat=V", "Feat=J"]
+
 TRAIN_DATA = [
     (
         "I like green eggs",
@@ -32,6 +34,29 @@ TRAIN_DATA = [
 ]
 
 
+def test_label_smoothing():
+    nlp = Language()
+    morph_no_ls = nlp.add_pipe("morphologizer", "no_label_smoothing")
+    morph_ls = nlp.add_pipe(
+        "morphologizer", "label_smoothing", config=dict(label_smoothing=0.05)
+    )
+    train_examples = []
+    losses = {}
+    for tag in TAGS:
+        morph_no_ls.add_label(tag)
+        morph_ls.add_label(tag)
+    for t in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
+
+    nlp.initialize(get_examples=lambda: train_examples)
+    tag_scores, bp_tag_scores = morph_ls.model.begin_update(
+        [eg.predicted for eg in train_examples]
+    )
+    no_ls_grads = morph_no_ls.get_loss(train_examples, tag_scores)[1][0]
+    ls_grads = morph_ls.get_loss(train_examples, tag_scores)[1][0]
+    assert_almost_equal(ls_grads / no_ls_grads, 0.94285715)
+
+
 def test_no_label():
     nlp = Language()
     nlp.add_pipe("morphologizer")
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 96e75851e..0cc25a64b 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,5 +1,5 @@
 import pytest
-from numpy.testing import assert_equal
+from numpy.testing import assert_equal, assert_almost_equal
 from spacy.attrs import TAG
 
 from spacy import util
@@ -67,6 +67,29 @@ PARTIAL_DATA = [
 ]
 
 
+def test_label_smoothing():
+    nlp = Language()
+    tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing")
+    tagger_ls = nlp.add_pipe(
+        "tagger", "label_smoothing", config=dict(label_smoothing=0.05)
+    )
+    train_examples = []
+    losses = {}
+    for tag in TAGS:
+        tagger_no_ls.add_label(tag)
+        tagger_ls.add_label(tag)
+    for t in TRAIN_DATA:
+        train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
+
+    nlp.initialize(get_examples=lambda: train_examples)
+    tag_scores, bp_tag_scores = tagger_ls.model.begin_update(
+        [eg.predicted for eg in train_examples]
+    )
+    no_ls_grads = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
+    ls_grads = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
+    assert_almost_equal(ls_grads / no_ls_grads, 0.925)
+
+
 def test_no_label():
     nlp = Language()
     nlp.add_pipe("tagger")
diff --git a/website/docs/api/morphologizer.mdx b/website/docs/api/morphologizer.mdx
index f097f2ae3..8f189d129 100644
--- a/website/docs/api/morphologizer.mdx
+++ b/website/docs/api/morphologizer.mdx
@@ -42,12 +42,13 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("morphologizer", config=config)
 > ```
 
-| Setting                                  | Description                                                                                                                                                                                                                                                            |
-| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `model`                                  | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
-| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
-| `extend` <Tag variant="new">3.2</Tag>    | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
-| `scorer` <Tag variant="new">3.2</Tag>    | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
+| Setting                                        | Description                                                                                                                                                                                                                                                            |
+| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `model`                                        | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                |
+| `overwrite` <Tag variant="new">3.2</Tag>       | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~                                                                                                                                                                                  |
+| `extend` <Tag variant="new">3.2</Tag>          | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~                                                                                                                      |
+| `scorer` <Tag variant="new">3.2</Tag>          | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
+| `label_smoothing` <Tag variant="new">3.6</Tag> | [Label smoothing](https://arxiv.org/abs/1906.02629) factor. Defaults to `0.0`. ~~float~~                                                                                                                                                                               |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
diff --git a/website/docs/api/tagger.mdx b/website/docs/api/tagger.mdx
index ee38de81c..d9b0506fb 100644
--- a/website/docs/api/tagger.mdx
+++ b/website/docs/api/tagger.mdx
@@ -40,12 +40,13 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("tagger", config=config)
 > ```
 
-| Setting                                     | Description                                                                                                                                                                                                                                                                                            |
-| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `model`                                     | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
-| `overwrite` <Tag variant="new">3.2</Tag>    | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                              |
-| `scorer` <Tag variant="new">3.2</Tag>       | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                                                                            |
-| `neg_prefix` <Tag variant="new">3.2.1</Tag> | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~                                                                                                                                                              |
+| Setting                                        | Description                                                                                                                                                                                                                                                                                            |
+| ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `model`                                        | A model instance that predicts the tag probabilities. The output vectors should match the number of tags in size, and be normalized as probabilities (all scores between 0 and 1, with the rows summing to `1`). Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
+| `overwrite` <Tag variant="new">3.2</Tag>       | Whether existing annotation is overwritten. Defaults to `False`. ~~bool~~                                                                                                                                                                                                                              |
+| `scorer` <Tag variant="new">3.2</Tag>          | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Optional[Callable]~~                                                                                                                                                            |
+| `neg_prefix` <Tag variant="new">3.2.1</Tag>    | The prefix used to specify incorrect tags while training. The tagger will learn not to predict exactly this tag. Defaults to `!`. ~~str~~                                                                                                                                                              |
+| `label_smoothing` <Tag variant="new">3.6</Tag> | [Label smoothing](https://arxiv.org/abs/1906.02629) factor. Defaults to `0.0`. ~~float~~                                                                                                                                                                                                               |
 
 ```python
 %%GITHUB_SPACY/spacy/pipeline/tagger.pyx