diff --git a/spacy/about.py b/spacy/about.py index c6b09039e..7c0a59b4e 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "3.6.0.dev0" +__version__ = "3.6.0.dev1" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __projects__ = "https://github.com/explosion/projects" diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja index 9481e53be..e3ca73cfb 100644 --- a/spacy/cli/templates/quickstart_training.jinja +++ b/spacy/cli/templates/quickstart_training.jinja @@ -3,7 +3,7 @@ the docs and the init config command. It encodes various best practices and can help generate the best possible configuration, given a user's requirements. #} {%- set use_transformer = hardware != "cpu" and transformer_data -%} {%- set transformer = transformer_data[optimize] if use_transformer else {} -%} -{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%} +{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "span_finder", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%} [paths] train = null dev = null @@ -28,7 +28,7 @@ lang = "{{ lang }}" tok2vec/transformer. #} {%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%} {%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%} -{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%} +{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "span_finder" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%} {%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%} {%- else -%} {%- set full_pipeline = components -%} @@ -127,6 +127,30 @@ grad_factor = 1.0 @layers = "reduce_mean.v1" {% endif -%} +{% if "span_finder" in components -%} +[components.span_finder] +factory = "span_finder" +max_length = null +min_length = null +scorer = {"@scorers":"spacy.span_finder_scorer.v1"} +spans_key = "sc" +threshold = 0.5 + +[components.span_finder.model] +@architectures = "spacy.SpanFinder.v1" + +[components.span_finder.model.scorer] +@layers = "spacy.LinearLogistic.v1" +nO = 2 + +[components.span_finder.model.tok2vec] +@architectures = "spacy-transformers.TransformerListener.v1" +grad_factor = 1.0 + +[components.span_finder.model.tok2vec.pooling] +@layers = "reduce_mean.v1" +{% endif -%} + {% if "spancat" in components -%} [components.spancat] factory = "spancat" @@ -392,6 +416,27 @@ nO = null width = ${components.tok2vec.model.encode.width} {% endif %} +{% if "span_finder" in components %} +[components.span_finder] +factory = "span_finder" +max_length = null +min_length = null +scorer = {"@scorers":"spacy.span_finder_scorer.v1"} +spans_key = "sc" +threshold = 0.5 + +[components.span_finder.model] +@architectures = "spacy.SpanFinder.v1" + 
+[components.span_finder.model.scorer] +@layers = "spacy.LinearLogistic.v1" +nO = 2 + +[components.span_finder.model.tok2vec] +@architectures = "spacy.Tok2VecListener.v1" +width = ${components.tok2vec.model.encode.width} +{% endif %} + {% if "spancat" in components %} [components.spancat] factory = "spancat" diff --git a/spacy/errors.py b/spacy/errors.py index 40cfa8d92..928c3be90 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -970,6 +970,13 @@ class Errors(metaclass=ErrorsWithCodes): E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` " "or use `auto_select_port=True` to pick an available port automatically.") E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.") + E1052 = ("Unable to copy spans: the character offsets for the span at " + "index {i} in the span group do not align with the tokenization " + "in the target doc.") + E1053 = ("Both 'min_length' and 'max_length' should be larger than 0, but found" + " 'min_length': {min_length}, 'max_length': {max_length}") + E1054 = ("The text, including whitespace, must match between reference and " + "predicted docs when training {component}.") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/language.py b/spacy/language.py index 289e6dd2c..0e9ff6893 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1,6 +1,6 @@ from typing import Iterator, Optional, Any, Dict, Callable, Iterable from typing import Union, Tuple, List, Set, Pattern, Sequence -from typing import NoReturn, TYPE_CHECKING, TypeVar, cast, overload +from typing import NoReturn, TypeVar, cast, overload from dataclasses import dataclass import random @@ -1269,7 +1269,10 @@ class Language: "No 'get_examples' callback provided to 'Language.initialize', creating dummy examples" ) doc = Doc(self.vocab, words=["x", "y", "z"]) - get_examples = lambda: [Example.from_dict(doc, {})] + + def get_examples(): + return [Example.from_dict(doc, {})] + if not hasattr(get_examples, "__call__"): err = Errors.E930.format( method="Language.initialize", obj=type(get_examples) diff --git a/spacy/ml/models/__init__.py b/spacy/ml/models/__init__.py index 9b7628f0e..5125018e5 100644 --- a/spacy/ml/models/__init__.py +++ b/spacy/ml/models/__init__.py @@ -1,6 +1,7 @@ from .entity_linker import * # noqa from .multi_task import * # noqa from .parser import * # noqa +from .span_finder import * # noqa from .spancat import * # noqa from .tagger import * # noqa from .textcat import * # noqa diff --git a/spacy/ml/models/span_finder.py b/spacy/ml/models/span_finder.py new file mode 100644 index 000000000..a805e2086 --- /dev/null +++ b/spacy/ml/models/span_finder.py @@ -0,0 +1,42 @@ +from typing import Callable, List, Tuple + +from thinc.api import Model, chain, with_array +from thinc.types import Floats1d, Floats2d + +from ...tokens import Doc + +from ...util import registry + +InT = List[Doc] +OutT = Floats2d + + +@registry.architectures("spacy.SpanFinder.v1") +def build_finder_model( + tok2vec: Model[InT, List[Floats2d]], scorer: Model[OutT, OutT] +) -> Model[InT, OutT]: + + logistic_layer: Model[List[Floats2d], List[Floats2d]] = with_array(scorer) + model: Model[InT, OutT] = chain(tok2vec, logistic_layer, flattener()) + model.set_ref("tok2vec", tok2vec) + model.set_ref("scorer", scorer) + model.set_ref("logistic_layer", logistic_layer) + + return model + + +def flattener() -> Model[List[Floats2d], Floats2d]: + """Flattens the input to a 1-dimensional 
list of scores""" + + def forward( + model: Model[Floats1d, Floats1d], X: List[Floats2d], is_train: bool + ) -> Tuple[Floats2d, Callable[[Floats2d], List[Floats2d]]]: + lens = model.ops.asarray1i([len(doc) for doc in X]) + Y = model.ops.flatten(X) + + def backprop(dY: Floats2d) -> List[Floats2d]: + return model.ops.unflatten(dY, lens) + + return Y, backprop + + return Model("Flattener", forward=forward) diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py index 26931606b..40e3fd638 100644 --- a/spacy/pipeline/__init__.py +++ b/spacy/pipeline/__init__.py @@ -2,21 +2,22 @@ from .attributeruler import AttributeRuler from .dep_parser import DependencyParser from .edit_tree_lemmatizer import EditTreeLemmatizer from .entity_linker import EntityLinker -from .ner import EntityRecognizer from .entityruler import EntityRuler +from .functions import merge_entities, merge_noun_chunks, merge_subtokens from .lemmatizer import Lemmatizer from .morphologizer import Morphologizer +from .ner import EntityRecognizer from .pipe import Pipe -from .trainable_pipe import TrainablePipe -from .senter import SentenceRecognizer from .sentencizer import Sentencizer +from .senter import SentenceRecognizer +from .span_finder import SpanFinder +from .span_ruler import SpanRuler +from .spancat import SpanCategorizer from .tagger import Tagger from .textcat import TextCategorizer -from .spancat import SpanCategorizer -from .span_ruler import SpanRuler from .textcat_multilabel import MultiLabel_TextCategorizer from .tok2vec import Tok2Vec -from .functions import merge_entities, merge_noun_chunks, merge_subtokens +from .trainable_pipe import TrainablePipe __all__ = [ "AttributeRuler", @@ -31,6 +32,7 @@ __all__ = [ "SentenceRecognizer", "Sentencizer", "SpanCategorizer", + "SpanFinder", "SpanRuler", "Tagger", "TextCategorizer", diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py new file mode 100644 index 000000000..da3c38430 --- /dev/null +++ b/spacy/pipeline/span_finder.py @@ -0,0 +1,336 @@ +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple + +from thinc.api import Config, Model, Optimizer, set_dropout_rate +from thinc.types import Floats2d + +from ..language import Language +from .trainable_pipe import TrainablePipe +from ..scorer import Scorer +from ..tokens import Doc, Span +from ..training import Example +from ..errors import Errors + +from ..util import registry +from .spancat import DEFAULT_SPANS_KEY + +span_finder_default_config = """ +[model] +@architectures = "spacy.SpanFinder.v1" + +[model.scorer] +@layers = "spacy.LinearLogistic.v1" +nO = 2 + +[model.tok2vec] +@architectures = "spacy.Tok2Vec.v2" + +[model.tok2vec.embed] +@architectures = "spacy.MultiHashEmbed.v2" +width = 96 +rows = [5000, 1000, 2500, 1000] +attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"] +include_static_vectors = false + +[model.tok2vec.encode] +@architectures = "spacy.MaxoutWindowEncoder.v2" +width = ${model.tok2vec.embed.width} +window_size = 1 +maxout_pieces = 3 +depth = 4 +""" + +DEFAULT_SPAN_FINDER_MODEL = Config().from_str(span_finder_default_config)["model"] + + +@Language.factory( + "span_finder", + assigns=["doc.spans"], + default_config={ + "threshold": 0.5, + "model": DEFAULT_SPAN_FINDER_MODEL, + "spans_key": DEFAULT_SPANS_KEY, + "max_length": None, + "min_length": None, + "scorer": {"@scorers": "spacy.span_finder_scorer.v1"}, + }, + default_score_weights={ + f"span_finder_{DEFAULT_SPANS_KEY}_f": 1.0, + f"span_finder_{DEFAULT_SPANS_KEY}_p": 0.0, + 
f"span_finder_{DEFAULT_SPANS_KEY}_r": 0.0, + }, +) +def make_span_finder( + nlp: Language, + name: str, + model: Model[Iterable[Doc], Floats2d], + spans_key: str, + threshold: float, + max_length: Optional[int], + min_length: Optional[int], + scorer: Optional[Callable], +) -> "SpanFinder": + """Create a SpanFinder component. The component predicts whether a token is + the start or the end of a potential span. + + model (Model[List[Doc], Floats2d]): A model instance that + is given a list of documents and predicts a probability for each token. + spans_key (str): Key of the doc.spans dict to save the spans under. During + initialization and training, the component will look for spans on the + reference document under the same key. + threshold (float): Minimum probability to consider a prediction positive. + max_length (Optional[int]): Maximum length of the produced spans, defaults + to None meaning unlimited length. + min_length (Optional[int]): Minimum length of the produced spans, defaults + to None meaning shortest span length is 1. + scorer (Optional[Callable]): The scoring method. Defaults to + Scorer.score_spans for the Doc.spans[spans_key] with overlapping + spans allowed. + """ + return SpanFinder( + nlp, + model=model, + threshold=threshold, + name=name, + scorer=scorer, + max_length=max_length, + min_length=min_length, + spans_key=spans_key, + ) + + +@registry.scorers("spacy.span_finder_scorer.v1") +def make_span_finder_scorer(): + return span_finder_score + + +def span_finder_score(examples: Iterable[Example], **kwargs) -> Dict[str, Any]: + kwargs = dict(kwargs) + attr_prefix = "span_finder_" + key = kwargs["spans_key"] + kwargs.setdefault("attr", f"{attr_prefix}{key}") + kwargs.setdefault( + "getter", lambda doc, key: doc.spans.get(key[len(attr_prefix) :], []) + ) + kwargs.setdefault("has_annotation", lambda doc: key in doc.spans) + kwargs.setdefault("allow_overlap", True) + kwargs.setdefault("labeled", False) + scores = Scorer.score_spans(examples, **kwargs) + scores.pop(f"{kwargs['attr']}_per_type", None) + return scores + + +def _char_indices(span: Span) -> Tuple[int, int]: + start = span[0].idx + end = span[-1].idx + len(span[-1]) + return start, end + + +class SpanFinder(TrainablePipe): + """Pipeline that learns span boundaries. + + DOCS: https://spacy.io/api/spanfinder + """ + + def __init__( + self, + nlp: Language, + model: Model[Iterable[Doc], Floats2d], + name: str = "span_finder", + *, + spans_key: str = DEFAULT_SPANS_KEY, + threshold: float = 0.5, + max_length: Optional[int] = None, + min_length: Optional[int] = None, + scorer: Optional[Callable] = span_finder_score, + ) -> None: + """Initialize the span finder. + model (thinc.api.Model): The Thinc Model powering the pipeline + component. + name (str): The component instance name, used to add entries to the + losses during training. + threshold (float): Minimum probability to consider a prediction + positive. + scorer (Optional[Callable]): The scoring method. + spans_key (str): Key of the doc.spans dict to save the spans under. + During initialization and training, the component will look for + spans on the reference document under the same key. + max_length (Optional[int]): Maximum length of the produced spans, + defaults to None meaning unlimited length. + min_length (Optional[int]): Minimum length of the produced spans, + defaults to None meaning shortest span length is 1. 
+
+        DOCS: https://spacy.io/api/spanfinder#init
+        """
+        self.vocab = nlp.vocab
+        if (max_length is not None and max_length < 1) or (
+            min_length is not None and min_length < 1
+        ):
+            raise ValueError(
+                Errors.E1053.format(min_length=min_length, max_length=max_length)
+            )
+        self.model = model
+        self.name = name
+        self.scorer = scorer
+        self.cfg: Dict[str, Any] = {
+            "min_length": min_length,
+            "max_length": max_length,
+            "threshold": threshold,
+            "spans_key": spans_key,
+        }
+
+    def predict(self, docs: Iterable[Doc]):
+        """Apply the pipeline's model to a batch of docs, without modifying
+        them.
+
+        docs (Iterable[Doc]): The documents to predict.
+        RETURNS: The model's prediction for each document.
+
+        DOCS: https://spacy.io/api/spanfinder#predict
+        """
+        scores = self.model.predict(docs)
+        return scores
+
+    def set_annotations(self, docs: Iterable[Doc], scores: Floats2d) -> None:
+        """Modify a batch of Doc objects, using pre-computed scores.
+        docs (Iterable[Doc]): The documents to modify.
+        scores: The scores to set, produced by SpanFinder.predict.
+
+        DOCS: https://spacy.io/api/spanfinder#set_annotations
+        """
+        offset = 0
+        for doc in docs:
+            doc.spans[self.cfg["spans_key"]] = []
+            starts = []
+            ends = []
+            doc_scores = scores[offset : offset + len(doc)]
+
+            for token, token_score in zip(doc, doc_scores):
+                if token_score[0] >= self.cfg["threshold"]:
+                    starts.append(token.i)
+                if token_score[1] >= self.cfg["threshold"]:
+                    ends.append(token.i)
+
+            for start in starts:
+                for end in ends:
+                    span_length = end + 1 - start
+                    if span_length < 1:
+                        continue
+                    if (
+                        self.cfg["min_length"] is None
+                        or self.cfg["min_length"] <= span_length
+                    ) and (
+                        self.cfg["max_length"] is None
+                        or span_length <= self.cfg["max_length"]
+                    ):
+                        doc.spans[self.cfg["spans_key"]].append(doc[start : end + 1])
+            offset += len(doc)
+
+    def update(
+        self,
+        examples: Iterable[Example],
+        *,
+        drop: float = 0.0,
+        sgd: Optional[Optimizer] = None,
+        losses: Optional[Dict[str, float]] = None,
+    ) -> Dict[str, float]:
+        """Learn from a batch of documents and gold-standard information,
+        updating the pipe's model. Delegates to predict and get_loss.
+        examples (Iterable[Example]): A batch of Example objects.
+        drop (float): The dropout rate.
+        sgd (Optional[thinc.api.Optimizer]): The optimizer.
+        losses (Optional[Dict[str, float]]): Optional record of the loss during
+            training. Updated using the component name as the key.
+        RETURNS (Dict[str, float]): The updated losses dictionary.
+
+        DOCS: https://spacy.io/api/spanfinder#update
+        """
+        if losses is None:
+            losses = {}
+        losses.setdefault(self.name, 0.0)
+        predicted = [eg.predicted for eg in examples]
+        set_dropout_rate(self.model, drop)
+        scores, backprop_scores = self.model.begin_update(predicted)
+        loss, d_scores = self.get_loss(examples, scores)
+        backprop_scores(d_scores)
+        if sgd is not None:
+            self.finish_update(sgd)
+        losses[self.name] += loss
+        return losses
+
+    def get_loss(self, examples, scores) -> Tuple[float, Floats2d]:
+        """Find the loss and gradient of loss for the batch of documents and
+        their predicted scores.
+        examples (Iterable[Example]): The batch of examples.
+        scores: Scores representing the model's predictions.
+        RETURNS (Tuple[float, Floats2d]): The loss and the gradient.
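+        The loss is the summed squared difference between the predicted
+        and the aligned reference scores, with start/end positions that
+        cannot be aligned between the two tokenizations masked out.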
+ + DOCS: https://spacy.io/api/spanfinder#get_loss + """ + truths, masks = self._get_aligned_truth_scores(examples, self.model.ops) + d_scores = scores - self.model.ops.asarray2f(truths) + d_scores *= masks + loss = float((d_scores**2).sum()) + return loss, d_scores + + def _get_aligned_truth_scores(self, examples, ops) -> Tuple[Floats2d, Floats2d]: + """Align scores of the predictions to the references for calculating + the loss. + """ + truths = [] + masks = [] + for eg in examples: + if eg.x.text != eg.y.text: + raise ValueError(Errors.E1054.format(component="span_finder")) + n_tokens = len(eg.predicted) + truth = ops.xp.zeros((n_tokens, 2), dtype="float32") + mask = ops.xp.ones((n_tokens, 2), dtype="float32") + if self.cfg["spans_key"] in eg.reference.spans: + for span in eg.reference.spans[self.cfg["spans_key"]]: + ref_start_char, ref_end_char = _char_indices(span) + pred_span = eg.predicted.char_span( + ref_start_char, ref_end_char, alignment_mode="expand" + ) + pred_start_char, pred_end_char = _char_indices(pred_span) + start_match = pred_start_char == ref_start_char + end_match = pred_end_char == ref_end_char + if start_match: + truth[pred_span[0].i, 0] = 1 + else: + mask[pred_span[0].i, 0] = 0 + if end_match: + truth[pred_span[-1].i, 1] = 1 + else: + mask[pred_span[-1].i, 1] = 0 + truths.append(truth) + masks.append(mask) + truths = ops.xp.concatenate(truths, axis=0) + masks = ops.xp.concatenate(masks, axis=0) + return truths, masks + + def initialize( + self, + get_examples: Callable[[], Iterable[Example]], + *, + nlp: Optional[Language] = None, + ) -> None: + """Initialize the pipe for training, using a representative set + of data examples. + get_examples (Callable[[], Iterable[Example]]): Function that + returns a representative sample of gold-standard Example objects. + nlp (Optional[Language]): The current nlp object the component is part + of. 
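+        The model is initialized with a subbatch of up to 10 examples,
+        using the aligned reference scores as a sample of the expected
+        output.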
+ + DOCS: https://spacy.io/api/spanfinder#initialize + """ + subbatch: List[Example] = [] + + for eg in get_examples(): + if len(subbatch) < 10: + subbatch.append(eg) + + if subbatch: + docs = [eg.reference for eg in subbatch] + Y, _ = self._get_aligned_truth_scores(subbatch, self.model.ops) + self.model.initialize(X=docs, Y=Y) + else: + self.model.initialize() diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 5a087e42a..08a5478a9 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -1,22 +1,20 @@ -from typing import List, Dict, Callable, Tuple, Optional, Iterable, Any, cast, Union from dataclasses import dataclass from functools import partial -from thinc.api import Config, Model, get_current_ops, set_dropout_rate, Ops -from thinc.api import Optimizer -from thinc.types import Ragged, Ints2d, Floats2d +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union, cast import numpy +from thinc.api import Config, Model, Ops, Optimizer, get_current_ops, set_dropout_rate +from thinc.types import Floats2d, Ints1d, Ints2d, Ragged from ..compat import Protocol, runtime_checkable -from ..scorer import Scorer -from ..language import Language -from .trainable_pipe import TrainablePipe -from ..tokens import Doc, SpanGroup, Span -from ..vocab import Vocab -from ..training import Example, validate_examples from ..errors import Errors +from ..language import Language +from ..scorer import Scorer +from ..tokens import Doc, Span, SpanGroup +from ..training import Example, validate_examples from ..util import registry - +from ..vocab import Vocab +from .trainable_pipe import TrainablePipe spancat_default_config = """ [model] @@ -33,8 +31,8 @@ hidden_size = 128 [model.tok2vec.embed] @architectures = "spacy.MultiHashEmbed.v2" width = 96 -rows = [5000, 2000, 1000, 1000] -attrs = ["ORTH", "PREFIX", "SUFFIX", "SHAPE"] +rows = [5000, 1000, 2500, 1000] +attrs = ["NORM", "PREFIX", "SUFFIX", "SHAPE"] include_static_vectors = false [model.tok2vec.encode] @@ -71,6 +69,7 @@ maxout_pieces = 3 depth = 4 """ +DEFAULT_SPANS_KEY = "sc" DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"] DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str( spancat_singlelabel_default_config @@ -112,6 +111,29 @@ def ngram_suggester( return output +def preset_spans_suggester( + docs: Iterable[Doc], spans_key: str, *, ops: Optional[Ops] = None +) -> Ragged: + if ops is None: + ops = get_current_ops() + spans = [] + lengths = [] + for doc in docs: + length = 0 + if doc.spans[spans_key]: + for span in doc.spans[spans_key]: + spans.append([span.start, span.end]) + length += 1 + + lengths.append(length) + lengths_array = cast(Ints1d, ops.asarray(lengths, dtype="i")) + if len(spans) > 0: + output = Ragged(ops.asarray(spans, dtype="i"), lengths_array) + else: + output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array) + return output + + @registry.misc("spacy.ngram_suggester.v1") def build_ngram_suggester(sizes: List[int]) -> Suggester: """Suggest all spans of the given lengths. Spans are returned as a ragged @@ -130,12 +152,20 @@ def build_ngram_range_suggester(min_size: int, max_size: int) -> Suggester: return build_ngram_suggester(sizes) +@registry.misc("spacy.preset_spans_suggester.v1") +def build_preset_spans_suggester(spans_key: str) -> Suggester: + """Suggest all spans that are already stored in doc.spans[spans_key]. 
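+    Docs with no spans stored under spans_key contribute an empty
+    suggestion row.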
+ This is useful when an upstream component is used to set the spans + on the Doc such as a SpanRuler or SpanFinder.""" + return partial(preset_spans_suggester, spans_key=spans_key) + + @Language.factory( "spancat", assigns=["doc.spans"], default_config={ "threshold": 0.5, - "spans_key": "sc", + "spans_key": DEFAULT_SPANS_KEY, "max_positive": None, "model": DEFAULT_SPANCAT_MODEL, "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]}, @@ -199,7 +229,7 @@ def make_spancat( "spancat_singlelabel", assigns=["doc.spans"], default_config={ - "spans_key": "sc", + "spans_key": DEFAULT_SPANS_KEY, "model": DEFAULT_SPANCAT_SINGLELABEL_MODEL, "negative_weight": 1.0, "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]}, diff --git a/spacy/tests/doc/test_span_group.py b/spacy/tests/doc/test_span_group.py index 818569c64..cea2c42ee 100644 --- a/spacy/tests/doc/test_span_group.py +++ b/spacy/tests/doc/test_span_group.py @@ -93,6 +93,21 @@ def test_span_group_copy(doc): assert span_group.attrs["key"] == "value" assert list(span_group) != list(clone) + # can't copy if the character offsets don't align to tokens + doc2 = Doc(doc.vocab, words=[t.text + "x" for t in doc]) + with pytest.raises(ValueError): + span_group.copy(doc=doc2) + + # can copy with valid character offsets despite different tokenization + doc3 = doc.copy() + with doc3.retokenize() as retokenizer: + retokenizer.merge(doc3[0:2]) + retokenizer.merge(doc3[3:6]) + span_group = SpanGroup(doc, spans=[doc[0:6], doc[3:6]]) + for span1, span2 in zip(span_group, span_group.copy(doc=doc3)): + assert span1.start_char == span2.start_char + assert span1.end_char == span2.end_char + def test_span_group_set_item(doc, other_doc): span_group = doc.spans["SPANS"] @@ -253,3 +268,12 @@ def test_span_group_typing(doc: Doc): for i, span in enumerate(span_group): assert span == span_group[i] == spans[i] filter_spans(span_group) + + +def test_span_group_init_doc(en_tokenizer): + """Test that all spans must come from the specified doc.""" + doc1 = en_tokenizer("a b c") + doc2 = en_tokenizer("a b c") + span_group = SpanGroup(doc1, spans=[doc1[0:1], doc1[1:2]]) + with pytest.raises(ValueError): + span_group = SpanGroup(doc1, spans=[doc1[0:1], doc2[1:2]]) diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index 030182a63..7198859b3 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -728,9 +728,9 @@ def test_neg_annotation(neg_key): ner.add_label("ORG") example = Example.from_dict(neg_doc, {"entities": [(7, 17, "PERSON")]}) example.reference.spans[neg_key] = [ - Span(neg_doc, 2, 4, "ORG"), - Span(neg_doc, 2, 3, "PERSON"), - Span(neg_doc, 1, 4, "PERSON"), + Span(example.reference, 2, 4, "ORG"), + Span(example.reference, 2, 3, "PERSON"), + Span(example.reference, 1, 4, "PERSON"), ] optimizer = nlp.initialize() @@ -755,7 +755,7 @@ def test_neg_annotation_conflict(neg_key): ner.add_label("PERSON") ner.add_label("LOC") example = Example.from_dict(neg_doc, {"entities": [(7, 17, "PERSON")]}) - example.reference.spans[neg_key] = [Span(neg_doc, 2, 4, "PERSON")] + example.reference.spans[neg_key] = [Span(example.reference, 2, 4, "PERSON")] assert len(example.reference.ents) == 1 assert example.reference.ents[0].text == "Shaka Khan" assert example.reference.ents[0].label_ == "PERSON" @@ -788,7 +788,7 @@ def test_beam_valid_parse(neg_key): doc = Doc(nlp.vocab, words=tokens) example = Example.from_dict(doc, {"ner": iob}) - neg_span = Span(doc, 50, 53, "ORG") + neg_span = 
Span(example.reference, 50, 53, "ORG") example.reference.spans[neg_key] = [neg_span] optimizer = nlp.initialize() diff --git a/spacy/tests/pipeline/test_span_finder.py b/spacy/tests/pipeline/test_span_finder.py new file mode 100644 index 000000000..91b08cabf --- /dev/null +++ b/spacy/tests/pipeline/test_span_finder.py @@ -0,0 +1,242 @@ +import pytest +from thinc.api import Config + +from spacy.language import Language +from spacy.lang.en import English +from spacy.pipeline.span_finder import span_finder_default_config +from spacy.tokens import Doc +from spacy.training import Example +from spacy import util +from spacy.util import registry +from spacy.util import fix_random_seed, make_tempdir + + +SPANS_KEY = "pytest" +TRAIN_DATA = [ + ("Who is Shaka Khan?", {"spans": {SPANS_KEY: [(7, 17)]}}), + ( + "I like London and Berlin.", + {"spans": {SPANS_KEY: [(7, 13), (18, 24)]}}, + ), +] + +TRAIN_DATA_OVERLAPPING = [ + ("Who is Shaka Khan?", {"spans": {SPANS_KEY: [(7, 17)]}}), + ( + "I like London and Berlin", + {"spans": {SPANS_KEY: [(7, 13), (18, 24), (7, 24)]}}, + ), + ("", {"spans": {SPANS_KEY: []}}), +] + + +def make_examples(nlp, data=TRAIN_DATA): + train_examples = [] + for t in data: + eg = Example.from_dict(nlp.make_doc(t[0]), t[1]) + train_examples.append(eg) + return train_examples + + +@pytest.mark.parametrize( + "tokens_predicted, tokens_reference, reference_truths", + [ + ( + ["Mon", ".", "-", "June", "16"], + ["Mon.", "-", "June", "16"], + [(0, 0), (0, 0), (0, 0), (1, 1), (0, 0)], + ), + ( + ["Mon.", "-", "J", "une", "16"], + ["Mon.", "-", "June", "16"], + [(0, 0), (0, 0), (1, 0), (0, 1), (0, 0)], + ), + ( + ["Mon", ".", "-", "June", "16"], + ["Mon.", "-", "June", "1", "6"], + [(0, 0), (0, 0), (0, 0), (1, 1), (0, 0)], + ), + ( + ["Mon.", "-J", "un", "e 16"], + ["Mon.", "-", "June", "16"], + [(0, 0), (0, 0), (0, 0), (0, 0)], + ), + pytest.param( + ["Mon.-June", "16"], + ["Mon.", "-", "June", "16"], + [(0, 1), (0, 0)], + ), + pytest.param( + ["Mon.-", "June", "16"], + ["Mon.", "-", "J", "une", "16"], + [(0, 0), (1, 1), (0, 0)], + ), + pytest.param( + ["Mon.-", "June 16"], + ["Mon.", "-", "June", "16"], + [(0, 0), (1, 0)], + ), + ], +) +def test_loss_alignment_example(tokens_predicted, tokens_reference, reference_truths): + nlp = Language() + predicted = Doc( + nlp.vocab, words=tokens_predicted, spaces=[False] * len(tokens_predicted) + ) + reference = Doc( + nlp.vocab, words=tokens_reference, spaces=[False] * len(tokens_reference) + ) + example = Example(predicted, reference) + example.reference.spans[SPANS_KEY] = [example.reference.char_span(5, 9)] + span_finder = nlp.add_pipe("span_finder", config={"spans_key": SPANS_KEY}) + nlp.initialize() + ops = span_finder.model.ops + if predicted.text != reference.text: + with pytest.raises( + ValueError, match="must match between reference and predicted" + ): + span_finder._get_aligned_truth_scores([example], ops) + return + truth_scores, masks = span_finder._get_aligned_truth_scores([example], ops) + assert len(truth_scores) == len(tokens_predicted) + ops.xp.testing.assert_array_equal(truth_scores, ops.xp.asarray(reference_truths)) + + +def test_span_finder_model(): + nlp = Language() + + docs = [nlp("This is an example."), nlp("This is the second example.")] + docs[0].spans[SPANS_KEY] = [docs[0][3:4]] + docs[1].spans[SPANS_KEY] = [docs[1][3:5]] + + total_tokens = 0 + for doc in docs: + total_tokens += len(doc) + + config = Config().from_str(span_finder_default_config).interpolate() + model = registry.resolve(config)["model"] + + 
model.initialize(X=docs) + predictions = model.predict(docs) + + assert len(predictions) == total_tokens + assert len(predictions[0]) == 2 + + +def test_span_finder_component(): + nlp = Language() + + docs = [nlp("This is an example."), nlp("This is the second example.")] + docs[0].spans[SPANS_KEY] = [docs[0][3:4]] + docs[1].spans[SPANS_KEY] = [docs[1][3:5]] + + span_finder = nlp.add_pipe("span_finder", config={"spans_key": SPANS_KEY}) + nlp.initialize() + docs = list(span_finder.pipe(docs)) + + assert SPANS_KEY in docs[0].spans + + +@pytest.mark.parametrize( + "min_length, max_length, span_count", + [(0, 0, 0), (None, None, 8), (2, None, 6), (None, 1, 2), (2, 3, 2)], +) +def test_set_annotations_span_lengths(min_length, max_length, span_count): + nlp = Language() + doc = nlp("Me and Jenny goes together like peas and carrots.") + if min_length == 0 and max_length == 0: + with pytest.raises(ValueError, match="Both 'min_length' and 'max_length'"): + span_finder = nlp.add_pipe( + "span_finder", + config={ + "max_length": max_length, + "min_length": min_length, + "spans_key": SPANS_KEY, + }, + ) + return + span_finder = nlp.add_pipe( + "span_finder", + config={ + "max_length": max_length, + "min_length": min_length, + "spans_key": SPANS_KEY, + }, + ) + nlp.initialize() + # Starts [Me, Jenny, peas] + # Ends [Jenny, peas, carrots] + scores = [ + (1, 0), + (0, 0), + (1, 1), + (0, 0), + (0, 0), + (0, 0), + (1, 1), + (0, 0), + (0, 1), + (0, 0), + ] + span_finder.set_annotations([doc], scores) + + assert doc.spans[SPANS_KEY] + assert len(doc.spans[SPANS_KEY]) == span_count + + # Assert below will fail when max_length is set to 0 + if max_length is None: + max_length = float("inf") + if min_length is None: + min_length = 1 + + assert all(min_length <= len(span) <= max_length for span in doc.spans[SPANS_KEY]) + + +def test_overfitting_IO(): + # Simple test to try and quickly overfit the span_finder component - ensuring the ML models work correctly + fix_random_seed(0) + nlp = English() + span_finder = nlp.add_pipe("span_finder", config={"spans_key": SPANS_KEY}) + train_examples = make_examples(nlp) + optimizer = nlp.initialize(get_examples=lambda: train_examples) + assert span_finder.model.get_dim("nO") == 2 + + for i in range(50): + losses = {} + nlp.update(train_examples, sgd=optimizer, losses=losses) + assert losses["span_finder"] < 0.001 + + # test the trained model + test_text = "I like London and Berlin" + doc = nlp(test_text) + spans = doc.spans[SPANS_KEY] + assert len(spans) == 3 + assert set([span.text for span in spans]) == { + "London", + "Berlin", + "London and Berlin", + } + + # Also test the results are still the same after IO + with make_tempdir() as tmp_dir: + nlp.to_disk(tmp_dir) + nlp2 = util.load_model_from_path(tmp_dir) + doc2 = nlp2(test_text) + spans2 = doc2.spans[SPANS_KEY] + assert len(spans2) == 3 + assert set([span.text for span in spans2]) == { + "London", + "Berlin", + "London and Berlin", + } + + # Test scoring + scores = nlp.evaluate(train_examples) + assert f"span_finder_{SPANS_KEY}_f" in scores + # It's not perfect 1.0 F1 because it's designed to overgenerate for now. 
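+    # Recall 1.0 with precision 0.75 means every gold span is recovered
+    # plus one extra prediction: the boundary tokens of "London" and
+    # "Berlin" also combine into the longer "London and Berlin" span.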
+ assert scores[f"span_finder_{SPANS_KEY}_p"] == 0.75 + assert scores[f"span_finder_{SPANS_KEY}_r"] == 1.0 + + # also test that the spancat works for just a single entity in a sentence + doc = nlp("London") + assert len(doc.spans[SPANS_KEY]) == 1 diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py index 199ef2b2a..b7024cf36 100644 --- a/spacy/tests/pipeline/test_spancat.py +++ b/spacy/tests/pipeline/test_spancat.py @@ -406,6 +406,21 @@ def test_ngram_sizes(en_tokenizer): assert_array_equal(OPS.to_numpy(ngrams_3.lengths), [0, 1, 3, 6, 9]) +def test_preset_spans_suggester(): + nlp = Language() + docs = [nlp("This is an example."), nlp("This is the second example.")] + docs[0].spans[SPAN_KEY] = [docs[0][3:4]] + docs[1].spans[SPAN_KEY] = [docs[1][0:4], docs[1][3:5]] + suggester = registry.misc.get("spacy.preset_spans_suggester.v1")(spans_key=SPAN_KEY) + candidates = suggester(docs) + assert type(candidates) == Ragged + assert len(candidates) == 2 + assert list(candidates.dataXd[0]) == [3, 4] + assert list(candidates.dataXd[1]) == [0, 4] + assert list(candidates.dataXd[2]) == [3, 5] + assert list(candidates.lengths) == [1, 2] + + def test_overfitting_IO(): # Simple test to try and quickly overfit the spancat component - ensuring the ML models work correctly fix_random_seed(0) @@ -428,7 +443,7 @@ def test_overfitting_IO(): spans = doc.spans[SPAN_KEY] assert len(spans) == 2 assert len(spans.attrs["scores"]) == 2 - assert min(spans.attrs["scores"]) > 0.9 + assert min(spans.attrs["scores"]) > 0.8 assert set([span.text for span in spans]) == {"London", "Berlin"} assert set([span.label_ for span in spans]) == {"LOC"} @@ -440,7 +455,7 @@ def test_overfitting_IO(): spans2 = doc2.spans[SPAN_KEY] assert len(spans2) == 2 assert len(spans2.attrs["scores"]) == 2 - assert min(spans2.attrs["scores"]) > 0.9 + assert min(spans2.attrs["scores"]) > 0.8 assert set([span.text for span in spans2]) == {"London", "Berlin"} assert set([span.label_ for span in spans2]) == {"LOC"} diff --git a/spacy/tests/serialize/test_resource_warning.py b/spacy/tests/serialize/test_resource_warning.py index 38701c6d9..befd05635 100644 --- a/spacy/tests/serialize/test_resource_warning.py +++ b/spacy/tests/serialize/test_resource_warning.py @@ -72,7 +72,7 @@ def entity_linker(): def create_kb(vocab): kb = InMemoryLookupKB(vocab, entity_vector_length=1) - kb.add_entity("test", 0.0, zeros((1, 1), dtype="f")) + kb.add_entity("test", 0.0, zeros((1,), dtype="f")) return kb entity_linker = nlp.add_pipe("entity_linker") diff --git a/spacy/tests/test_cli_app.py b/spacy/tests/test_cli_app.py index 9ba4f0e5c..5ff4dfa26 100644 --- a/spacy/tests/test_cli_app.py +++ b/spacy/tests/test_cli_app.py @@ -103,6 +103,8 @@ def test_debug_data_trainable_lemmatizer_cli(en_vocab): # project tests +CFG_FILE = "myconfig.cfg" + SAMPLE_PROJECT = { "title": "Sample project", "description": "This is a project for testing", @@ -128,13 +130,8 @@ SAMPLE_PROJECT = { { "name": "create", "help": "make a file", - "script": ["touch abc.txt"], - "outputs": ["abc.txt"], - }, - { - "name": "clean", - "help": "remove test file", - "script": ["rm abc.txt"], + "script": [f"python -m spacy init config {CFG_FILE}"], + "outputs": [f"{CFG_FILE}"], }, ], } @@ -175,7 +172,7 @@ def test_project_assets(project_dir): def test_project_run(project_dir): # make sure dry run works - test_file = project_dir / "abc.txt" + test_file = project_dir / CFG_FILE result = CliRunner().invoke( app, ["project", "run", "--dry", "create", str(project_dir)] ) @@ -223,14 
+220,13 @@ def test_project_push_pull(project_dir): proj_text = srsly.yaml_dumps(proj) (project_dir / "project.yml").write_text(proj_text) - test_file = project_dir / "abc.txt" + test_file = project_dir / CFG_FILE result = CliRunner().invoke(app, ["project", "run", "create", str(project_dir)]) assert result.exit_code == 0 assert test_file.is_file() result = CliRunner().invoke(app, ["project", "push", remote, str(project_dir)]) assert result.exit_code == 0 - result = CliRunner().invoke(app, ["project", "run", "clean", str(project_dir)]) - assert result.exit_code == 0 + test_file.unlink() assert not test_file.exists() result = CliRunner().invoke(app, ["project", "pull", remote, str(project_dir)]) assert result.exit_code == 0 diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index 4b2d22986..f95c44149 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -438,14 +438,14 @@ def test_score_spans(): return doc.spans[span_key] # Predict exactly the same, but overlapping spans will be discarded - pred.spans[key] = spans + pred.spans[key] = gold.spans[key].copy(doc=pred) eg = Example(pred, gold) scores = Scorer.score_spans([eg], attr=key, getter=span_getter) assert scores[f"{key}_p"] == 1.0 assert scores[f"{key}_r"] < 1.0 # Allow overlapping, now both precision and recall should be 100% - pred.spans[key] = spans + pred.spans[key] = gold.spans[key].copy(doc=pred) eg = Example(pred, gold) scores = Scorer.score_spans([eg], attr=key, getter=span_getter, allow_overlap=True) assert scores[f"{key}_p"] == 1.0 diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index a54b4ad3c..6c196ad78 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1264,12 +1264,14 @@ cdef class Doc: other.user_span_hooks = dict(self.user_span_hooks) other.length = self.length other.max_length = self.max_length - other.spans = self.spans.copy(doc=other) buff_size = other.max_length + (PADDING*2) assert buff_size > 0 tokens = other.mem.alloc(buff_size, sizeof(TokenC)) memcpy(tokens, self.c - PADDING, buff_size * sizeof(TokenC)) other.c = &tokens[PADDING] + # copy spans after setting tokens so that SpanGroup.copy can verify + # that the start/end offsets are valid + other.spans = self.spans.copy(doc=other) return other def to_disk(self, path, *, exclude=tuple()): diff --git a/spacy/tokens/span_group.pyx b/spacy/tokens/span_group.pyx index 608dda283..c748fa256 100644 --- a/spacy/tokens/span_group.pyx +++ b/spacy/tokens/span_group.pyx @@ -52,6 +52,8 @@ cdef class SpanGroup: if len(spans) : self.c.reserve(len(spans)) for span in spans: + if doc is not span.doc: + raise ValueError(Errors.E855.format(obj="span")) self.push_back(span.c) def __repr__(self): @@ -261,11 +263,22 @@ cdef class SpanGroup: """ if doc is None: doc = self.doc + if doc is self.doc: + spans = list(self) + else: + spans = [doc.char_span(span.start_char, span.end_char, label=span.label_, kb_id=span.kb_id, span_id=span.id) for span in self] + for i, span in enumerate(spans): + if span is None: + raise ValueError(Errors.E1052.format(i=i)) + if span.kb_id in self.doc.vocab.strings: + doc.vocab.strings.add(span.kb_id_) + if span.id in span.doc.vocab.strings: + doc.vocab.strings.add(span.id_) return SpanGroup( doc, name=self.name, attrs=deepcopy(self.attrs), - spans=list(self), + spans=spans, ) def _concat( diff --git a/spacy/ty.py b/spacy/ty.py index 8f2903d78..7e79a3d4d 100644 --- a/spacy/ty.py +++ b/spacy/ty.py @@ -1,11 +1,13 @@ from typing import TYPE_CHECKING from typing import Optional, Any, Iterable, Dict, 
Callable, Sequence, List + from .compat import Protocol, runtime_checkable from thinc.api import Optimizer, Model if TYPE_CHECKING: from .training import Example + from .language import Language @runtime_checkable @@ -32,7 +34,7 @@ class InitializableComponent(Protocol): def initialize( self, get_examples: Callable[[], Iterable["Example"]], - nlp: Iterable["Example"], + nlp: "Language", **kwargs: Any ): ... diff --git a/website/docs/api/spancategorizer.mdx b/website/docs/api/spancategorizer.mdx index f54a8687b..81a473ac2 100644 --- a/website/docs/api/spancategorizer.mdx +++ b/website/docs/api/spancategorizer.mdx @@ -105,7 +105,7 @@ architectures and their arguments and hyperparameters. > > # Construction via add_pipe with custom model > config = {"model": {"@architectures": "my_spancat"}} -> parser = nlp.add_pipe("spancat", config=config) +> spancat = nlp.add_pipe("spancat", config=config) > > # Construction from class > from spacy.pipeline import SpanCategorizer @@ -524,3 +524,22 @@ has two columns, indicating the start and end position. | `min_size` | The minimal phrase lengths to suggest (inclusive). ~~[int]~~ | | `max_size` | The maximal phrase lengths to suggest (exclusive). ~~[int]~~ | | **CREATES** | The suggester function. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ | + +### spacy.preset_spans_suggester.v1 {id="preset_spans_suggester"} + +> #### Example Config +> +> ```ini +> [components.spancat.suggester] +> @misc = "spacy.preset_spans_suggester.v1" +> spans_key = "my_spans" +> ``` + +Suggest all spans that are already stored in doc.spans[spans_key]. This is +useful when an upstream component is used to set the spans on the Doc such as a +[`SpanRuler`](/api/spanruler) or [`SpanFinder`](/api/spanfinder). + +| Name | Description | +| ----------- | ----------------------------------------------------------------------------- | +| `spans_key` | Key of [`Doc.spans`](/api/doc/#spans) that provides spans to suggest. ~~str~~ | +| **CREATES** | The suggester function. ~~Callable[[Iterable[Doc], Optional[Ops]], Ragged]~~ | diff --git a/website/docs/api/spanfinder.mdx b/website/docs/api/spanfinder.mdx new file mode 100644 index 000000000..ca3104c85 --- /dev/null +++ b/website/docs/api/spanfinder.mdx @@ -0,0 +1,372 @@ +--- +title: SpanFinder +tag: class,experimental +source: spacy/pipeline/span_finder.py +version: 3.6 +teaser: + 'Pipeline component for identifying potentially overlapping spans of text' +api_base_class: /api/pipe +api_string_name: span_finder +api_trainable: true +--- + +The span finder identifies potentially overlapping, unlabeled spans. It +identifies tokens that start or end spans and annotates unlabeled spans between +starts and ends, with optional filters for min and max span length. It is +intended for use in combination with a component like +[`SpanCategorizer`](/api/spancategorizer) that may further filter or label the +spans. Predicted spans will be saved in a [`SpanGroup`](/api/spangroup) on the +doc under `doc.spans[spans_key]`, where `spans_key` is a component config +setting. + +## Assigned Attributes {id="assigned-attributes"} + +Predictions will be saved to `Doc.spans[spans_key]` as a +[`SpanGroup`](/api/spangroup). + +`spans_key` defaults to `"sc"`, but can be passed as a parameter. The +`span_finder` component will overwrite any existing spans under the spans key +`doc.spans[spans_key]`. + +| Location | Value | +| ---------------------- | ---------------------------------- | +| `Doc.spans[spans_key]` | The unlabeled spans. 
~~SpanGroup~~ | + +## Config and implementation {id="config"} + +The default config is defined by the pipeline component factory and describes +how the component should be configured. You can override its settings via the +`config` argument on [`nlp.add_pipe`](/api/language#add_pipe) or in your +[`config.cfg` for training](/usage/training#config). See the +[model architectures](/api/architectures) documentation for details on the +architectures and their arguments and hyperparameters. + +> #### Example +> +> ```python +> from spacy.pipeline.span_finder import DEFAULT_SPAN_FINDER_MODEL +> config = { +> "threshold": 0.5, +> "spans_key": "my_spans", +> "max_length": None, +> "min_length": None, +> "model": DEFAULT_SPAN_FINDER_MODEL, +> } +> nlp.add_pipe("span_finder", config=config) +> ``` + +| Setting | Description | +| ------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ | +| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. ~~str~~ | +| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ | +| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ | +| `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~ | +| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | + +```python +%%GITHUB_SPACY/spacy/pipeline/span_finder.py +``` + +## SpanFinder.\_\_init\_\_ {id="init",tag="method"} + +> #### Example +> +> ```python +> # Construction via add_pipe with default model +> span_finder = nlp.add_pipe("span_finder") +> +> # Construction via add_pipe with custom model +> config = {"model": {"@architectures": "my_span_finder"}} +> span_finder = nlp.add_pipe("span_finder", config=config) +> +> # Construction from class +> from spacy.pipeline import SpanFinder +> span_finder = SpanFinder(nlp.vocab, model) +> ``` + +Create a new pipeline instance. In your application, you would normally use a +shortcut for this and instantiate the component using its string name and +[`nlp.add_pipe`](/api/language#create_pipe). + +| Name | Description | +| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `vocab` | The shared vocabulary. ~~Vocab~~ | +| `model` | A model instance that is given a list of documents and predicts a probability for each token. ~~Model[List[Doc], Floats2d]~~ | +| `name` | String name of the component instance. Used to add entries to the `losses` during training. ~~str~~ | +| _keyword-only_ | | +| `spans_key` | Key of the [`Doc.spans`](/api/doc#spans) dict to save the spans under. During initialization and training, the component will look for spans on the reference document under the same key. Defaults to `"sc"`. 
~~str~~ | +| `threshold` | Minimum probability to consider a prediction positive. Defaults to `0.5`. ~~float~~ | +| `max_length` | Maximum length of the produced spans, defaults to `None` meaning unlimited length. ~~Optional[int]~~ | +| `min_length` | Minimum length of the produced spans, defaults to `None` meaning shortest span length is 1. ~~Optional[int]~~ | +| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | + +## SpanFinder.\_\_call\_\_ {id="call",tag="method"} + +Apply the pipe to one document. The document is modified in place, and returned. +This usually happens under the hood when the `nlp` object is called on a text +and all pipeline components are applied to the `Doc` in order. Both +[`__call__`](/api/spanfinder#call) and [`pipe`](/api/spanfinder#pipe) delegate +to the [`predict`](/api/spanfinder#predict) and +[`set_annotations`](/api/spanfinder#set_annotations) methods. + +> #### Example +> +> ```python +> doc = nlp("This is a sentence.") +> span_finder = nlp.add_pipe("span_finder") +> # This usually happens under the hood +> processed = span_finder(doc) +> ``` + +| Name | Description | +| ----------- | -------------------------------- | +| `doc` | The document to process. ~~Doc~~ | +| **RETURNS** | The processed document. ~~Doc~~ | + +## SpanFinder.pipe {id="pipe",tag="method"} + +Apply the pipe to a stream of documents. This usually happens under the hood +when the `nlp` object is called on a text and all pipeline components are +applied to the `Doc` in order. Both [`__call__`](/api/spanfinder#call) and +[`pipe`](/api/spanfinder#pipe) delegate to the +[`predict`](/api/spanfinder#predict) and +[`set_annotations`](/api/spanfinder#set_annotations) methods. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> for doc in span_finder.pipe(docs, batch_size=50): +> pass +> ``` + +| Name | Description | +| -------------- | ------------------------------------------------------------- | +| `stream` | A stream of documents. ~~Iterable[Doc]~~ | +| _keyword-only_ | | +| `batch_size` | The number of documents to buffer. Defaults to `128`. ~~int~~ | +| **YIELDS** | The processed documents in order. ~~Doc~~ | + +## SpanFinder.initialize {id="initialize",tag="method"} + +Initialize the component for training. `get_examples` should be a function that +returns an iterable of [`Example`](/api/example) objects. **At least one example +should be supplied.** The data examples are used to **initialize the model** of +the component and can either be the full training data or a representative +sample. Initialization includes validating the network and +[inferring missing shapes](https://thinc.ai/docs/usage-models#validation) This +method is typically called by [`Language.initialize`](/api/language#initialize) +and lets you customize arguments it receives via the +[`[initialize.components]`](/api/data-formats#config-initialize) block in the +config. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> span_finder.initialize(lambda: examples, nlp=nlp) +> ``` + +| Name | Description | +| -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `get_examples` | Function that returns gold-standard annotations in the form of [`Example`](/api/example) objects. 
Must contain at least one `Example`. ~~Callable[[], Iterable[Example]]~~ | +| _keyword-only_ | | +| `nlp` | The current `nlp` object. Defaults to `None`. ~~Optional[Language]~~ | + +## SpanFinder.predict {id="predict",tag="method"} + +Apply the component's model to a batch of [`Doc`](/api/doc) objects without +modifying them. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> scores = span_finder.predict([doc1, doc2]) +> ``` + +| Name | Description | +| ----------- | ------------------------------------------- | +| `docs` | The documents to predict. ~~Iterable[Doc]~~ | +| **RETURNS** | The model's prediction for each document. | + +## SpanFinder.set_annotations {id="set_annotations",tag="method"} + +Modify a batch of [`Doc`](/api/doc) objects using pre-computed scores. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> scores = span_finder.predict(docs) +> span_finder.set_annotations(docs, scores) +> ``` + +| Name | Description | +| -------- | ---------------------------------------------------- | +| `docs` | The documents to modify. ~~Iterable[Doc]~~ | +| `scores` | The scores to set, produced by `SpanFinder.predict`. | + +## SpanFinder.update {id="update",tag="method"} + +Learn from a batch of [`Example`](/api/example) objects containing the +predictions and gold-standard annotations, and update the component's model. +Delegates to [`predict`](/api/spanfinder#predict) and +[`get_loss`](/api/spanfinder#get_loss). + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> optimizer = nlp.initialize() +> losses = span_finder.update(examples, sgd=optimizer) +> ``` + +| Name | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------------ | +| `examples` | A batch of [`Example`](/api/example) objects to learn from. ~~Iterable[Example]~~ | +| _keyword-only_ | | +| `drop` | The dropout rate. ~~float~~ | +| `sgd` | An optimizer. Will be created via [`create_optimizer`](#create_optimizer) if not set. ~~Optional[Optimizer]~~ | +| `losses` | Optional record of the loss during training. Updated using the component name as the key. ~~Optional[Dict[str, float]]~~ | +| **RETURNS** | The updated `losses` dictionary. ~~Dict[str, float]~~ | + +## SpanFinder.get_loss {id="get_loss",tag="method"} + +Find the loss and gradient of loss for the batch of documents and their +predicted scores. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> scores = span_finder.predict([eg.predicted for eg in examples]) +> loss, d_loss = span_finder.get_loss(examples, scores) +> ``` + +| Name | Description | +| -------------- | ------------------------------------------------------------------------------ | +| `examples` | The batch of examples. ~~Iterable[Example]~~ | +| `spans_scores` | Scores representing the model's predictions. ~~Tuple[Ragged, Floats2d]~~ | +| **RETURNS** | The loss and the gradient, i.e. `(loss, gradient)`. ~~Tuple[float, Floats2d]~~ | + +## SpanFinder.create_optimizer {id="create_optimizer",tag="method"} + +Create an optimizer for the pipeline component. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> optimizer = span_finder.create_optimizer() +> ``` + +| Name | Description | +| ----------- | ---------------------------- | +| **RETURNS** | The optimizer. 
~~Optimizer~~ | + +## SpanFinder.use_params {id="use_params",tag="method, contextmanager"} + +Modify the pipe's model to use the given parameter values. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> with span_finder.use_params(optimizer.averages): +> span_finder.to_disk("/best_model") +> ``` + +| Name | Description | +| -------- | -------------------------------------------------- | +| `params` | The parameter values to use in the model. ~~dict~~ | + +## SpanFinder.to_disk {id="to_disk",tag="method"} + +Serialize the pipe to disk. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> span_finder.to_disk("/path/to/span_finder") +> ``` + +| Name | Description | +| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| `path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. ~~Union[str, Path]~~ | +| _keyword-only_ | | +| `exclude` | String names of [serialization fields](#serialization-fields) to exclude. ~~Iterable[str]~~ | + +## SpanFinder.from_disk {id="from_disk",tag="method"} + +Load the pipe from disk. Modifies the object in place and returns it. + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> span_finder.from_disk("/path/to/span_finder") +> ``` + +| Name | Description | +| -------------- | ----------------------------------------------------------------------------------------------- | +| `path` | A path to a directory. Paths may be either strings or `Path`-like objects. ~~Union[str, Path]~~ | +| _keyword-only_ | | +| `exclude` | String names of [serialization fields](#serialization-fields) to exclude. ~~Iterable[str]~~ | +| **RETURNS** | The modified `SpanFinder` object. ~~SpanFinder~~ | + +## SpanFinder.to_bytes {id="to_bytes",tag="method"} + +> #### Example +> +> ```python +> span_finder = nlp.add_pipe("span_finder") +> span_finder_bytes = span_finder.to_bytes() +> ``` + +Serialize the pipe to a bytestring. + +| Name | Description | +| -------------- | ------------------------------------------------------------------------------------------- | +| _keyword-only_ | | +| `exclude` | String names of [serialization fields](#serialization-fields) to exclude. ~~Iterable[str]~~ | +| **RETURNS** | The serialized form of the `SpanFinder` object. ~~bytes~~ | + +## SpanFinder.from_bytes {id="from_bytes",tag="method"} + +Load the pipe from a bytestring. Modifies the object in place and returns it. + +> #### Example +> +> ```python +> span_finder_bytes = span_finder.to_bytes() +> span_finder = nlp.add_pipe("span_finder") +> span_finder.from_bytes(span_finder_bytes) +> ``` + +| Name | Description | +| -------------- | ------------------------------------------------------------------------------------------- | +| `bytes_data` | The data to load from. ~~bytes~~ | +| _keyword-only_ | | +| `exclude` | String names of [serialization fields](#serialization-fields) to exclude. ~~Iterable[str]~~ | +| **RETURNS** | The `SpanFinder` object. ~~SpanFinder~~ | + +## Serialization fields {id="serialization-fields"} + +During serialization, spaCy will export several data fields used to restore +different aspects of the object. If needed, you can exclude them from +serialization by passing in the string names via the `exclude` argument. 
+ +> #### Example +> +> ```python +> data = span_finder.to_disk("/path", exclude=["vocab"]) +> ``` + +| Name | Description | +| ------- | -------------------------------------------------------------- | +| `vocab` | The shared [`Vocab`](/api/vocab). | +| `cfg` | The config file. You usually don't want to exclude this. | +| `model` | The binary model data. You usually don't want to exclude this. | diff --git a/website/meta/sidebars.json b/website/meta/sidebars.json index b5c555da6..12c3fce35 100644 --- a/website/meta/sidebars.json +++ b/website/meta/sidebars.json @@ -106,6 +106,7 @@ { "text": "SentenceRecognizer", "url": "/api/sentencerecognizer" }, { "text": "Sentencizer", "url": "/api/sentencizer" }, { "text": "SpanCategorizer", "url": "/api/spancategorizer" }, + { "text": "SpanFinder", "url": "/api/spanfinder" }, { "text": "SpanResolver", "url": "/api/span-resolver" }, { "text": "SpanRuler", "url": "/api/spanruler" }, { "text": "Tagger", "url": "/api/tagger" }, diff --git a/website/meta/universe.json b/website/meta/universe.json index e5f9eaed0..5d4eb0f14 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -4308,6 +4308,37 @@ }, "category": ["pipeline", "research"], "tags": ["Thai"] + }, + { + "id": "vetiver", + "title": "Vetiver", + "slogan": "Version, share, deploy, and monitor models.", + "description": "The goal of vetiver is to provide fluent tooling to version, deploy, and monitor a trained model. Functions handle creating model objects, versioning models, predicting from a remote API endpoint, deploying Dockerfiles, and more.", + "github": "rstudio/vetiver-python", + "pip": "vetiver", + "code_example": [ + "import spacy", + "from vetiver import VetiverModel, VetiverAPI", + "", + "# If you use this model, you'll need to download it first:", + "# python -m spacy download en_core_web_md", + "nlp = spacy.load('en_core_web_md')", + "# Create deployable model object with your nlp Language object", + "v = VetiverModel(nlp, model_name = 'my_model')", + "# Try out your API endpoint locally", + "VetiverAPI(v).run()" + ], + "code_language": "python", + "url": "https://vetiver.rstudio.com/", + "thumb": "https://raw.githubusercontent.com/rstudio/vetiver-python/main/docs/figures/square-logo.svg", + "author": "Posit, PBC", + "author_links": { + "twitter": "posit_pbc", + "github": "rstudio", + "website": "https://posit.co/" + }, + "category": ["apis", "standalone"], + "tags": ["apis", "deployment"] } ],