Merge branch 'develop' into merge-develop-into-v4

2026-01-09 18:21:14 +03:00 · 2022-09-07 11:35:47 +02:00 · 2022-09-07 11:35:47 +02:00 · 977b847cce
commit 977b847cce
parent 4a615cacd2 f292569b1a
38 changed files with 752 additions and 208 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@ -6,7 +6,6 @@ requires = [
    "preshed>=3.0.2,<3.1.0",
    "murmurhash>=0.28.0,<1.1.0",
    "thinc>=8.1.0,<8.2.0",
-    "pathy",
    "numpy>=1.15.0",
 ]
 build-backend = "setuptools.build_meta"
--- a/requirements.txt
+++ b/requirements.txt
@ -1,5 +1,5 @@
 # Our libraries
-spacy-legacy>=3.0.9,<3.1.0
+spacy-legacy>=3.0.10,<3.1.0
 spacy-loggers>=1.0.0,<2.0.0
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
@ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64'
 types-dataclasses>=0.1.3; python_version < "3.7"
 types-mock>=0.1.1
 types-requests
+types-setuptools>=57.0.0
 black>=22.0,<23.0
--- a/setup.cfg
+++ b/setup.cfg
@ -33,7 +33,7 @@ include_package_data = true
 python_requires = >=3.6
 install_requires =
    # Our libraries
-    spacy-legacy>=3.0.9,<3.1.0
+    spacy-legacy>=3.0.10,<3.1.0
    spacy-loggers>=1.0.0,<2.0.0
    murmurhash>=0.28.0,<1.1.0
    cymem>=2.0.2,<2.1.0
@ -42,9 +42,9 @@ install_requires =
    wasabi>=0.9.1,<1.1.0
    srsly>=2.4.3,<3.0.0
    catalogue>=2.0.6,<2.1.0
+    # Third-party dependencies
    typer>=0.3.0,<0.5.0
    pathy>=0.3.5
-    # Third-party dependencies
    tqdm>=4.38.0,<5.0.0
    numpy>=1.15.0
    requests>=2.13.0,<3.0.0
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@ -31,21 +31,21 @@ def load(
    name: Union[str, Path],
    *,
    vocab: Union[Vocab, bool] = True,
-    disable: Iterable[str] = util.SimpleFrozenList(),
-    enable: Iterable[str] = util.SimpleFrozenList(),
-    exclude: Iterable[str] = util.SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
+    enable: Union[str, Iterable[str]] = util.SimpleFrozenList(),
+    exclude: Union[str, Iterable[str]] = util.SimpleFrozenList(),
    config: Union[Dict[str, Any], Config] = util.SimpleFrozenDict(),
 ) -> Language:
    """Load a spaCy model from an installed package or a local path.

    name (str): Package name or model path.
    vocab (Vocab): A Vocab object. If True, a vocab is created.
-    disable (Iterable[str]): Names of pipeline components to disable. Disabled
+    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
-    enable (Iterable[str]): Names of pipeline components to enable. All other
+    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (but can be enabled later using nlp.enable_pipe).
-    exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@ -20,7 +20,7 @@ def download_cli(
    ctx: typer.Context,
    model: str = Arg(..., help="Name of pipeline package to download"),
    direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
-    sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
+    sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
    # fmt: on
 ):
    """
@ -36,7 +36,12 @@ def download_cli(
    download(model, direct, sdist, *ctx.args)


-def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
+def download(
+    model: str,
+    direct: bool = False,
+    sdist: bool = False,
+    *pip_args,
+) -> None:
    if (
        not (is_package("spacy") or is_package("spacy-nightly"))
        and "--no-deps" not in pip_args
@ -50,13 +55,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
            "dependencies, you'll have to install them manually."
        )
        pip_args = pip_args + ("--no-deps",)
-    suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
-    dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}"
    if direct:
        components = model.split("-")
        model_name = "".join(components[:-1])
        version = components[-1]
-        download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
    else:
        model_name = model
        if model in OLD_MODEL_SHORTCUTS:
@ -67,13 +69,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
            model_name = OLD_MODEL_SHORTCUTS[model]
        compatibility = get_compatibility()
        version = get_version(model_name, compatibility)
-        download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
+
+    filename = get_model_filename(model_name, version, sdist)
+
+    download_model(filename, pip_args)
    msg.good(
        "Download and installation successful",
        f"You can now load the package via spacy.load('{model_name}')",
    )


+def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
+    dl_tpl = "{m}-{v}/{m}-{v}{s}"
+    egg_tpl = "#egg={m}=={v}"
+    suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
+    filename = dl_tpl.format(m=model_name, v=version, s=suffix)
+    if sdist:
+        filename += egg_tpl.format(m=model_name, v=version)
+    return filename
+
+
 def get_compatibility() -> dict:
    if is_prerelease_version(about.__version__):
        version: Optional[str] = about.__version__
@ -105,6 +120,11 @@ def get_version(model: str, comp: dict) -> str:
    return comp[model][0]


+def get_latest_version(model: str) -> str:
+    comp = get_compatibility()
+    return get_version(model, comp)
+
+
 def download_model(
    filename: str, user_pip_args: Optional[Sequence[str]] = None
 ) -> None:
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@ -1,10 +1,13 @@
 from typing import Optional, Dict, Any, Union, List
 import platform
+import pkg_resources
+import json
 from pathlib import Path
 from wasabi import Printer, MarkdownRenderer
 import srsly

 from ._util import app, Arg, Opt, string_to_list
+from .download import get_model_filename, get_latest_version
 from .. import util
 from .. import about

@ -16,6 +19,7 @@ def info_cli(
    markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
    silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
    exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
+    url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
    # fmt: on
 ):
    """
@ -23,10 +27,19 @@ def info_cli(
    print its meta information. Flag --markdown prints details in Markdown for easy
    copy-pasting to GitHub issues.

+    Flag --url prints only the download URL of the most recent compatible
+    version of the pipeline.
+
    DOCS: https://spacy.io/api/cli#info
    """
    exclude = string_to_list(exclude)
-    info(model, markdown=markdown, silent=silent, exclude=exclude)
+    info(
+        model,
+        markdown=markdown,
+        silent=silent,
+        exclude=exclude,
+        url=url,
+    )


 def info(
@ -35,11 +48,20 @@ def info(
    markdown: bool = False,
    silent: bool = True,
    exclude: Optional[List[str]] = None,
+    url: bool = False,
 ) -> Union[str, dict]:
    msg = Printer(no_print=silent, pretty=not silent)
    if not exclude:
        exclude = []
-    if model:
+    if url:
+        if model is not None:
+            title = f"Download info for pipeline '{model}'"
+            data = info_model_url(model)
+            print(data["download_url"])
+            return data
+        else:
+            msg.fail("--url option requires a pipeline name", exits=1)
+    elif model:
        title = f"Info about pipeline '{model}'"
        data = info_model(model, silent=silent)
    else:
@ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
        meta["source"] = str(model_path.resolve())
    else:
        meta["source"] = str(model_path)
+    download_url = info_installed_model_url(model)
+    if download_url:
+        meta["download_url"] = download_url
    return {
        k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
    }


+def info_installed_model_url(model: str) -> Optional[str]:
+    """Given a pipeline name, get the download URL if available, otherwise
+    return None.
+
+    This is only available for pipelines installed as modules that have
+    dist-info available.
+    """
+    try:
+        dist = pkg_resources.get_distribution(model)
+        data = json.loads(dist.get_metadata("direct_url.json"))
+        return data["url"]
+    except pkg_resources.DistributionNotFound:
+        # no such package
+        return None
+    except Exception:
+        # something else, like no file or invalid JSON
+        return None
+
+def info_model_url(model: str) -> Dict[str, Any]:
+    """Return the download URL for the latest version of a pipeline."""
+    version = get_latest_version(model)
+
+    filename = get_model_filename(model, version)
+    download_url = about.__download_url__ + "/" + filename
+    release_tpl = "https://github.com/explosion/spacy-models/releases/tag/{m}-{v}"
+    release_url = release_tpl.format(m=model, v=version)
+    return {"download_url": download_url, "release_url": release_url}
+
+
 def get_markdown(
    data: Dict[str, Any],
    title: Optional[str] = None,
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -230,8 +230,9 @@ class Errors(metaclass=ErrorsWithCodes):
            "initialized component.")
    E004 = ("Can't set up pipeline component: a factory for '{name}' already "
            "exists. Existing factory: {func}. New factory: {new_func}")
-    E005 = ("Pipeline component '{name}' returned None. If you're using a "
-            "custom component, maybe you forgot to return the processed Doc?")
+    E005 = ("Pipeline component '{name}' returned {returned_type} instead of a "
+            "Doc. If you're using a custom component, maybe you forgot to "
+            "return the processed Doc?")
    E006 = ("Invalid constraints for adding pipeline component. You can only "
            "set one of the following: before (component name or index), "
            "after (component name or index), first (True) or last (True). "
--- a/spacy/lang/ca/lemmatizer.py
+++ b/spacy/lang/ca/lemmatizer.py
@ -72,10 +72,10 @@ class CatalanLemmatizer(Lemmatizer):
                        oov_forms.append(form)
        if not forms:
            forms.extend(oov_forms)
-        if not forms and string in lookup_table.keys():
-            forms.append(self.lookup_lemmatize(token)[0])
+
+        # use lookups, and fall back to the token itself
        if not forms:
-            forms.append(string)
+            forms.append(lookup_table.get(string, [string])[0])
        forms = list(dict.fromkeys(forms))
        self.cache[cache_key] = forms
        return forms
--- a/spacy/lang/fr/lemmatizer.py
+++ b/spacy/lang/fr/lemmatizer.py
@ -53,11 +53,16 @@ class FrenchLemmatizer(Lemmatizer):
        rules = rules_table.get(univ_pos, [])
        string = string.lower()
        forms = []
+        # first try lookup in table based on upos
        if string in index:
            forms.append(string)
            self.cache[cache_key] = forms
            return forms
+
+        # then add anything in the exceptions table
        forms.extend(exceptions.get(string, []))
+
+        # if nothing found yet, use the rules
        oov_forms = []
        if not forms:
            for old, new in rules:
@ -69,12 +74,14 @@ class FrenchLemmatizer(Lemmatizer):
                        forms.append(form)
                    else:
                        oov_forms.append(form)
+
+        # if still nothing, add the oov forms from rules
        if not forms:
            forms.extend(oov_forms)
-        if not forms and string in lookup_table.keys():
-            forms.append(self.lookup_lemmatize(token)[0])
+
+        # use lookups, which fall back to the token itself
        if not forms:
-            forms.append(string)
+            forms.append(lookup_table.get(string, [string])[0])
        forms = list(dict.fromkeys(forms))
        self.cache[cache_key] = forms
        return forms
--- a/spacy/lang/la/__init__.py
+++ b/spacy/lang/la/__init__.py
@ -0,0 +1,18 @@
+from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from .stop_words import STOP_WORDS
+from .lex_attrs import LEX_ATTRS
+
+
+class LatinDefaults(BaseDefaults):
+    tokenizer_exceptions = TOKENIZER_EXCEPTIONS
+    stop_words = STOP_WORDS
+    lex_attr_getters = LEX_ATTRS
+
+
+class Latin(Language):
+    lang = "la"
+    Defaults = LatinDefaults
+
+
+__all__ = ["Latin"]
--- a/spacy/lang/la/lex_attrs.py
+++ b/spacy/lang/la/lex_attrs.py
@ -0,0 +1,34 @@
+from ...attrs import LIKE_NUM
+import re
+
+# cf. Goyvaerts/Levithan 2009; case-insensitive, allow 4
+roman_numerals_compile = re.compile(
+    r"(?i)^(?=[MDCLXVI])M*(C[MD]|D?C{0,4})(X[CL]|L?X{0,4})(I[XV]|V?I{0,4})$"
+)
+
+_num_words = set(
+    """
+unus una unum duo duae tres tria quattuor quinque sex septem octo novem decem
+""".split()
+)
+
+_ordinal_words = set(
+    """
+primus prima primum secundus secunda secundum tertius tertia tertium
+""".split()
+)
+
+
+def like_num(text):
+    if text.isdigit():
+        return True
+    if roman_numerals_compile.match(text):
+        return True
+    if text.lower() in _num_words:
+        return True
+    if text.lower() in _ordinal_words:
+        return True
+    return False
+
+
+LEX_ATTRS = {LIKE_NUM: like_num}
--- a/spacy/lang/la/stop_words.py
+++ b/spacy/lang/la/stop_words.py
@ -0,0 +1,37 @@
+# Corrected Perseus list, cf. https://wiki.digitalclassicist.org/Stopwords_for_Greek_and_Latin
+
+STOP_WORDS = set(
+    """
+ab ac ad adhuc aliqui aliquis an ante apud at atque aut autem 
+
+cum cur 
+
+de deinde dum 
+
+ego enim ergo es est et etiam etsi ex 
+
+fio 
+
+haud hic 
+
+iam idem igitur ille in infra inter interim ipse is ita 
+
+magis modo mox 
+
+nam ne nec necque neque nisi non nos 
+
+o ob 
+
+per possum post pro 
+
+quae quam quare qui quia quicumque quidem quilibet quis quisnam quisquam quisque quisquis quo quoniam 
+
+sed si sic sive sub sui sum super suus 
+
+tam tamen trans tu tum 
+
+ubi uel uero
+
+vel vero
+""".split()
+)
--- a/spacy/lang/la/tokenizer_exceptions.py
+++ b/spacy/lang/la/tokenizer_exceptions.py
@ -0,0 +1,76 @@
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import ORTH
+from ...util import update_exc
+
+
+## TODO: Look into systematically handling u/v
+_exc = {
+    "mecum": [{ORTH: "me"}, {ORTH: "cum"}],
+    "tecum": [{ORTH: "te"}, {ORTH: "cum"}],
+    "nobiscum": [{ORTH: "nobis"}, {ORTH: "cum"}],
+    "vobiscum": [{ORTH: "vobis"}, {ORTH: "cum"}],
+    "uobiscum": [{ORTH: "uobis"}, {ORTH: "cum"}],
+}
+
+for orth in [
+    "A.",
+    "Agr.",
+    "Ap.",
+    "C.",
+    "Cn.",
+    "D.",
+    "F.",
+    "K.",
+    "L.",
+    "M'.",
+    "M.",
+    "Mam.",
+    "N.",
+    "Oct.",
+    "Opet.",
+    "P.",
+    "Paul.",
+    "Post.",
+    "Pro.",
+    "Q.",
+    "S.",
+    "Ser.",
+    "Sert.",
+    "Sex.",
+    "St.",
+    "Sta.",
+    "T.",
+    "Ti.",
+    "V.",
+    "Vol.",
+    "Vop.",
+    "U.",
+    "Uol.",
+    "Uop.",
+    "Ian.",
+    "Febr.",
+    "Mart.",
+    "Apr.",
+    "Mai.",
+    "Iun.",
+    "Iul.",
+    "Aug.",
+    "Sept.",
+    "Oct.",
+    "Nov.",
+    "Nou.",
+    "Dec.",
+    "Non.",
+    "Id.",
+    "A.D.",
+    "Coll.",
+    "Cos.",
+    "Ord.",
+    "Pl.",
+    "S.C.",
+    "Suff.",
+    "Trib.",
+]:
+    _exc[orth] = [{ORTH: orth}]
+
+TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
--- a/spacy/language.py
+++ b/spacy/language.py
@ -1028,8 +1028,8 @@ class Language:
                raise ValueError(Errors.E109.format(name=name)) from e
            except Exception as e:
                error_handler(name, proc, [doc], e)
-            if doc is None:
-                raise ValueError(Errors.E005.format(name=name))
+            if not isinstance(doc, Doc):
+                raise ValueError(Errors.E005.format(name=name, returned_type=type(doc)))
        return doc

    def disable_pipes(self, *names) -> "DisabledPipes":
@ -1063,7 +1063,7 @@ class Language:
        """
        if enable is None and disable is None:
            raise ValueError(Errors.E991)
-        if disable is not None and isinstance(disable, str):
+        if isinstance(disable, str):
            disable = [disable]
        if enable is not None:
            if isinstance(enable, str):
@ -1698,9 +1698,9 @@ class Language:
        config: Union[Dict[str, Any], Config] = {},
        *,
        vocab: Union[Vocab, bool] = True,
-        disable: Iterable[str] = SimpleFrozenList(),
-        enable: Iterable[str] = SimpleFrozenList(),
-        exclude: Iterable[str] = SimpleFrozenList(),
+        disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+        enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+        exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
        meta: Dict[str, Any] = SimpleFrozenDict(),
        auto_fill: bool = True,
        validate: bool = True,
@ -1711,12 +1711,12 @@ class Language:

        config (Dict[str, Any] / Config): The loaded config.
        vocab (Vocab): A Vocab object. If True, a vocab is created.
-        disable (Iterable[str]): Names of pipeline components to disable.
+        disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
            Disabled pipes will be loaded but they won't be run unless you
            explicitly enable them by calling nlp.enable_pipe.
-        enable (Iterable[str]): Names of pipeline components to enable. All other
+        enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
            pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
-        exclude (Iterable[str]): Names of pipeline components to exclude.
+        exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude.
            Excluded components won't be loaded.
        meta (Dict[str, Any]): Meta overrides for nlp.meta.
        auto_fill (bool): Automatically fill in missing values in config based
@ -1727,6 +1727,12 @@ class Language:

        DOCS: https://spacy.io/api/language#from_config
        """
+        if isinstance(disable, str):
+            disable = [disable]
+        if isinstance(enable, str):
+            enable = [enable]
+        if isinstance(exclude, str):
+            exclude = [exclude]
        if auto_fill:
            config = Config(
                cls.default_config, section_order=CONFIG_SECTION_ORDER
@ -2031,25 +2037,29 @@ class Language:

    @staticmethod
    def _resolve_component_status(
-        disable: Iterable[str], enable: Iterable[str], pipe_names: Collection[str]
+        disable: Union[str, Iterable[str]],
+        enable: Union[str, Iterable[str]],
+        pipe_names: Iterable[str],
    ) -> Tuple[str, ...]:
        """Derives whether (1) `disable` and `enable` values are consistent and (2)
        resolves those to a single set of disabled components. Raises an error in
        case of inconsistency.

-        disable (Iterable[str]): Names of components or serialization fields to disable.
-        enable (Iterable[str]): Names of pipeline components to enable.
+        disable (Union[str, Iterable[str]]): Name(s) of component(s) or serialization fields to disable.
+        enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable.
        pipe_names (Iterable[str]): Names of all pipeline components.

        RETURNS (Tuple[str, ...]): Names of components to exclude from pipeline w.r.t.
                                   specified includes and excludes.
        """

-        if disable is not None and isinstance(disable, str):
+        if isinstance(disable, str):
            disable = [disable]
        to_disable = disable

        if enable:
+            if isinstance(enable, str):
+                enable = [enable]
            to_disable = [
                pipe_name for pipe_name in pipe_names if pipe_name not in enable
            ]
--- a/spacy/matcher/matcher.pyx
+++ b/spacy/matcher/matcher.pyx
@ -1,5 +1,5 @@
 # cython: infer_types=True, cython: profile=True
-from typing import List
+from typing import List, Iterable

 from libcpp.vector cimport vector
 from libc.stdint cimport int32_t, int8_t
@ -868,20 +868,27 @@ class _SetPredicate:

    def __call__(self, Token token):
        if self.is_extension:
-            value = get_string_id(token._.get(self.attr))
+            value = token._.get(self.attr)
        else:
            value = get_token_attr_for_matcher(token.c, self.attr)

-        if self.predicate in ("IS_SUBSET", "IS_SUPERSET", "INTERSECTS"):
+        if self.predicate in ("IN", "NOT_IN"):
+            if isinstance(value, (str, int)):
+                value = get_string_id(value)
+            else:
+                return False
+        elif self.predicate in ("IS_SUBSET", "IS_SUPERSET", "INTERSECTS"):
+            # ensure that all values are enclosed in a set
            if self.attr == MORPH:
                # break up MORPH into individual Feat=Val values
                value = set(get_string_id(v) for v in MorphAnalysis.from_id(self.vocab, value))
+            elif isinstance(value, (str, int)):
+                value = set((get_string_id(value),))
+            elif isinstance(value, Iterable) and all(isinstance(v, (str, int)) for v in value):
+                value = set(get_string_id(v) for v in value)
            else:
-                # treat a single value as a list
-                if isinstance(value, (str, int)):
-                    value = set([get_string_id(value)])
-                else:
-                    value = set(get_string_id(v) for v in value)
+                return False
+
        if self.predicate == "IN":
            return value in self.value
        elif self.predicate == "NOT_IN":
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@ -256,6 +256,11 @@ def ko_tokenizer_tokenizer():
    return nlp.tokenizer


+@pytest.fixture(scope="module")
+def la_tokenizer():
+    return get_lang_class("la")().tokenizer
+
+
 @pytest.fixture(scope="session")
 def ko_tokenizer_natto():
    pytest.importorskip("natto")
--- a/spacy/tests/lang/la/__init__.py
+++ b/spacy/tests/lang/la/__init__.py
--- a/spacy/tests/lang/la/test_exception.py
+++ b/spacy/tests/lang/la/test_exception.py
@ -0,0 +1,8 @@
+import pytest
+
+
+def test_la_tokenizer_handles_exc_in_text(la_tokenizer):
+    text = "scio te omnia facturum, ut nobiscum quam primum sis"
+    tokens = la_tokenizer(text)
+    assert len(tokens) == 11
+    assert tokens[6].text == "nobis"
--- a/spacy/tests/lang/la/test_text.py
+++ b/spacy/tests/lang/la/test_text.py
@ -0,0 +1,35 @@
+import pytest
+from spacy.lang.la.lex_attrs import like_num
+
+
+@pytest.mark.parametrize(
+    "text,match",
+    [
+        ("IIII", True),
+        ("VI", True),
+        ("vi", True),
+        ("IV", True),
+        ("iv", True),
+        ("IX", True),
+        ("ix", True),
+        ("MMXXII", True),
+        ("0", True),
+        ("1", True),
+        ("quattuor", True),
+        ("decem", True),
+        ("tertius", True),
+        ("canis", False),
+        ("MMXX11", False),
+        (",", False),
+    ],
+)
+def test_lex_attrs_like_number(la_tokenizer, text, match):
+    tokens = la_tokenizer(text)
+    assert len(tokens) == 1
+    assert tokens[0].like_num == match
+
+
+@pytest.mark.parametrize("word", ["quinque"])
+def test_la_lex_attrs_capitals(word):
+    assert like_num(word)
+    assert like_num(word.upper())
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@ -368,6 +368,16 @@ def test_matcher_intersect_value_operator(en_vocab):
    doc[0]._.ext = ["A", "B"]
    assert len(matcher(doc)) == 1

+    # INTERSECTS matches nothing for iterables that aren't all str or int
+    matcher = Matcher(en_vocab)
+    pattern = [{"_": {"ext": {"INTERSECTS": ["Abx", "C"]}}}]
+    matcher.add("M", [pattern])
+    doc = Doc(en_vocab, words=["a", "b", "c"])
+    doc[0]._.ext = [["Abx"], "B"]
+    assert len(matcher(doc)) == 0
+    doc[0]._.ext = ["Abx", "B"]
+    assert len(matcher(doc)) == 1
+
    # INTERSECTS with an empty pattern list matches nothing
    matcher = Matcher(en_vocab)
    pattern = [{"_": {"ext": {"INTERSECTS": []}}}]
@ -476,14 +486,22 @@ def test_matcher_extension_set_membership(en_vocab):
    assert len(matches) == 0


-@pytest.mark.xfail(reason="IN predicate must handle sequence values in extensions")
 def test_matcher_extension_in_set_predicate(en_vocab):
    matcher = Matcher(en_vocab)
    Token.set_extension("ext", default=[])
    pattern = [{"_": {"ext": {"IN": ["A", "C"]}}}]
    matcher.add("M", [pattern])
    doc = Doc(en_vocab, words=["a", "b", "c"])
+
+    # The IN predicate expects an exact match between the
+    # extension value and one of the pattern's values.
    doc[0]._.ext = ["A", "B"]
+    assert len(matcher(doc)) == 0
+
+    doc[0]._.ext = ["A"]
+    assert len(matcher(doc)) == 0
+
+    doc[0]._.ext = "A"
    assert len(matcher(doc)) == 1


--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@ -17,6 +17,7 @@ def test_build_dependencies():
        "types-dataclasses",
        "types-mock",
        "types-requests",
+        "types-setuptools",
    ]
    # ignore language-specific packages that shouldn't be installed by all
    libs_ignore_setup = [
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@ -618,6 +618,7 @@ def test_load_disable_enable() -> None:
        base_nlp.to_disk(tmp_dir)
        to_disable = ["parser", "tagger"]
        to_enable = ["tagger", "parser"]
+        single_str = "tagger"

        # Setting only `disable`.
        nlp = spacy.load(tmp_dir, disable=to_disable)
@ -632,6 +633,16 @@ def test_load_disable_enable() -> None:
            ]
        )

+        # Loading with a string representing one component
+        nlp = spacy.load(tmp_dir, exclude=single_str)
+        assert single_str not in nlp.component_names
+
+        nlp = spacy.load(tmp_dir, disable=single_str)
+        assert single_str in nlp.component_names
+        assert single_str not in nlp.pipe_names
+        assert nlp._disabled == {single_str}
+        assert nlp.disabled == [single_str]
+
        # Testing consistent enable/disable combination.
        nlp = spacy.load(
            tmp_dir,
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@ -670,3 +670,25 @@ def test_dot_in_factory_names(nlp):

    with pytest.raises(ValueError, match="not permitted"):
        Language.factory("my.evil.component.v1", func=evil_component)
+
+
+def test_component_return():
+    """Test that an error is raised if components return a type other than a
+    doc."""
+    nlp = English()
+
+    @Language.component("test_component_good_pipe")
+    def good_pipe(doc):
+        return doc
+
+    nlp.add_pipe("test_component_good_pipe")
+    nlp("text")
+    nlp.remove_pipe("test_component_good_pipe")
+
+    @Language.component("test_component_bad_pipe")
+    def bad_pipe(doc):
+        return doc.text
+
+    nlp.add_pipe("test_component_bad_pipe")
+    with pytest.raises(ValueError, match="instead of a Doc"):
+        nlp("text")
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@ -10,7 +10,8 @@ from spacy.ml._precomputable_affine import _backprop_precomputable_affine_paddin
 from spacy.util import dot_to_object, SimpleFrozenList, import_file
 from spacy.util import to_ternary_int
 from thinc.api import Config, Optimizer, ConfigValidationError
-from thinc.api import set_current_ops
+from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps
+from thinc.compat import has_cupy_gpu, has_torch_mps_gpu
 from spacy.training.batchers import minibatch_by_words
 from spacy.lang.en import English
 from spacy.lang.nl import Dutch
@ -18,7 +19,6 @@ from spacy.language import DEFAULT_CONFIG_PATH
 from spacy.schemas import ConfigSchemaTraining, TokenPattern, TokenPatternSchema
 from pydantic import ValidationError

-from thinc.api import get_current_ops, NumpyOps, CupyOps

 from .util import get_random_doc, make_tempdir

@ -111,26 +111,25 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):

 def test_prefer_gpu():
    current_ops = get_current_ops()
-    try:
-        import cupy  # noqa: F401
-
-        prefer_gpu()
+    if has_cupy_gpu:
+        assert prefer_gpu()
        assert isinstance(get_current_ops(), CupyOps)
-    except ImportError:
+    elif has_torch_mps_gpu:
+        assert prefer_gpu()
+        assert isinstance(get_current_ops(), MPSOps)
+    else:
        assert not prefer_gpu()
    set_current_ops(current_ops)


 def test_require_gpu():
    current_ops = get_current_ops()
-    try:
-        import cupy  # noqa: F401
-
+    if has_cupy_gpu:
        require_gpu()
        assert isinstance(get_current_ops(), CupyOps)
-    except ImportError:
-        with pytest.raises(ValueError):
-            require_gpu()
+    elif has_torch_mps_gpu:
+        require_gpu()
+        assert isinstance(get_current_ops(), MPSOps)
    set_current_ops(current_ops)


--- a/spacy/tests/training/test_logger.py
+++ b/spacy/tests/training/test_logger.py
@ -0,0 +1,30 @@
+import pytest
+import spacy
+
+from spacy.training import loggers
+
+
+@pytest.fixture()
+def nlp():
+    nlp = spacy.blank("en")
+    nlp.add_pipe("ner")
+    return nlp
+
+
+@pytest.fixture()
+def info():
+    return {
+        "losses": {"ner": 100},
+        "other_scores": {"ENTS_F": 0.85, "ENTS_P": 0.90, "ENTS_R": 0.80},
+        "epoch": 100,
+        "step": 125,
+        "score": 85,
+    }
+
+
+def test_console_logger(nlp, info):
+    console_logger = loggers.console_logger(
+        progress_bar=True, console_output=True, output_file=None
+    )
+    log_step, finalize = console_logger(nlp)
+    log_step(info)
--- a/spacy/tokens/span_group.pyi
+++ b/spacy/tokens/span_group.pyi
@ -1,4 +1,4 @@
-from typing import Any, Dict, Iterable
+from typing import Any, Dict, Iterable, Optional
 from .doc import Doc
 from .span import Span

@ -24,4 +24,4 @@ class SpanGroup:
    def __getitem__(self, i: int) -> Span: ...
    def to_bytes(self) -> bytes: ...
    def from_bytes(self, bytes_data: bytes) -> SpanGroup: ...
-    def copy(self) -> SpanGroup: ...
+    def copy(self, doc: Optional[Doc] = ...) -> SpanGroup: ...
--- a/spacy/tokens/span_group.pyx
+++ b/spacy/tokens/span_group.pyx
@ -244,15 +244,18 @@ cdef class SpanGroup:
    cdef void push_back(self, const shared_ptr[SpanC] &span):
        self.c.push_back(span)

-    def copy(self)  -> SpanGroup:
+    def copy(self, doc: Optional["Doc"] = None) -> SpanGroup:
        """Clones the span group.

+        doc (Doc): New reference document to which the copy is bound.
        RETURNS (SpanGroup): A copy of the span group.

        DOCS: https://spacy.io/api/spangroup#copy
        """
+        if doc is None:
+            doc = self.doc
        return SpanGroup(
-            self.doc,
+            doc,
            name=self.name,
            attrs=deepcopy(self.attrs),
            spans=list(self),
--- a/spacy/tokens/span_groups.py
+++ b/spacy/tokens/span_groups.py
@ -42,7 +42,8 @@ class SpanGroups(UserDict):
    def copy(self, doc: Optional["Doc"] = None) -> "SpanGroups":
        if doc is None:
            doc = self._ensure_doc()
-        return SpanGroups(doc).from_bytes(self.to_bytes())
+        data_copy = ((k, v.copy(doc=doc)) for k, v in self.items())
+        return SpanGroups(doc, items=data_copy)

    def setdefault(self, key, default=None):
        if not isinstance(default, SpanGroup):
--- a/spacy/training/loggers.py
+++ b/spacy/training/loggers.py
@ -1,10 +1,13 @@
-from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
+from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO, Union
 from wasabi import Printer
+from pathlib import Path
 import tqdm
 import sys
+import srsly

 from ..util import registry
 from ..errors import Errors
+from .. import util

 if TYPE_CHECKING:
    from ..language import Language  # noqa: F401
@ -23,13 +26,44 @@ def setup_table(
    return final_cols, final_widths, ["r" for _ in final_widths]


-@registry.loggers("spacy.ConsoleLogger.v1")
-def console_logger(progress_bar: bool = False):
+@registry.loggers("spacy.ConsoleLogger.v2")
+def console_logger(
+    progress_bar: bool = False,
+    console_output: bool = True,
+    output_file: Optional[Union[str, Path]] = None,
+):
+    """The ConsoleLogger.v2 prints out training logs in the console and/or saves them to a jsonl file.
+    progress_bar (bool): Whether the logger should print the progress bar.
+    console_output (bool): Whether the logger should print the logs on the console.
+    output_file (Optional[Union[str, Path]]): The file to save the training logs to.
+    """
+    _log_exist = False
+    if output_file:
+        output_file = util.ensure_path(output_file)  # type: ignore
+        if output_file.exists():  # type: ignore
+            _log_exist = True
+        if not output_file.parents[0].exists():  # type: ignore
+            output_file.parents[0].mkdir(parents=True)  # type: ignore
+
    def setup_printer(
        nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
    ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
        write = lambda text: print(text, file=stdout, flush=True)
        msg = Printer(no_print=True)
+
+        nonlocal output_file
+        output_stream = None
+        if _log_exist:
+            write(
+                msg.warn(
+                    f"Saving logs is disabled because {output_file} already exists."
+                )
+            )
+            output_file = None
+        elif output_file:
+            write(msg.info(f"Saving results to {output_file}"))
+            output_stream = open(output_file, "w", encoding="utf-8")
+
        # ensure that only trainable components are logged
        logged_pipes = [
            name
@ -40,13 +74,15 @@ def console_logger(progress_bar: bool = False):
        score_weights = nlp.config["training"]["score_weights"]
        score_cols = [col for col, value in score_weights.items() if value is not None]
        loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
-        spacing = 2
-        table_header, table_widths, table_aligns = setup_table(
-            cols=["E", "#"] + loss_cols + score_cols + ["Score"],
-            widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
-        )
-        write(msg.row(table_header, widths=table_widths, spacing=spacing))
-        write(msg.row(["-" * width for width in table_widths], spacing=spacing))
+
+        if console_output:
+            spacing = 2
+            table_header, table_widths, table_aligns = setup_table(
+                cols=["E", "#"] + loss_cols + score_cols + ["Score"],
+                widths=[3, 6] + [8 for _ in loss_cols] + [6 for _ in score_cols] + [6],
+            )
+            write(msg.row(table_header, widths=table_widths, spacing=spacing))
+            write(msg.row(["-" * width for width in table_widths], spacing=spacing))
        progress = None

        def log_step(info: Optional[Dict[str, Any]]) -> None:
@ -57,12 +93,15 @@ def console_logger(progress_bar: bool = False):
                if progress is not None:
                    progress.update(1)
                return
-            losses = [
-                "{0:.2f}".format(float(info["losses"][pipe_name]))
-                for pipe_name in logged_pipes
-            ]
+
+            losses = []
+            log_losses = {}
+            for pipe_name in logged_pipes:
+                losses.append("{0:.2f}".format(float(info["losses"][pipe_name])))
+                log_losses[pipe_name] = float(info["losses"][pipe_name])

            scores = []
+            log_scores = {}
            for col in score_cols:
                score = info["other_scores"].get(col, 0.0)
                try:
@ -73,6 +112,7 @@ def console_logger(progress_bar: bool = False):
                if col != "speed":
                    score *= 100
                scores.append("{0:.2f}".format(score))
+                log_scores[str(col)] = score

            data = (
                [info["epoch"], info["step"]]
@ -80,20 +120,36 @@ def console_logger(progress_bar: bool = False):
                + scores
                + ["{0:.2f}".format(float(info["score"]))]
            )
+
+            if output_stream:
+                # Write to log file per log_step
+                log_data = {
+                    "epoch": info["epoch"],
+                    "step": info["step"],
+                    "losses": log_losses,
+                    "scores": log_scores,
+                    "score": float(info["score"]),
+                }
+                output_stream.write(srsly.json_dumps(log_data) + "\n")
+
            if progress is not None:
                progress.close()
-            write(
-                msg.row(data, widths=table_widths, aligns=table_aligns, spacing=spacing)
-            )
-            if progress_bar:
-                # Set disable=None, so that it disables on non-TTY
-                progress = tqdm.tqdm(
-                    total=eval_frequency, disable=None, leave=False, file=stderr
+            if console_output:
+                write(
+                    msg.row(
+                        data, widths=table_widths, aligns=table_aligns, spacing=spacing
+                    )
                )
-                progress.set_description(f"Epoch {info['epoch']+1}")
+                if progress_bar:
+                    # Set disable=None, so that it disables on non-TTY
+                    progress = tqdm.tqdm(
+                        total=eval_frequency, disable=None, leave=False, file=stderr
+                    )
+                    progress.set_description(f"Epoch {info['epoch']+1}")

        def finalize() -> None:
-            pass
+            if output_stream:
+                output_stream.close()

        return log_step, finalize

--- a/spacy/util.py
+++ b/spacy/util.py
@ -398,9 +398,9 @@ def load_model(
    name: Union[str, Path],
    *,
    vocab: Union["Vocab", bool] = True,
-    disable: Iterable[str] = SimpleFrozenList(),
-    enable: Iterable[str] = SimpleFrozenList(),
-    exclude: Iterable[str] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
    """Load a model from a package or data path.
@ -408,9 +408,9 @@ def load_model(
    name (str): Package name or model path.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
-    disable (Iterable[str]): Names of pipeline components to disable.
-    enable (Iterable[str]): Names of pipeline components to enable. All others will be disabled.
-    exclude (Iterable[str]):  Names of pipeline components to exclude.
+    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable.
+    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All others will be disabled.
+    exclude (Union[str, Iterable[str]]):  Name(s) of pipeline component(s) to exclude.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
    RETURNS (Language): The loaded nlp object.
@ -440,9 +440,9 @@ def load_model_from_package(
    name: str,
    *,
    vocab: Union["Vocab", bool] = True,
-    disable: Iterable[str] = SimpleFrozenList(),
-    enable: Iterable[str] = SimpleFrozenList(),
-    exclude: Iterable[str] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
    """Load a model from an installed package.
@ -450,12 +450,12 @@ def load_model_from_package(
    name (str): The package name.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
-    disable (Iterable[str]): Names of pipeline components to disable. Disabled
+    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
-    enable (Iterable[str]): Names of pipeline components to enable. All other
+    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
-    exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
@ -470,9 +470,9 @@ def load_model_from_path(
    *,
    meta: Optional[Dict[str, Any]] = None,
    vocab: Union["Vocab", bool] = True,
-    disable: Iterable[str] = SimpleFrozenList(),
-    enable: Iterable[str] = SimpleFrozenList(),
-    exclude: Iterable[str] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
    """Load a model from a data directory path. Creates Language class with
@ -482,12 +482,12 @@ def load_model_from_path(
    meta (Dict[str, Any]): Optional model meta.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
-    disable (Iterable[str]): Names of pipeline components to disable. Disabled
+    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
-    enable (Iterable[str]): Names of pipeline components to enable. All other
+    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
-    exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
@ -516,9 +516,9 @@ def load_model_from_config(
    *,
    meta: Dict[str, Any] = SimpleFrozenDict(),
    vocab: Union["Vocab", bool] = True,
-    disable: Iterable[str] = SimpleFrozenList(),
-    enable: Iterable[str] = SimpleFrozenList(),
-    exclude: Iterable[str] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
    auto_fill: bool = False,
    validate: bool = True,
 ) -> "Language":
@ -529,12 +529,12 @@ def load_model_from_config(
    meta (Dict[str, Any]): Optional model meta.
    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
-    disable (Iterable[str]): Names of pipeline components to disable. Disabled
+    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
-    enable (Iterable[str]): Names of pipeline components to enable. All other
+    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
-    exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    auto_fill (bool): Whether to auto-fill config with missing defaults.
    validate (bool): Whether to show config validation errors.
@ -616,9 +616,9 @@ def load_model_from_init_py(
    init_file: Union[Path, str],
    *,
    vocab: Union["Vocab", bool] = True,
-    disable: Iterable[str] = SimpleFrozenList(),
-    enable: Iterable[str] = SimpleFrozenList(),
-    exclude: Iterable[str] = SimpleFrozenList(),
+    disable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    enable: Union[str, Iterable[str]] = SimpleFrozenList(),
+    exclude: Union[str, Iterable[str]] = SimpleFrozenList(),
    config: Union[Dict[str, Any], Config] = SimpleFrozenDict(),
 ) -> "Language":
    """Helper function to use in the `load()` method of a model package's
@ -626,12 +626,12 @@ def load_model_from_init_py(

    vocab (Vocab / True): Optional vocab to pass in on initialization. If True,
        a new Vocab object will be created.
-    disable (Iterable[str]): Names of pipeline components to disable. Disabled
+    disable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to disable. Disabled
        pipes will be loaded but they won't be run unless you explicitly
        enable them by calling nlp.enable_pipe.
-    enable (Iterable[str]): Names of pipeline components to enable. All other
+    enable (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to enable. All other
        pipes will be disabled (and can be enabled using `nlp.enable_pipe`).
-    exclude (Iterable[str]): Names of pipeline components to exclude. Excluded
+    exclude (Union[str, Iterable[str]]): Name(s) of pipeline component(s) to exclude. Excluded
        components won't be loaded.
    config (Dict[str, Any] / Config): Config overrides as nested dict or dict
        keyed by section values in dot notation.
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@ -77,14 +77,15 @@ $ python -m spacy info [--markdown] [--silent] [--exclude]
 $ python -m spacy info [model] [--markdown] [--silent] [--exclude]
 ```

-| Name                                             | Description                                                                                   |
-| ------------------------------------------------ | --------------------------------------------------------------------------------------------- |
-| `model`                                          | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~         |
-| `--markdown`, `-md`                              | Print information as Markdown. ~~bool (flag)~~                                                |
-| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~                                 |
-| `--exclude`, `-e`                                | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
-| `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                                    |
-| **PRINTS**                                       | Information about your spaCy installation.                                                    |
+| Name                                             | Description                                                                                                             |
+| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
+| `model`                                          | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~                                   |
+| `--markdown`, `-md`                              | Print information as Markdown. ~~bool (flag)~~                                                                          |
+| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~                                                           |
+| `--exclude`, `-e`                                | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~                           |
+| `--url`, `-u` <Tag variant="new">3.5.0</Tag>     | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ |
+| `--help`, `-h`                                   | Show help message and available arguments. ~~bool (flag)~~                                                              |
+| **PRINTS**                                       | Information about your spaCy installation.                                                                              |

 ## validate {#validate new="2" tag="command"}

--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@ -63,17 +63,18 @@ spaCy loads a model under the hood based on its
 > nlp = Language.from_config(config)
 > ```

-| Name           | Description                                                                                                                                                                                                                                      |
-| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `config`       | The loaded config. ~~Union[Dict[str, Any], Config]~~                                                                                                                                                                                             |
-| _keyword-only_ |                                                                                                                                                                                                                                                  |
-| `vocab`        | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                                                                                                                                              |
-| `disable`      | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
-| `exclude`      | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                             |
-| `meta`         | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~                                                                                                                                                                                |
-| `auto_fill`    | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~                                                                                                 |
-| `validate`     | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                   |
-| **RETURNS**    | The initialized object. ~~Language~~                                                                                                                                                                                                             |
+| Name                                  | Description                                                                                                                                                                                                                                                          |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `config`                              | The loaded config. ~~Union[Dict[str, Any], Config]~~                                                                                                                                                                                                                 |
+| _keyword-only_                        |                                                                                                                                                                                                                                                                      |
+| `vocab`                               | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~                                                                                                                                                                  |
+| `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  |
+| `exclude`                             | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             |
+| `meta`                                | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~                                                                                                                                                                                                    |
+| `auto_fill`                           | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~                                                                                                                     |
+| `validate`                            | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~                                                                                                                                       |
+| **RETURNS**                           | The initialized object. ~~Language~~                                                                                                                                                                                                                                 |

 ## Language.component {#component tag="classmethod" new="3"}

@ -695,8 +696,8 @@ As of spaCy v3.0, the `disable_pipes` method has been renamed to `select_pipes`:
 | Name           | Description                                                                                            |
 | -------------- | ------------------------------------------------------------------------------------------------------ |
 | _keyword-only_ |                                                                                                        |
-| `disable`      | Name(s) of pipeline components to disable. ~~Optional[Union[str, Iterable[str]]]~~                     |
-| `enable`       | Name(s) of pipeline components that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~      |
+| `disable`      | Name(s) of pipeline component(s) to disable. ~~Optional[Union[str, Iterable[str]]]~~                   |
+| `enable`       | Name(s) of pipeline component(s) that will not be disabled. ~~Optional[Union[str, Iterable[str]]]~~    |
 | **RETURNS**    | The disabled pipes that can be restored by calling the object's `.restore()` method. ~~DisabledPipes~~ |

 ## Language.get_factory_meta {#get_factory_meta tag="classmethod" new="3"}
--- a/website/docs/api/legacy.md
+++ b/website/docs/api/legacy.md
@ -248,6 +248,59 @@ added to an existing vectors table. See more details in

 ## Loggers {#loggers}

+These functions are available from `@spacy.registry.loggers`.
+
+### spacy.ConsoleLogger.v1 {#ConsoleLogger_v1}
+
+> #### Example config
+>
+> ```ini
+> [training.logger]
+> @loggers = "spacy.ConsoleLogger.v1"
+> progress_bar = true
+> ```
+
+Writes the results of a training step to the console in a tabular format.
+
+<Accordion title="Example console output" spaced>
+
+```cli
+$ python -m spacy train config.cfg
+```
+
+```
+ℹ Using CPU
+ℹ Loading config and nlp from: config.cfg
+ℹ Pipeline: ['tok2vec', 'tagger']
+ℹ Start training
+ℹ Training. Initial learn rate: 0.0
+
+E     #        LOSS TOK2VEC   LOSS TAGGER   TAG_ACC   SCORE
+---   ------   ------------   -----------   -------   ------
+  0        0           0.00         86.20      0.22     0.00
+  0      200           3.08      18968.78     34.00     0.34
+  0      400          31.81      22539.06     33.64     0.34
+  0      600          92.13      22794.91     43.80     0.44
+  0      800         183.62      21541.39     56.05     0.56
+  0     1000         352.49      25461.82     65.15     0.65
+  0     1200         422.87      23708.82     71.84     0.72
+  0     1400         601.92      24994.79     76.57     0.77
+  0     1600         662.57      22268.02     80.20     0.80
+  0     1800        1101.50      28413.77     82.56     0.83
+  0     2000        1253.43      28736.36     85.00     0.85
+  0     2200        1411.02      28237.53     87.42     0.87
+  0     2400        1605.35      28439.95     88.70     0.89
+```
+
+Note that the cumulative loss keeps increasing within one epoch, but should
+start decreasing across epochs.
+
+</Accordion>
+
+| Name           | Description                                                |
+| -------------- | ---------------------------------------------------------- |
+| `progress_bar` | Whether the logger should print the progress bar. ~~bool~~ |
+
 Logging utilities for spaCy are implemented in the
 [`spacy-loggers`](https://github.com/explosion/spacy-loggers) repo, and the
 functions are typically available from `@spacy.registry.loggers`.
--- a/website/docs/api/spangroup.md
+++ b/website/docs/api/spangroup.md
@ -255,9 +255,10 @@ Return a copy of the span group.
 > new_group = doc.spans["errors"].copy()
 > ```

-| Name        | Description                                     |
-| ----------- | ----------------------------------------------- |
-| **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~ |
+| Name        | Description                                                                                        |
+| ----------- | -------------------------------------------------------------------------------------------------- |
+| `doc`       | The document to which the copy is bound. Defaults to `None` for the current doc. ~~Optional[Doc]~~ |
+| **RETURNS** | A copy of the `SpanGroup` object. ~~SpanGroup~~                                                    |

 ## SpanGroup.to_bytes {#to_bytes tag="method"}

--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@ -45,16 +45,16 @@ specified separately using the new `exclude` keyword argument.
 > nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger"])
 > ```

-| Name                                 | Description                                                                                                                                                                                                                                    |
-| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `name`                               | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                              |
-| _keyword-only_                       |                                                                                                                                                                                                                                                |
-| `vocab`                              | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                          |
-| `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
-| `enable`                             | Names of pipeline components to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~List[str]~~                                                                                                               |
-| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           |
-| `config` <Tag variant="new">3</Tag>  | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                             |
-| **RETURNS**                          | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                     |
+| Name                                  | Description                                                                                                                                                                                                                                                        |
+| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `name`                                | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~                                                                                                                                                                                                  |
+| _keyword-only_                        |                                                                                                                                                                                                                                                                    |
+| `vocab`                               | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                              |
+| `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~                                                                                                               |
+| `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                           |
+| `config` <Tag variant="new">3</Tag>   | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 |
+| **RETURNS**                           | A `Language` object with the loaded pipeline. ~~Language~~                                                                                                                                                                                                         |

 Essentially, `spacy.load()` is a convenience wrapper that reads the pipeline's
 [`config.cfg`](/api/data-formats#config), uses the language and pipeline
@ -275,8 +275,8 @@ Render a dependency parse tree or named entity visualization.

 ### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"}

-Generate dependency parse in `{'words': [], 'arcs': []}` format.
-For use with the `manual=True` argument in `displacy.render`.
+Generate dependency parse in `{'words': [], 'arcs': []}` format. For use with
+the `manual=True` argument in `displacy.render`.

 > #### Example
 >
@ -297,8 +297,8 @@ For use with the `manual=True` argument in `displacy.render`.

 ### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"}

-Generate named entities in `[{start: i, end: i, label: 'label'}]` format.
-For use with the `manual=True` argument in `displacy.render`.
+Generate named entities in `[{start: i, end: i, label: 'label'}]` format. For
+use with the `manual=True` argument in `displacy.render`.

 > #### Example
 >
@ -319,8 +319,8 @@ For use with the `manual=True` argument in `displacy.render`.

 ### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"}

-Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format.
-For use with the `manual=True` argument in `displacy.render`.
+Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. For
+use with the `manual=True` argument in `displacy.render`.

 > #### Example
 >
@ -451,7 +451,7 @@ factories.
 | Registry name     | Description                                                                                                                                                                                                                                        |
 | ----------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `architectures`   | Registry for functions that create [model architectures](/api/architectures). Can be used to register custom model architectures and reference them in the `config.cfg`.                                                                           |
-| `augmenters`      | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators.                                                                                                                       |
+| `augmenters`      | Registry for functions that create [data augmentation](#augmenters) callbacks for corpora and other training data iterators.                                                                                                                       |
 | `batchers`        | Registry for training and evaluation [data batchers](#batchers).                                                                                                                                                                                   |
 | `callbacks`       | Registry for custom callbacks to [modify the `nlp` object](/usage/training#custom-code-nlp-callbacks) before training.                                                                                                                             |
 | `displacy_colors` | Registry for custom color scheme for the [`displacy` NER visualizer](/usage/visualizers). Automatically reads from [entry points](/usage/saving-loading#entry-points).                                                                             |
@ -505,7 +505,7 @@ finished. To log each training step, a
 and the accuracy scores on the development set.

 The built-in, default logger is the ConsoleLogger, which prints results to the
-console in tabular format. The
+console in tabular format and optionally saves them to a `jsonl` file. The
 [spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
 a dependency of spaCy, enables other loggers, such as one that sends results to
 a [Weights & Biases](https://www.wandb.com/) dashboard.
@ -513,16 +513,20 @@ a [Weights & Biases](https://www.wandb.com/) dashboard.
 Instead of using one of the built-in loggers, you can
 [implement your own](/usage/training#custom-logging).

-#### spacy.ConsoleLogger.v1 {#ConsoleLogger tag="registered function"}
+#### spacy.ConsoleLogger.v2 {#ConsoleLogger tag="registered function"}

 > #### Example config
 >
 > ```ini
 > [training.logger]
-> @loggers = "spacy.ConsoleLogger.v1"
+> @loggers = "spacy.ConsoleLogger.v2"
+> progress_bar = true
+> console_output = true
+> output_file = "training_log.jsonl"
 > ```

-Writes the results of a training step to the console in a tabular format.
+Writes the results of a training step to the console in a tabular format and
+optionally saves them to a `jsonl` file.

 <Accordion title="Example console output" spaced>

@ -536,22 +540,23 @@ $ python -m spacy train config.cfg
 ℹ Pipeline: ['tok2vec', 'tagger']
 ℹ Start training
 ℹ Training. Initial learn rate: 0.0
+ℹ Saving results to training_log.jsonl

 E     #        LOSS TOK2VEC   LOSS TAGGER   TAG_ACC   SCORE
 ---   ------   ------------   -----------   -------   ------
-  1        0           0.00         86.20      0.22     0.00
-  1      200           3.08      18968.78     34.00     0.34
-  1      400          31.81      22539.06     33.64     0.34
-  1      600          92.13      22794.91     43.80     0.44
-  1      800         183.62      21541.39     56.05     0.56
-  1     1000         352.49      25461.82     65.15     0.65
-  1     1200         422.87      23708.82     71.84     0.72
-  1     1400         601.92      24994.79     76.57     0.77
-  1     1600         662.57      22268.02     80.20     0.80
-  1     1800        1101.50      28413.77     82.56     0.83
-  1     2000        1253.43      28736.36     85.00     0.85
-  1     2200        1411.02      28237.53     87.42     0.87
-  1     2400        1605.35      28439.95     88.70     0.89
+  0        0           0.00         86.20      0.22     0.00
+  0      200           3.08      18968.78     34.00     0.34
+  0      400          31.81      22539.06     33.64     0.34
+  0      600          92.13      22794.91     43.80     0.44
+  0      800         183.62      21541.39     56.05     0.56
+  0     1000         352.49      25461.82     65.15     0.65
+  0     1200         422.87      23708.82     71.84     0.72
+  0     1400         601.92      24994.79     76.57     0.77
+  0     1600         662.57      22268.02     80.20     0.80
+  0     1800        1101.50      28413.77     82.56     0.83
+  0     2000        1253.43      28736.36     85.00     0.85
+  0     2200        1411.02      28237.53     87.42     0.87
+  0     2400        1605.35      28439.95     88.70     0.89
 ```

 Note that the cumulative loss keeps increasing within one epoch, but should
@ -559,6 +564,12 @@ start decreasing across epochs.

 </Accordion>

+| Name             | Description                                                           |
+| ---------------- | --------------------------------------------------------------------- |
+| `progress_bar`   | Whether the logger should print the progress bar. ~~bool~~            |
+| `console_output` | Whether the logger should print the logs on the console. ~~bool~~     |
+| `output_file`    | The file to save the training logs to. ~~Optional[Union[str, Path]]~~ |
+
 ## Readers {#readers}

 ### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
@ -1038,15 +1049,16 @@ and create a `Language` object. The model data will then be loaded in via
 > nlp = util.load_model("/path/to/data")
 > ```

-| Name                                 | Description                                                                                                                                                                                                                                      |
-| ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `name`                               | Package name or path. ~~str~~                                                                                                                                                                                                                    |
-| _keyword-only_                       |                                                                                                                                                                                                                                                  |
-| `vocab`                              | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                            |
-| `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~List[str]~~ |
-| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                             |
-| `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                   |
-| **RETURNS**                          | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                          |
+| Name                                  | Description                                                                                                                                                                                                                                                          |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `name`                                | Package name or path. ~~str~~                                                                                                                                                                                                                                        |
+| _keyword-only_                        |                                                                                                                                                                                                                                                                      |
+| `vocab`                               | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                                |
+| `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  |
+| `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             |
+| `config` <Tag variant="new">3</Tag>   | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                                       |
+| **RETURNS**                           | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                                              |

 ### util.load_model_from_init_py {#util.load_model_from_init_py tag="function" new="2"}

@ -1062,15 +1074,16 @@ A helper function to use in the `load()` method of a pipeline package's
 >     return load_model_from_init_py(__file__, **overrides)
 > ```

-| Name                                 | Description                                                                                                                                                                                                                                    |
-| ------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `init_file`                          | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~                                                                                                                                                                         |
-| _keyword-only_                       |                                                                                                                                                                                                                                                |
-| `vocab` <Tag variant="new">3</Tag>   | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                          |
-| `disable`                            | Names of pipeline components to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~List[str]~~ |
-| `exclude` <Tag variant="new">3</Tag> | Names of pipeline components to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~List[str]~~                                                                                                           |
-| `config` <Tag variant="new">3</Tag>  | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                 |
-| **RETURNS**                          | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                        |
+| Name                                  | Description                                                                                                                                                                                                                                                          |
+| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `init_file`                           | Path to package's `__init__.py`, i.e. `__file__`. ~~Union[str, Path]~~                                                                                                                                                                                               |
+| _keyword-only_                        |                                                                                                                                                                                                                                                                      |
+| `vocab` <Tag variant="new">3</Tag>    | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~                                                                                                                                |
+| `disable`                             | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `enable` <Tag variant="new">3.4</Tag> | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~                                  |
+| `exclude` <Tag variant="new">3</Tag>  | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~                                                                                                             |
+| `config` <Tag variant="new">3</Tag>   | Config overrides as nested dict or flat dict keyed by section values in dot notation, e.g. `"nlp.pipeline"`. ~~Union[Dict[str, Any], Config]~~                                                                                                                       |
+| **RETURNS**                           | `Language` class with the loaded pipeline. ~~Language~~                                                                                                                                                                                                              |

 ### util.load_config {#util.load_config tag="function" new="3"}

--- a/website/docs/usage/models.md
+++ b/website/docs/usage/models.md
@ -396,15 +396,32 @@ pipeline package can be found.
 To download a trained pipeline directly using
 [pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
 path of the wheel file or archive. Installing the wheel is usually more
-efficient. To find the direct link to a package, head over to the
-[releases](https://github.com/explosion/spacy-models/releases), right click on
-the archive link and copy it to your clipboard.
+efficient.
+
+> #### Pipeline Package URLs {#pipeline-urls}
+>
+> Pretrained pipeline distributions are hosted on
+> [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you
+> can find download links there, as well as on the model page. You can also get
+> URLs directly from the command line by using `spacy info` with the `--url`
+> flag, which may be useful for automation.
+>
+> ```bash
+> spacy info en_core_web_sm --url
+> ```
+>
+> This command will print the URL for the latest version of a pipeline
+> compatible with the version of spaCy you're using. Note that an internet
+> connection is required to look up the compatibility information.

 ```bash
 # With external URL
 $ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
 $ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz

+# Using spacy info to get the external URL
+$ pip install $(spacy info en_core_web_sm --url)
+
 # With local file
 $ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
 $ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
@ -545,21 +562,16 @@ should be specifying them directly.
 Because pipeline packages are valid Python packages, you can add them to your
 application's `requirements.txt`. If you're running your own internal PyPi
 installation, you can upload the pipeline packages there. pip's
-[requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
-supports both package names to download via a PyPi server, as well as direct
-URLs.
+[requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/)
+supports both package names to download via a PyPi server, as well as
+[direct URLs](#pipeline-urls).

 ```text
 ### requirements.txt
 spacy>=3.0.0,<4.0.0
-https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
 ```

-Specifying `#egg=` with the package name tells pip which package to expect from
-the download URL. This way, the package won't be re-downloaded and overwritten
-if it's already installed - just like when you're downloading a package from
-PyPi.
-
 All pipeline packages are versioned and specify their spaCy dependency. This
 ensures cross-compatibility and lets you specify exact version requirements for
 each pipeline. If you've [trained](/usage/training) your own pipeline, you can
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@ -1192,7 +1192,7 @@
            "slogan": "Fast, flexible and transparent sentiment analysis",
            "description": "Asent is a rule-based sentiment analysis library for Python made using spaCy. It is inspired by VADER, but uses a more modular ruleset, that allows the user to change e.g. the method for finding negations. Furthermore it includes visualisers to visualize the model predictions, making the model easily interpretable.",
            "github": "kennethenevoldsen/asent",
-            "pip": "aseny",
+            "pip": "asent",
            "code_example": [
                "import spacy",
                "import asent",
--- a/website/src/templates/models.js
+++ b/website/src/templates/models.js
@ -76,6 +76,7 @@ const MODEL_META = {
    benchmark_ner: 'NER accuracy',
    benchmark_speed: 'Speed',
    compat: 'Latest compatible package version for your spaCy installation',
+    download_link: 'Download link for the pipeline',
 }

 const LABEL_SCHEME_META = {
@ -138,6 +139,13 @@ function formatAccuracy(data, lang) {
        .filter(item => item)
 }

+function formatDownloadLink(lang, name, version) {
+  const fullName = `${lang}_${name}-${version}`
+  const filename = `${fullName}-py3-none-any.whl`
+  const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}`
+  return <Link to={url} hideIcon>{filename}</Link>
+}
+
 function formatModelMeta(data) {
    return {
        fullName: `${data.lang}_${data.name}-${data.version}`,
@ -154,6 +162,7 @@ function formatModelMeta(data) {
        labels: isEmptyObj(data.labels) ? null : data.labels,
        vectors: formatVectors(data.vectors),
        accuracy: formatAccuracy(data.performance, data.lang),
+        download_link: formatDownloadLink(data.lang, data.name, data.version),
    }
 }

@ -244,6 +253,7 @@ const Model = ({
        { label: 'Components', content: components, help: MODEL_META.components },
        { label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
        { label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
+        { label: 'Download Link', content: meta.download_link, help: MODEL_META.download_link },
        { label: 'Sources', content: sources, help: MODEL_META.sources },
        { label: 'Author', content: author },
        { label: 'License', content: license },