Update CLI to use radicli

Ines Montani 2023-02-08 16:17:10 +01:00
parent eec5ccd72f
commit d292c6fc78
31 changed files with 677 additions and 750 deletions

View File

@ -9,9 +9,9 @@ murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.8.0
pathy>=0.10.0
smart-open>=5.2.1,<7.0.0
radicli>=0.0.1,<1.0.0
# Third party dependencies
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -41,8 +41,8 @@ install_requires =
wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
radicli>=0.0.1,<1.0.0
# Third-party dependencies
typer>=0.3.0,<0.8.0
pathy>=0.10.0
smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0

View File

@ -1,10 +1,8 @@
from wasabi import msg
from ._util import app, setup_cli # noqa: F401
from ._util import cli, setup_cli # noqa: F401
# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
# are registered automatically and won't have to be imported here.
from .benchmark_speed import benchmark_speed_cli # noqa: F401
from .benchmark_speed import benchmark_speed # noqa: F401
from .download import download # noqa: F401
from .info import info # noqa: F401
from .package import package # noqa: F401
@ -25,18 +23,7 @@ from .validate import validate # noqa: F401
from .project.clone import project_clone # noqa: F401
from .project.assets import project_assets # noqa: F401
from .project.run import project_run # noqa: F401
from .project.dvc import project_update_dvc # noqa: F401
from .project.push import project_push # noqa: F401
from .project.pull import project_pull # noqa: F401
from .project.document import project_document # noqa: F401
from .find_threshold import find_threshold # noqa: F401
@app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
def link(*args, **kwargs):
"""As of spaCy v3.0, symlinks like "en" are not supported anymore. You can load trained
pipeline packages using their full names or from a directory path."""
msg.warn(
"As of spaCy v3.0, model symlinks are not supported anymore. You can load trained "
"pipeline packages using their full names or from a directory path."
)
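
Per the comment at the top of this file, importing the modules is enough: the @cli.command(...) decorators register each command as a side effect, so this module only re-exports the plain functions for programmatic use. A usage illustration (not part of the commit):

# The importable names are the actual functions, so they can be called
# directly from Python as well as via the CLI:
from spacy.cli import download

download("en_core_web_sm")  # same function the `spacy download` command runs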

View File

@ -6,15 +6,13 @@ from pathlib import Path
from wasabi import msg, Printer
import srsly
import hashlib
import typer
from click import NoSuchOption
from click.parser import split_arg_string
from typer.main import get_command
import shlex
from contextlib import contextmanager
from thinc.api import Config, ConfigValidationError, require_gpu
from thinc.util import gpu_is_available
from configparser import InterpolationError
import os
import radicli
from ..schemas import ProjectConfigSchema, validate
from ..util import import_file, run_command, make_tempdir, registry, logger
@ -37,6 +35,8 @@ HELP = """spaCy Command-line Interface
DOCS: https://spacy.io/api/cli
"""
# TODO: need to find a way to inject these now
PROJECT_HELP = f"""Command-line interface for spaCy projects and templates.
You'd typically start by cloning a project template to a local directory and
fetching its assets like datasets etc. See the project's {PROJECT_FILE} for the
@ -49,29 +49,14 @@ and custom model implementations.
BENCHMARK_HELP = """Commands for benchmarking pipelines."""
INIT_HELP = """Commands for initializing configs and pipeline packages."""
# Wrappers for Typer's annotations. Initially created to set defaults and to
# keep the names short, but not needed at the moment.
Arg = typer.Argument
Opt = typer.Option
app = typer.Typer(name=NAME, help=HELP)
benchmark_cli = typer.Typer(name="benchmark", help=BENCHMARK_HELP, no_args_is_help=True)
project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True)
debug_cli = typer.Typer(name="debug", help=DEBUG_HELP, no_args_is_help=True)
init_cli = typer.Typer(name="init", help=INIT_HELP, no_args_is_help=True)
app.add_typer(project_cli)
app.add_typer(debug_cli)
app.add_typer(benchmark_cli)
app.add_typer(init_cli)
# CLI
cli = radicli.Radicli(prog=COMMAND, help=HELP)
def setup_cli() -> None:
    # Make sure the entry points for the CLI run, so that they get imported.
registry.cli.get_all()
# Ensure that the help messages always display the correct prompt
command = get_command(app)
command(prog_name=COMMAND)
cli.run()
def parse_config_overrides(
@ -106,7 +91,7 @@ def _parse_overrides(args: List[str], is_cli: bool = False) -> Dict[str, Any]:
opt = opt.replace("--", "")
if "." not in opt:
if is_cli:
raise NoSuchOption(orig_opt)
raise radicli.CliParseError(f"unrecognized argument: {orig_opt}")
else:
msg.fail(f"{err}: can't override top-level sections", exits=1)
if "=" in opt: # we have --opt=value
@ -510,7 +495,7 @@ def get_git_version(
"""
try:
ret = run_command("git --version", capture=True)
except:
except Exception:
raise RuntimeError(error)
stdout = ret.stdout.strip()
if not stdout or not stdout.startswith("git version"):
@ -580,6 +565,18 @@ def string_to_list(value: str, intify: bool = False) -> Union[List[str], List[in
return result
def convert_string_list(value: str) -> List[str]:
return string_to_list(value)
def convert_int_list(value: str) -> List[int]:
return string_to_list(value, intify=True)
def convert_path_list(value: str) -> List[Path]:
return [Path(p) for p in string_to_list(value)]
def setup_gpu(use_gpu: int, silent=None) -> None:
"""Configure the GPU and log info."""
if silent is None:
@ -629,3 +626,20 @@ def _format_number(number: Union[int, float], ndigits: int = 2) -> str:
return f"{number:.{ndigits}f}"
else:
return str(number)
def split_arg_string(string: str) -> List[str]:
# Adapted from: https://github.com/pallets/click/blob/8b48450d5d63c747600e069d4c3e2274f41c8360/src/click/parser.py#L125
lex = shlex.shlex(string, posix=True)
lex.whitespace_split = True
lex.commenters = ""
out = []
try:
for token in lex:
out.append(token)
except ValueError:
# Raised when end-of-string is reached in an invalid state. Use
# the partial token as-is. The quote or escape character is in
# lex.state, not lex.token.
out.append(lex.token)
return out
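
A quick illustration of the vendored helper above; since it wraps shlex in POSIX mode with whitespace_split enabled, quoted values should survive as single tokens (the expected output is an assumption based on those shlex semantics, not taken from the commit):

tokens = split_arg_string('--training.seed 0 --paths.train "my data.spacy"')
# expected: ['--training.seed', '0', '--paths.train', 'my data.spacy']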

View File

@ -1,14 +1,12 @@
import tqdm
import srsly
from itertools import chain
from pathlib import Path
from typing import Optional, List, Iterable, cast, Union
from wasabi import msg
from radicli import Arg, ExistingPath, ExistingFilePath
from ._util import app, Arg, Opt, setup_gpu, import_code, walk_directory
from ._util import cli, setup_gpu, import_code, walk_directory
from ..tokens import Doc, DocBin
from ..vocab import Vocab
from ..util import ensure_path, load_model
@ -37,49 +35,30 @@ force_msg = (
DocOrStrStream = Union[Iterable[str], Iterable[Doc]]
def _stream_docbin(path: Path, vocab: Vocab) -> Iterable[Doc]:
"""
Stream Doc objects from DocBin.
"""
docbin = DocBin().from_disk(path)
for doc in docbin.get_docs(vocab):
yield doc
def _stream_jsonl(path: Path, field: str) -> Iterable[str]:
"""
Stream "text" field from JSONL. If the field "text" is
not found, an error is raised.
"""
for entry in srsly.read_jsonl(path):
if field not in entry:
msg.fail(f"{path} does not contain the required '{field}' field.", exits=1)
else:
yield entry[field]
def _stream_texts(paths: Iterable[Path]) -> Iterable[str]:
"""
Yields strings from text files in paths.
"""
for path in paths:
with open(path, "r") as fin:
text = fin.read()
yield text
@app.command("apply")
def apply_cli(
@cli.command(
"apply",
# fmt: off
model: str = Arg(..., help="Model name or path"),
data_path: Path = Arg(..., help=path_help, exists=True),
output_file: Path = Arg(..., help=out_help, dir_okay=False),
code_path: Optional[Path] = Opt(None, "--code", "-c", help=code_help),
text_key: str = Opt("text", "--text-key", "-tk", help="Key containing text string for JSONL"),
force_overwrite: bool = Opt(False, "--force", "-F", help="Force overwriting the output file"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU."),
batch_size: int = Opt(1, "--batch-size", "-b", help="Batch size."),
n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use.")
model=Arg(help="Model name or path"),
data_path=Arg(help=path_help),
output_file=Arg(help=out_help),
code_path=Arg("--code", "-c", help=code_help),
text_key=Arg("--text-key", "-tk", help="Key containing text string for JSONL"),
force_overwrite=Arg("--force", "-F", help="Force overwriting the output file"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
batch_size=Arg("--batch-size", "-b", help="Batch size"),
n_process=Arg("--n-process", "-n", help="Number of processors to use"),
# fmt: on
)
def apply_cli(
model: str,
data_path: ExistingPath,
output_file: Path,
code_path: Optional[ExistingFilePath] = None,
text_key: str = "text",
force_overwrite: bool = False,
use_gpu: int = -1,
batch_size: int = 1,
n_process: int = 1,
):
"""
Apply a trained pipeline to documents to get predictions.
@ -122,7 +101,6 @@ def apply(
)
return
nlp = load_model(model)
msg.good(f"Loaded model {model}")
vocab = nlp.vocab
streams: List[DocOrStrStream] = []
text_files = []
@ -141,3 +119,32 @@ def apply(
if output_file.suffix == "":
output_file = output_file.with_suffix(".spacy")
docbin.to_disk(output_file)
def _stream_docbin(path: Path, vocab: Vocab) -> Iterable[Doc]:
"""
Stream Doc objects from DocBin.
"""
docbin = DocBin().from_disk(path)
for doc in docbin.get_docs(vocab):
yield doc
def _stream_jsonl(path: Path, field: str) -> Iterable[str]:
"""
Stream "text" field from JSONL. If the field "text" is
not found, an error is raised.
"""
for entry in srsly.read_jsonl(path):
if field not in entry:
msg.fail(f"{path} does not contain the required '{field}' field.", exits=1)
else:
yield entry[field]
def _stream_texts(paths: Iterable[Path]) -> Iterable[str]:
"""Yields strings from text files in paths."""
for path in paths:
with open(path, "r") as fin:
text = fin.read()
yield text
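
The hunk above shows the migration pattern repeated throughout this commit: Typer declared flags and defaults inside Arg(...)/Opt(...) parameter annotations, while radicli moves the flag strings and help text into the decorator and leaves plain Python defaults on the signature. A minimal sketch of the pattern (command and argument names are illustrative, not from spaCy):

from radicli import Radicli, Arg, ExistingPath

cli = Radicli(prog="example")

@cli.command(
    "apply",
    # Positional arguments take no flag strings; options list their flags.
    model=Arg(help="Model name or path"),
    data_path=Arg(help="Input path"),
    batch_size=Arg("--batch-size", "-b", help="Batch size"),
)
def apply(model: str, data_path: ExistingPath, batch_size: int = 1):
    ...  # types and defaults now live on the signature itself

cli.run()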

View File

@ -1,27 +1,30 @@
from typing import Optional
from typing import Optional, List
from pathlib import Path
from wasabi import msg
import typer
import logging
from radicli import Arg, ExistingFilePathOrDash, ExistingFilePath
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import cli, parse_config_overrides, show_validation_error
from ._util import import_code
from .. import util
from ..util import get_sourced_components, load_model_from_config
@app.command(
@cli.command_with_extra(
"assemble",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
config_path=Arg(help="Path to config file"),
output_path=Arg(help="Output directory to store assembled pipeline in"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose=Arg("--verbose", "-V", help="Display more information for debugging purposes"),
# fmt: on
)
def assemble_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
output_path: Path = Arg(..., help="Output directory to store assembled pipeline in"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
# fmt: on
config_path: ExistingFilePathOrDash,
output_path: Optional[Path] = None,
code_path: Optional[ExistingFilePath] = None,
verbose: bool = False,
_extra: List[str] = [],
):
"""
Assemble a spaCy pipeline from a config file. The config file includes
@ -37,7 +40,7 @@ def assemble_cli(
# Make sure all files and paths exists if they are needed
if not config_path or (str(config_path) != "-" and not config_path.exists()):
msg.fail("Config file not found", config_path, exits=1)
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
import_code(code_path)
with show_validation_error(config_path):
config = util.load_config(config_path, overrides=overrides, interpolate=False)
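
command_with_extra replaces Typer's context_settings={"allow_extra_args": True, "ignore_unknown_options": True}: unrecognized arguments are collected into the _extra list instead of a typer.Context, then handed to parse_config_overrides. Roughly (exact value parsing is an assumption; spaCy interprets override values as JSON where possible):

# e.g. `spacy assemble cfg.cfg ./out --training.seed 0 --paths.train data.spacy`
_extra = ["--training.seed", "0", "--paths.train", "data.spacy"]
overrides = parse_config_overrides(_extra)
# roughly: {"training.seed": 0, "paths.train": "data.spacy"}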

View File

@ -2,56 +2,56 @@ from typing import Iterable, List, Optional
import random
from itertools import islice
import numpy
from pathlib import Path
import time
from tqdm import tqdm
import typer
from wasabi import msg
from radicli import Arg, ExistingPath
from .. import util
from ..language import Language
from ..tokens import Doc
from ..training import Corpus
from ._util import Arg, Opt, benchmark_cli, setup_gpu
from ._util import cli, setup_gpu
@benchmark_cli.command(
@cli.subcommand(
"benchmark",
"speed",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
)
def benchmark_speed_cli(
# fmt: off
ctx: typer.Context,
model: str = Arg(..., help="Model name or path"),
data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
batch_size: Optional[int] = Opt(None, "--batch-size", "-b", min=1, help="Override the pipeline batch size"),
no_shuffle: bool = Opt(False, "--no-shuffle", help="Do not shuffle benchmark data"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,),
warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"),
model=Arg(help="Model name or path"),
data_path=Arg(help="Location of binary evaluation data in .spacy format"),
batch_size=Arg("--batch-size", "-b", help="Override the pipeline batch size"),
no_shuffle=Arg("--no-shuffle", help="Do not shuffle benchmark data"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
n_batches=Arg("--batches", help="Minimum number of batches to benchmark"),
warmup_epochs=Arg("--warmup", "-w", help="Number of iterations over the data for warmup"),
# fmt: on
)
def benchmark_speed(
model: str,
data_path: ExistingPath,
batch_size: Optional[int] = None,
no_shuffle: bool = False,
use_gpu: int = -1,
n_batches: int = 50,
warmup_epochs: int = 3,
):
"""
Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark
data in the binary .spacy format.
"""
setup_gpu(use_gpu=use_gpu, silent=False)
nlp = util.load_model(model)
batch_size = batch_size if batch_size is not None else nlp.batch_size
corpus = Corpus(data_path)
docs = [eg.predicted for eg in corpus(nlp)]
if len(docs) == 0:
msg.fail("Cannot benchmark speed using an empty corpus.", exits=1)
print(f"Warming up for {warmup_epochs} epochs...")
warmup(nlp, docs, warmup_epochs, batch_size)
print()
print(f"Benchmarking {n_batches} batches...")
wps = benchmark(nlp, docs, n_batches, batch_size, not no_shuffle)
print()
print_outliers(wps)
print_mean_with_ci(wps)
@ -120,7 +120,6 @@ def benchmark(
nlp.make_doc(docs[i % len(docs)].text)
for i in range(n_batches * batch_size)
]
return annotate(nlp, bench_docs, batch_size)
@ -143,17 +142,14 @@ def print_mean_with_ci(sample: numpy.ndarray):
mean = numpy.mean(sample)
bootstrap_means = bootstrap(sample)
bootstrap_means.sort()
# 95% confidence interval
low = bootstrap_means[int(len(bootstrap_means) * 0.025)]
high = bootstrap_means[int(len(bootstrap_means) * 0.975)]
print(f"Mean: {mean:.1f} words/s (95% CI: {low-mean:.1f} +{high-mean:.1f})")
def print_outliers(sample: numpy.ndarray):
quartiles = Quartiles(sample)
n_outliers = numpy.sum(
(sample < (quartiles.q1 - 1.5 * quartiles.iqr))
| (sample > (quartiles.q3 + 1.5 * quartiles.iqr))
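
One behavioral note on this hunk: the old Typer options carried min= bounds (--batch-size >= 1, --batches >= 30, --warmup >= 0) that the radicli Arg declarations no longer express. If those guards are still wanted, a hedged sketch of equivalent in-body validation (not part of the commit; uses the module's wasabi msg):

def _check_benchmark_args(n_batches: int, warmup_epochs: int) -> None:
    # Reinstate the bounds the typer.Opt(min=...) declarations enforced.
    if n_batches < 30:
        msg.fail("--batches must be at least 30", exits=1)
    if warmup_epochs < 0:
        msg.fail("--warmup must be >= 0", exits=1)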

View File

@ -1,4 +1,4 @@
from typing import Callable, Iterable, Mapping, Optional, Any, Union
from typing import Callable, Iterable, Mapping, Optional, Any, Union, Literal
from enum import Enum
from pathlib import Path
from wasabi import Printer
@ -6,8 +6,9 @@ import srsly
import re
import sys
import itertools
from radicli import Arg, ExistingFilePath, ExistingPathOrDash, ExistingDirPathOrDash
from ._util import app, Arg, Opt, _handle_renamed_language_codes, walk_directory
from ._util import cli, _handle_renamed_language_codes, walk_directory
from ..training import docs_to_json
from ..tokens import Doc, DocBin
from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
@ -27,8 +28,8 @@ CONVERTERS: Mapping[str, Callable[..., Iterable[Doc]]] = {
"iob": iob_to_docs,
"json": json_to_docs,
}
AUTO = "auto"
ConvertersType = Literal["auto", "conllubio", "conllu", "conll", "ner", "iob", "json"]
# File types that can be written to stdout
@ -40,22 +41,36 @@ class FileTypes(str, Enum):
spacy = "spacy"
@app.command("convert")
def convert_cli(
@cli.command(
"convert",
# fmt: off
input_path: str = Arg(..., help="Input file or directory", exists=True),
output_dir: Path = Arg("-", help="Output directory. '-' for stdout.", allow_dash=True, exists=True),
file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
model: Optional[str] = Opt(None, "--model", "--base", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
converter: str = Opt(AUTO, "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
ner_map: Optional[Path] = Opt(None, "--ner-map", "-nm", help="NER tag mapping (as JSON-encoded dict of entity types)", exists=True),
lang: Optional[str] = Opt(None, "--lang", "-l", help="Language (if tokenizer required)"),
concatenate: bool = Opt(None, "--concatenate", "-C", help="Concatenate output to a single file"),
input_path=Arg(help="Input file or directory"),
output_dir=Arg(help="Output directory. '-' for stdout."),
file_type=Arg("--file-type", "-t", help="Type of data to produce"),
n_sents=Arg("--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
seg_sents=Arg("--seg-sents", "-s", help="Segment sentences (for -c ner)"),
model=Arg("--model", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
morphology=Arg("--morphology", "-m", help="Enable appending morphology to tags"),
merge_subtokens=Arg("--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
converter=Arg("--converter", "-c", help="Converter to use"),
ner_map=Arg("--ner-map", "-nm", help="NER tag mapping (as JSON-encoded dict of entity types)"),
lang=Arg("--lang", "-l", help="Language (if tokenizer required)"),
concatenate=Arg("--concatenate", "-C", help="Concatenate output to a single file"),
# fmt: on
)
def convert_cli(
input_path: ExistingPathOrDash,
output_dir: ExistingDirPathOrDash = "-",
file_type: Literal["json", "spacy"] = "spacy",
n_sents: int = 1,
seg_sents: bool = False,
model: Optional[str] = None,
morphology: bool = False,
merge_subtokens: bool = False,
converter: ConvertersType = AUTO,
ner_map: Optional[ExistingFilePath] = None,
lang: Optional[str] = None,
concatenate: bool = False,
):
"""
Convert files into json or DocBin format for training. The resulting .spacy
@ -69,15 +84,14 @@ def convert_cli(
DOCS: https://spacy.io/api/cli#convert
"""
input_path = Path(input_path)
output_dir: Union[str, Path] = "-" if output_dir == Path("-") else output_dir
silent = output_dir == "-"
msg = Printer(no_print=silent)
converter = _get_converter(msg, converter, input_path)
verify_cli_args(msg, input_path, output_dir, file_type.value, converter, ner_map)
verify_cli_args(msg, input_path, output_dir, file_type, converter, ner_map)
convert(
input_path,
output_dir,
file_type=file_type.value,
file_type=file_type,
n_sents=n_sents,
seg_sents=seg_sents,
model=model,
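
This file also shows the Enum-to-Literal move: FileTypes(str, Enum) gives way to a plain Literal["json", "spacy"] annotation, so the .value unwrapping disappears. Presumably radicli derives the permitted choices from the annotation; a minimal sketch of the idea (hypothetical command name):

from typing import Literal
from radicli import Radicli, Arg

cli = Radicli(prog="example")

@cli.command("export", file_type=Arg("--file-type", "-t", help="Type of data to produce"))
def export(file_type: Literal["json", "spacy"] = "spacy"):
    # file_type arrives as a plain, pre-validated string -- no .value needed
    print(file_type)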

View File

@ -3,27 +3,31 @@ from pathlib import Path
from wasabi import msg, table
from thinc.api import Config
from thinc.config import VARIABLE_RE
import typer
from radicli import Arg, ExistingFilePathOrDash, ExistingFilePath
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
from ._util import import_code, debug_cli
from ._util import cli, show_validation_error, parse_config_overrides
from ._util import import_code
from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
from ..util import registry
from .. import util
@debug_cli.command(
@cli.subcommand_with_extra(
"debug",
"config",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
config_path=Arg(help="Path to config file"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
show_funcs=Arg("--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
show_vars=Arg("--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI"),
# fmt: on
)
def debug_config_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
show_funcs: bool = Opt(False, "--show-functions", "-F", help="Show an overview of all registered functions used in the config and where they come from (modules, files etc.)"),
show_vars: bool = Opt(False, "--show-variables", "-V", help="Show an overview of all variables referenced in the config and their values. This will also reflect variables overwritten on the CLI.")
# fmt: on
config_path: ExistingFilePathOrDash,
code_path: Optional[ExistingFilePath] = None,
show_funcs: bool = False,
show_vars: bool = False,
_extra: List[str] = [],
):
"""Debug a config file and show validation errors. The command will
create all objects in the tree and validate them. Note that some config
@ -36,7 +40,7 @@ def debug_config_cli(
DOCS: https://spacy.io/api/cli#debug-config
"""
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
import_code(code_path)
debug_config(
config_path, overrides=overrides, show_funcs=show_funcs, show_vars=show_vars

View File

@ -5,11 +5,11 @@ from collections import Counter
import sys
import srsly
from wasabi import Printer, MESSAGES, msg
import typer
import math
from radicli import Arg, ExistingFilePathOrDash, ExistingFilePath
from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
from ._util import import_code, debug_cli, _format_number
from ._util import cli, show_validation_error, parse_config_overrides
from ._util import import_code, _format_number
from ..training import Example, remove_bilu_prefix
from ..training.initialize import get_sourced_components
from ..schemas import ConfigSchemaTraining
@ -40,23 +40,24 @@ BOUNDARY_DISTINCT_THRESHOLD = 1
SPAN_LENGTH_THRESHOLD_PERCENTAGE = 90
@debug_cli.command(
"data", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
)
@app.command(
"debug-data",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
hidden=True, # hide this from main CLI help but still allow it to work with warning
@cli.subcommand_with_extra(
"debug",
"data",
# fmt: off
config_path=Arg(help="Path to config file"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
ignore_warnings=Arg("--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
verbose=Arg("--verbose", "-V", help="Print additional information and explanations"),
no_format=Arg("--no-format", "-NF", help="Don't pretty-print the results"),
# fmt: on
)
def debug_data_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
ignore_warnings: bool = Opt(False, "--ignore-warnings", "-IW", help="Ignore warnings, only show stats and errors"),
verbose: bool = Opt(False, "--verbose", "-V", help="Print additional information and explanations"),
no_format: bool = Opt(False, "--no-format", "-NF", help="Don't pretty-print the results"),
# fmt: on
config_path: ExistingFilePathOrDash,
code_path: Optional[ExistingFilePath] = None,
ignore_warnings: bool = False,
verbose: bool = False,
no_format: bool = False,
_extra: List[str] = [],
):
"""
Analyze, debug and validate your training and development data. Outputs
@ -65,13 +66,7 @@ def debug_data_cli(
DOCS: https://spacy.io/api/cli#debug-data
"""
if ctx.command.name == "debug-data":
msg.warn(
"The debug-data command is now available via the 'debug data' "
"subcommand (without the hyphen). You can run python -m spacy debug "
"--help for an overview of the other available debugging commands."
)
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
import_code(code_path)
debug_data(
config_path,

View File

@ -1,29 +1,32 @@
from typing import Optional
import typer
from wasabi import Printer, diff_strings, MarkdownRenderer
from pathlib import Path
from thinc.api import Config
from radicli import Arg, ExistingFilePathOrDash, ExistingFilePath
from ._util import debug_cli, Arg, Opt, show_validation_error, parse_config_overrides
from ._util import cli, show_validation_error
from .init_config import init_config, OptimizationsType
from ..util import load_config
from .init_config import init_config, Optimizations
@debug_cli.command(
@cli.subcommand(
"debug",
"diff-config",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
config_path=Arg(help="Path to config file"),
compare_to=Arg(help="Path to a config file to diff against, or `None` to compare against default settings"),
optimize=Arg("--optimize", "-o", help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config"),
gpu=Arg("--gpu", "-G", help="Whether the original config can run on a GPU. Only relevant when comparing against the default config"),
pretraining=Arg("--pretraining", "--pt", help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config"),
markdown=Arg("--markdown", "-md", help="Generate Markdown for GitHub issues"),
# fmt: on
)
def debug_diff_cli(
# fmt: off
ctx: typer.Context,
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
compare_to: Optional[Path] = Opt(None, help="Path to a config file to diff against, or `None` to compare against default settings", exists=True, allow_dash=True),
optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether the user config was optimized for efficiency or accuracy. Only relevant when comparing against the default config."),
gpu: bool = Opt(False, "--gpu", "-G", help="Whether the original config can run on a GPU. Only relevant when comparing against the default config."),
pretraining: bool = Opt(False, "--pretraining", "--pt", help="Whether to compare on a config with pretraining involved. Only relevant when comparing against the default config."),
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues")
# fmt: on
config_path: ExistingFilePathOrDash,
compare_to: Optional[ExistingFilePath] = None,
optimize: OptimizationsType = "efficiency",
gpu: bool = False,
pretraining: bool = False,
markdown: bool = False,
):
"""Show a diff of a config file with respect to spaCy's defaults or another config file. If
additional settings were used in the creation of the config file, then you
@ -50,7 +53,7 @@ def debug_diff(
config_path: Path,
compare_to: Optional[Path],
gpu: bool,
optimize: Optimizations,
optimize: OptimizationsType,
pretraining: bool,
markdown: bool,
):
@ -68,7 +71,7 @@ def debug_diff(
other_config = init_config(
lang=lang,
pipeline=pipeline,
optimize=optimize.value,
optimize=optimize,
gpu=gpu,
pretraining=pretraining,
silent=True,

View File

@ -1,41 +1,51 @@
from typing import Dict, Any, Optional
from pathlib import Path
from typing import Dict, Any, Optional, List
import itertools
from spacy.training import Example
from spacy.util import resolve_dot_names
from wasabi import msg
from thinc.api import fix_random_seed, set_dropout_rate
from thinc.api import Model, data_validation, set_gpu_allocator
import typer
from radicli import Arg, ExistingFilePathOrDash
from ._util import Arg, Opt, debug_cli, show_validation_error
from ._util import parse_config_overrides, string_to_list, setup_gpu
from ._util import cli, show_validation_error
from ._util import parse_config_overrides, convert_int_list, setup_gpu
from ..schemas import ConfigSchemaTraining
from ..util import registry
from .. import util
@debug_cli.command(
@cli.subcommand_with_extra(
"debug",
"model",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
config_path=Arg(help="Path to config file"),
component=Arg(help="Name of the pipeline component of which the model should be analyzed"),
layers=Arg(help="Comma-separated names of layer IDs to print", converter=convert_int_list),
dimensions=Arg("--dimensions", "-DIM", help="Show dimensions"),
parameters=Arg("--parameters", "-PAR", help="Show parameters"),
gradients=Arg("--gradients", "-GRAD", help="Show gradients"),
attributes=Arg("--attributes", "-ATTR", help="Show attributes"),
P0=Arg("--print-step0", "-P0", help="Print model before training"),
P1=Arg("--print-step1", "-P1", help="Print model after initialization"),
P2=Arg("--print-step2", "-P2", help="Print model after training"),
P3=Arg("--print-step3", "-P3", help="Print final predictions"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
)
def debug_model_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
component: str = Arg(..., help="Name of the pipeline component of which the model should be analysed"),
layers: str = Opt("", "--layers", "-l", help="Comma-separated names of layer IDs to print"),
dimensions: bool = Opt(False, "--dimensions", "-DIM", help="Show dimensions"),
parameters: bool = Opt(False, "--parameters", "-PAR", help="Show parameters"),
gradients: bool = Opt(False, "--gradients", "-GRAD", help="Show gradients"),
attributes: bool = Opt(False, "--attributes", "-ATTR", help="Show attributes"),
P0: bool = Opt(False, "--print-step0", "-P0", help="Print model before training"),
P1: bool = Opt(False, "--print-step1", "-P1", help="Print model after initialization"),
P2: bool = Opt(False, "--print-step2", "-P2", help="Print model after training"),
P3: bool = Opt(False, "--print-step3", "-P3", help="Print final predictions"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
# fmt: on
config_path: ExistingFilePathOrDash,
component: str,
layers: List[int] = [],
dimensions: bool = False,
parameters: bool = False,
gradients: bool = False,
attributes: bool = False,
P0: bool = False,
P1: bool = False,
P2: bool = False,
P3: bool = False,
use_gpu: int = -1,
_extra: List[str] = [],
):
"""
Analyze a Thinc model implementation. Includes checks for internal structure
@ -44,7 +54,6 @@ def debug_model_cli(
DOCS: https://spacy.io/api/cli#debug-model
"""
setup_gpu(use_gpu)
layers = string_to_list(layers, intify=True)
print_settings = {
"dimensions": dimensions,
"parameters": parameters,
@ -56,7 +65,7 @@ def debug_model_cli(
"print_after_training": P2,
"print_prediction": P3,
}
config_overrides = parse_config_overrides(ctx.args)
config_overrides = parse_config_overrides(_extra)
with show_validation_error(config_path):
raw_config = util.load_config(
config_path, overrides=config_overrides, interpolate=False
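
Arg(..., converter=...) is how list-valued options survive the migration: the converter callable receives the raw CLI string before the function body runs, replacing the old in-body string_to_list(layers, intify=True) call. Assuming string_to_list splits on commas, as its usage suggests:

# --layers "0,1,2" on the command line should reach debug_model_cli as:
assert convert_int_list("0,1,2") == [0, 1, 2]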

View File

@ -1,27 +1,26 @@
from typing import Optional, Sequence
from typing import Optional, Sequence, List
import requests
import sys
from wasabi import msg
import typer
from radicli import Arg
from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX
from ._util import cli, WHEEL_SUFFIX, SDIST_SUFFIX
from .. import about
from ..util import is_package, get_minor_version, run_command
from ..util import is_prerelease_version, get_installed_models
from ..util import get_package_version
@app.command(
@cli.command_with_extra(
"download",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
model=Arg(help="Name of pipeline package to download"),
direct=Arg("--direct", "-D", help="Force direct download of name + version"),
sdist=Arg("--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
# fmt: on
)
def download_cli(
# fmt: off
ctx: typer.Context,
model: str = Arg(..., help="Name of pipeline package to download"),
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
# fmt: on
model: str, direct: bool = False, sdist: bool = False, _extra: List[str] = []
):
"""
Download compatible trained pipeline from the default download path using
@ -33,7 +32,7 @@ def download_cli(
DOCS: https://spacy.io/api/cli#download
AVAILABLE PACKAGES: https://spacy.io/models
"""
download(model, direct, sdist, *ctx.args)
download(model, direct, sdist, *_extra)
def download(

View File

@ -1,33 +1,42 @@
from typing import Optional, List, Dict, Any, Union
from typing import Optional, List, Dict, Any
from wasabi import Printer
from pathlib import Path
import re
import srsly
from thinc.api import fix_random_seed
from radicli import Arg, ExistingPath, ExistingDirPath, ExistingFilePath
from ..training import Corpus
from ..tokens import Doc
from ._util import app, Arg, Opt, setup_gpu, import_code, benchmark_cli
from ..scorer import Scorer
from ._util import cli, setup_gpu, import_code
from .. import util
from .. import displacy
@benchmark_cli.command(
"accuracy",
)
@app.command("evaluate")
def evaluate_cli(
args = dict(
# fmt: off
model: str = Arg(..., help="Model name or path"),
data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
output: Optional[Path] = Opt(None, "--output", "-o", help="Output JSON file for metrics", dir_okay=False),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
model=Arg(help="Model name or path"),
data_path=Arg(help="Location of binary evaluation data in .spacy format"),
output=Arg("--output", "-o", help="Output JSON file for metrics"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc=Arg("--gold-preproc", "-G", help="Use gold preprocessing"),
displacy_path=Arg("--displacy-path", "-dp", help="Directory to output rendered parses as HTML"),
displacy_limit=Arg("--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
# fmt: on
)
@cli.subcommand("benchmark", "accuracy", **args)
@cli.command("evaluate", **args)
def evaluate_cli(
model: str,
data_path: ExistingPath,
output: Optional[ExistingFilePath] = None,
code_path: Optional[ExistingFilePath] = None,
use_gpu: int = -1,
gold_preproc: bool = False,
displacy_path: Optional[ExistingDirPath] = None,
displacy_limit: int = 25,
):
"""
Evaluate a trained pipeline. Expects a loadable spaCy pipeline and evaluation
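
Worth noting in this hunk: the argument specs are built once in a plain args dict and the same function is stacked under two decorators, so `spacy evaluate` and `spacy benchmark accuracy` share one implementation. A minimal sketch of the double-registration pattern:

args = dict(model=Arg(help="Model name or path"))

@cli.subcommand("benchmark", "accuracy", **args)
@cli.command("evaluate", **args)
def evaluate_cli(model: str):
    ...  # reachable as both `evaluate` and `benchmark accuracy`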

View File

@ -3,14 +3,14 @@ import operator
from pathlib import Path
import logging
from typing import Optional, Tuple, Any, Dict, List
import numpy
import wasabi.tables
from radicli import Arg, ExistingPath, ExistingFilePath
from ..pipeline import TextCategorizer, MultiLabel_TextCategorizer
from ..errors import Errors
from ..training import Corpus
from ._util import app, Arg, Opt, import_code, setup_gpu
from ._util import cli, import_code, setup_gpu
from .. import util
_DEFAULTS = {
@ -20,23 +20,32 @@ _DEFAULTS = {
}
@app.command(
@cli.command(
"find-threshold",
context_settings={"allow_extra_args": False, "ignore_unknown_options": True},
# fmt: off
model=Arg(help="Model name or path"),
data_path=Arg(help="Location of binary evaluation data in .spacy format"),
pipe_name=Arg(help="Name of pipe to examine thresholds for"),
threshold_key=Arg(help="Key of threshold attribute in component's configuration"),
scores_key=Arg(help="Metric to optimize"),
n_trials=Arg("--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc=Arg("--gold-preproc", "-G", help="Use gold preprocessing"),
verbose=Arg("--verbose", "-V", help="Display more information for debugging purposes"),
# fmt: on
)
def find_threshold_cli(
# fmt: off
model: str = Arg(..., help="Model name or path"),
data_path: Path = Arg(..., help="Location of binary evaluation data in .spacy format", exists=True),
pipe_name: str = Arg(..., help="Name of pipe to examine thresholds for"),
threshold_key: str = Arg(..., help="Key of threshold attribute in component's configuration"),
scores_key: str = Arg(..., help="Metric to optimize"),
n_trials: int = Opt(_DEFAULTS["n_trials"], "--n_trials", "-n", help="Number of trials to determine optimal thresholds"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
# fmt: on
model: str,
data_path: ExistingPath,
pipe_name: str,
threshold_key: str,
scores_key: str,
n_trials: int = _DEFAULTS["n_trials"],
code_path: Optional[ExistingFilePath] = None,
use_gpu: int = _DEFAULTS["use_gpu"],
gold_preproc: bool = _DEFAULTS["gold_preproc"],
verbose: bool = False,
):
"""
Runs prediction trials for a trained model with varying thresholds to maximize
@ -52,7 +61,6 @@ def find_threshold_cli(
DOCS: https://spacy.io/api/cli#find-threshold
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
import_code(code_path)
find_threshold(
@ -110,19 +118,16 @@ def find_threshold(
pipe = nlp.get_pipe(pipe_name)
if not hasattr(pipe, "scorer"):
raise AttributeError(Errors.E1045)
if type(pipe) == TextCategorizer:
wasabi.msg.warn(
"The `textcat` component doesn't use a threshold as it's not applicable to the concept of "
"exclusive classes. All thresholds will yield the same results."
)
if not silent:
wasabi.msg.info(
title=f"Optimizing for {scores_key} for component '{pipe_name}' with {n_trials} "
f"trials."
)
# Load evaluation corpus.
corpus = Corpus(data_path, gold_preproc=gold_preproc)
dev_dataset = list(corpus(nlp))
@ -209,9 +214,7 @@ def find_threshold(
widths=table_col_widths,
)
)
best_threshold = max(scores.keys(), key=(lambda key: scores[key]))
# If all scores are identical, emit warning.
if len(set(scores.values())) == 1:
wasabi.msg.warn(
@ -223,7 +226,6 @@ def find_threshold(
)
else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`.",
)
else:
if not silent:
print(

View File

@ -5,23 +5,31 @@ import json
from pathlib import Path
from wasabi import Printer, MarkdownRenderer
import srsly
from radicli import Arg
from ._util import app, Arg, Opt, string_to_list
from ._util import cli
from .download import get_model_filename, get_latest_version
from .. import util
from .. import about
@app.command("info")
def info_cli(
@cli.command(
"info",
# fmt: off
model: Optional[str] = Arg(None, help="Optional loadable spaCy pipeline"),
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
model=Arg(help="Optional loadable spaCy pipeline"),
markdown=Arg("--markdown", "-md", help="Generate Markdown for GitHub issues"),
silent=Arg("--silent", "-S", help="Don't print anything (just return)"),
exclude=Arg("--exclude", "-e", help="Keys to exclude from the print-out"),
url=Arg("--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
# fmt: on
):
)
def info(
model: Optional[str] = None,
markdown: bool = False,
silent: bool = False,
exclude: List[str] = [],
url: bool = False,
) -> Union[str, dict]:
"""
Print info about spaCy installation. If a pipeline is specified as an argument,
print its meta information. Flag --markdown prints details in Markdown for easy
@ -32,24 +40,6 @@ def info_cli(
DOCS: https://spacy.io/api/cli#info
"""
exclude = string_to_list(exclude)
info(
model,
markdown=markdown,
silent=silent,
exclude=exclude,
url=url,
)
def info(
model: Optional[str] = None,
*,
markdown: bool = False,
silent: bool = True,
exclude: Optional[List[str]] = None,
url: bool = False,
) -> Union[str, dict]:
msg = Printer(no_print=silent, pretty=not silent)
if not exclude:
exclude = []
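
Here the old two-layer design (an info_cli wrapper delegating to info) collapses into a single decorated function, matching the note in __init__.py that the importable names are the actual functions rather than wrapped CLI commands. The exclude parameter is now annotated List[str], so the manual string_to_list(exclude) step is gone; presumably radicli splits the comma-separated value itself. The same callable stays usable from Python, e.g. (an illustration, not from the commit):

# Returns a dict here since markdown=False; with markdown=True it returns str.
meta = info("en_core_web_sm", silent=True, exclude=["labels"])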

View File

@ -1,18 +1,18 @@
from typing import Optional, List, Tuple
from enum import Enum
from typing import Optional, List, Tuple, Literal
from pathlib import Path
from wasabi import Printer, diff_strings
from thinc.api import Config
import srsly
import re
from jinja2 import Template
from radicli import Arg, PathOrDash, ExistingFilePath
from .. import util
from ..language import DEFAULT_CONFIG_DISTILL_PATH, DEFAULT_CONFIG_PRETRAIN_PATH
from ..schemas import RecommendationSchema
from ..util import SimpleFrozenList
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
from ._util import string_to_list, import_code, _handle_renamed_language_codes
from ._util import cli, convert_string_list, show_validation_error, COMMAND
from ._util import import_code, _handle_renamed_language_codes
ROOT = Path(__file__).parent / "templates"
@ -20,9 +20,7 @@ TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
RECOMMENDATIONS = srsly.read_yaml(ROOT / "quickstart_training_recommendations.yml")
class Optimizations(str, Enum):
efficiency = "efficiency"
accuracy = "accuracy"
OptimizationsType = Literal["efficiency", "accuracy"]
class InitValues:
@ -33,23 +31,33 @@ class InitValues:
lang = "en"
pipeline = SimpleFrozenList(["tagger", "parser", "ner"])
optimize = Optimizations.efficiency
optimize = "efficiency"
gpu = False
pretraining = False
force_overwrite = False
@init_cli.command("config")
def init_config_cli(
@cli.subcommand(
"init",
"config",
# fmt: off
output_file: Path = Arg(..., help="File to save the config to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
lang: str = Opt(InitValues.lang, "--lang", "-l", help="Code of the language to use"),
pipeline: str = Opt(",".join(InitValues.pipeline), "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')"),
optimize: Optimizations = Opt(InitValues.optimize, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
gpu: bool = Opt(InitValues.gpu, "--gpu", "-G", help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
pretraining: bool = Opt(InitValues.pretraining, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
force_overwrite: bool = Opt(InitValues.force_overwrite, "--force", "-F", help="Force overwriting the output file"),
output_file=Arg(help="File to save the config to or - for stdout (will only output config and no additional logging info)"),
lang=Arg("--lang", "-l", help="Code of the language to use"),
pipeline=Arg("--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include (without 'tok2vec' or 'transformer')", converter=convert_string_list),
optimize=Arg("--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
gpu=Arg("--gpu", "-G", help="Whether the model can run on GPU. This will impact the choice of architecture, pretrained weights and related hyperparameters."),
pretraining=Arg("--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
force_overwrite=Arg("--force", "-F", help="Force overwriting the output file"),
# fmt: on
)
def init_config_cli(
output_file: PathOrDash,
lang: str = InitValues.lang,
pipeline: List[str] = InitValues.pipeline,
optimize: OptimizationsType = InitValues.optimize,
gpu: bool = InitValues.gpu,
pretraining: bool = InitValues.pretraining,
force_overwrite: bool = InitValues.force_overwrite,
):
"""
Generate a starter config file for training. Based on your requirements
@ -59,8 +67,7 @@ def init_config_cli(
DOCS: https://spacy.io/api/cli#init-config
"""
pipeline = string_to_list(pipeline)
is_stdout = str(output_file) == "-"
is_stdout = output_file == "-"
if not is_stdout and output_file.exists() and not force_overwrite:
msg = Printer()
msg.fail(
@ -70,7 +77,7 @@ def init_config_cli(
config = init_config(
lang=lang,
pipeline=pipeline,
optimize=optimize.value,
optimize=optimize,
gpu=gpu,
pretraining=pretraining,
silent=is_stdout,
@ -78,16 +85,25 @@ def init_config_cli(
save_config(config, output_file, is_stdout=is_stdout)
@init_cli.command("fill-config")
def init_fill_config_cli(
@cli.subcommand(
"init",
"fill-config",
# fmt: off
base_path: Path = Arg(..., help="Path to base config to fill", exists=True, dir_okay=False),
output_file: Path = Arg("-", help="Path to output .cfg file (or - for stdout)", allow_dash=True),
distillation: bool = Opt(False, "--distillation", "-dt", help="Include config for distillation (with 'spacy distill')"),
pretraining: bool = Opt(False, "--pretraining", "-pt", help="Include config for pretraining (with 'spacy pretrain')"),
diff: bool = Opt(False, "--diff", "-D", help="Print a visual diff highlighting the changes"),
code_path: Optional[Path] = Opt(None, "--code-path", "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
base_path=Arg(help="Path to base config to fill"),
output_file=Arg(help="Path to output .cfg file (or - for stdout)"),
distillation=Arg("--distillation", "-dt", help="Include config for distillation (with 'spacy distill')"),
pretraining=Arg("--pretraining", "-pt", help="Include config for pretraining (with `spacy pretrain`)"),
diff=Arg("--diff", "-D", help="Print a visual diff highlighting the changes"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
# fmt: on
)
def init_fill_config_cli(
base_path: ExistingFilePath,
output_file: PathOrDash = "-",
distillation: bool = False,
pretraining: bool = False,
diff: bool = False,
code_path: Optional[ExistingFilePath] = None,
):
"""
Fill partial config file with default values. Will add all missing settings
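
Two details above follow the conversions seen elsewhere: pipeline is declared with converter=convert_string_list, so the comma-separated value arrives pre-split and the in-body string_to_list call disappears; and output_file is typed PathOrDash, meaning the parsed value is either a real Path or the literal string "-", hence the simplified is_stdout check. Assuming convert_string_list splits on commas:

assert convert_string_list("tagger,parser,ner") == ["tagger", "parser", "ner"]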

View File

@ -1,30 +1,42 @@
from typing import Optional
from typing import Optional, Literal, List
import logging
from pathlib import Path
from wasabi import msg
import typer
import srsly
from radicli import Arg, ExistingPath, ExistingFilePathOrDash, ExistingFilePath
from .. import util
from ..training.initialize import init_nlp, convert_vectors
from ..language import Language
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import cli, parse_config_overrides, show_validation_error
from ._util import import_code, setup_gpu, _handle_renamed_language_codes
@init_cli.command("vectors")
def init_vectors_cli(
@cli.subcommand(
"init",
"vectors",
# fmt: off
lang: str = Arg(..., help="The language of the nlp object to create"),
vectors_loc: Path = Arg(..., help="Vectors file in Word2Vec format", exists=True),
output_dir: Path = Arg(..., help="Pipeline output directory"),
prune: int = Opt(-1, "--prune", "-p", help="Optional number of vectors to prune to"),
truncate: int = Opt(0, "--truncate", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
mode: str = Opt("default", "--mode", "-m", help="Vectors mode: default or floret"),
name: Optional[str] = Opt(None, "--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
lang=Arg(help="The language of the nlp object to create"),
vectors_loc=Arg(help="Vectors file in Word2Vec format"),
output_dir=Arg(help="Pipeline output directory"),
prune=Arg("--prune", "-p", help="Optional number of vectors to prune to"),
truncate=Arg("--truncate", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
mode=Arg("--mode", "-m", help="Vectors mode: default or floret"),
name=Arg("--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
verbose=Arg("--verbose", "-V", help="Display more information for debugging purposes"),
jsonl_loc=Arg("--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file"),
# fmt: on
)
def init_vectors_cli(
lang: str,
vectors_loc: ExistingPath,
output_dir: Path,
prune: int = -1,
truncate: int = 0,
mode: Literal["default", "floret"] = "default",
name: Optional[str] = None,
verbose: bool = False,
jsonl_loc: Optional[Path] = None,
):
"""Convert word vectors for use with spaCy. Will export an nlp object that
you can use in the [initialize] block of your config to initialize
@ -66,23 +78,28 @@ def update_lexemes(nlp: Language, jsonl_loc: Path) -> None:
lexeme.set_attrs(**attrs)
@init_cli.command(
@cli.subcommand_with_extra(
"init",
"nlp",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
hidden=True,
# fmt: off
config_path=Arg(help="Path to config file"),
output_path=Arg(help="Output directory for the prepared data"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose=Arg("--verbose", "-V", help="Display more information for debugging purposes"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
)
def init_pipeline_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
output_path: Path = Arg(..., help="Output directory for the prepared data"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
# fmt: on
config_path: ExistingFilePathOrDash,
output_path: Path,
code_path: Optional[ExistingFilePath] = None,
verbose: bool = False,
use_gpu: int = -1,
_extra: List[str] = [],
):
"""Initialize a pipeline."""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
import_code(code_path)
setup_gpu(use_gpu)
with show_validation_error(config_path):
@ -93,19 +110,24 @@ def init_pipeline_cli(
msg.good(f"Saved initialized pipeline to {output_path}")
@init_cli.command(
@cli.subcommand_with_extra(
"init",
"labels",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
config_path=Arg(help="Path to config file"),
output_path=Arg(help="Output directory for the labels"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose=Arg("--verbose", "-V", help="Display more information for debugging purposes"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
)
def init_labels_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
output_path: Path = Arg(..., help="Output directory for the labels"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
# fmt: on
config_path: ExistingFilePathOrDash,
output_path: Path,
code_path: Optional[ExistingFilePath] = None,
verbose: bool = False,
use_gpu: int = -1,
_extra: List[str] = [],
):
"""Generate JSON files for the labels in the data. This helps speed up the
training process, since spaCy won't have to preprocess the data to
@ -113,7 +135,7 @@ def init_labels_cli(
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
if not output_path.exists():
output_path.mkdir(parents=True)
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
import_code(code_path)
setup_gpu(use_gpu)
with show_validation_error(config_path):

View File

@ -1,4 +1,4 @@
from typing import Optional, Union, Any, Dict, List, Tuple, cast
from typing import Optional, Union, Any, Dict, List, Tuple, Literal, cast
import shutil
from pathlib import Path
from wasabi import Printer, MarkdownRenderer, get_raw_input
@ -8,26 +8,38 @@ from catalogue import RegistryError
import srsly
import sys
import re
from radicli import Arg, ExistingDirPath, ExistingFilePath
from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
from ._util import cli, convert_path_list, WHEEL_SUFFIX, SDIST_SUFFIX
from ..schemas import validate, ModelMetaSchema
from .. import util
from .. import about
@app.command("package")
def package_cli(
@cli.command(
"package",
# fmt: off
input_dir: Path = Arg(..., help="Directory with pipeline data", exists=True, file_okay=False),
output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
code_paths: str = Opt("", "--code", "-c", help="Comma-separated paths to Python file with additional code (registered functions) to be included in the package"),
meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
create_meta: bool = Opt(False, "--create-meta", "-C", help="Create meta.json, even if one exists"),
name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
build: str = Opt("sdist", "--build", "-b", help="Comma-separated formats to build: sdist and/or wheel, or none."),
force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
input_dir=Arg(help="Directory with pipeline data"),
output_dir=Arg(help="Output parent directory"),
code_paths=Arg("--code", "-c", help="Comma-separated paths to Python files with additional code (registered functions) to be included in the package", converter=convert_path_list),
meta_path=Arg("--meta", "-m", help="Path to meta.json"),
create_meta=Arg("--create-meta", "-C", help="Create meta.json, even if one exists"),
name=Arg("--name", "-n", help="Package name to override meta"),
version=Arg("--version", "-v", help="Package version to override meta"),
build=Arg("--build", "-b", help="Artifact to build. Can be set multiple times, 'sdist', 'wheel' or 'none'"),
force=Arg("--force", "-F", help="Force overwriting existing data in output directory"),
# fmt: on
)
def package_cli(
input_dir: ExistingDirPath,
output_dir: ExistingDirPath,
code_paths: List[Path] = [],
meta_path: Optional[ExistingFilePath] = None,
create_meta: bool = False,
name: Optional[str] = None,
version: Optional[str] = None,
build: List[Literal["sdist", "wheel", "none"]] = ["sdist"],
force: bool = False,
):
"""
Generate an installable Python package for a pipeline. Includes binary data,
@ -44,8 +56,6 @@ def package_cli(
DOCS: https://spacy.io/api/cli#package
"""
create_sdist, create_wheel = get_build_formats(string_to_list(build))
code_paths = [Path(p.strip()) for p in string_to_list(code_paths)]
package(
input_dir,
output_dir,
@ -54,8 +64,8 @@ def package_cli(
name=name,
version=version,
create_meta=create_meta,
create_sdist=create_sdist,
create_wheel=create_wheel,
create_sdist="sdist" in build and "none" not in build,
create_wheel="sdist" in build and "none" not in build,
force=force,
silent=False,
)

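The decorator above wires `--code` through `converter=convert_path_list`,
replacing the removed in-body `string_to_list` call. The converter itself is
defined in `_util.py` and not shown in this hunk; one plausible implementation,
assuming radicli converters are plain callables from the raw option string to
the annotated type, mirroring the deleted list comprehension:

from pathlib import Path
from typing import List

def convert_path_list(value: str) -> List[Path]:
    # Sketch: "extra.py, funcs.py" -> [Path("extra.py"), Path("funcs.py")]
    return [Path(p.strip()) for p in value.split(",") if p.strip()]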
View File

@ -1,29 +1,34 @@
from typing import Optional
from typing import Optional, List
from pathlib import Path
from wasabi import msg
import typer
import re
from radicli import Arg, ExistingFilePathOrDash, ExistingFilePath
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import cli, parse_config_overrides, show_validation_error
from ._util import import_code, setup_gpu
from ..training.pretrain import pretrain
from ..util import load_config
@app.command(
@cli.command_with_extra(
"pretrain",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
config_path=Arg(help="Path to config file"),
output_dir=Arg(help="Directory to write weights to on each epoch"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
resume_path=Arg("--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
epoch_resume=Arg("--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
)
def pretrain_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False, allow_dash=True),
output_dir: Path = Arg(..., help="Directory to write weights to on each epoch"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
config_path: ExistingFilePathOrDash,
output_dir: Path,
code_path: Optional[ExistingFilePath] = None,
resume_path: Optional[ExistingFilePath] = None,
epoch_resume: Optional[int] = None,
use_gpu: int = -1,
_extra: List[str] = [],
):
"""
Pre-train the 'token-to-vector' (tok2vec) layer of pipeline components,
@ -46,7 +51,7 @@ def pretrain_cli(
DOCS: https://spacy.io/api/cli#pretrain
"""
config_overrides = parse_config_overrides(ctx.args)
config_overrides = parse_config_overrides(_extra)
import_code(code_path)
verify_cli_args(config_path, output_dir, resume_path, epoch_resume)
setup_gpu(use_gpu)

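With `command_with_extra`, flags that don't match the declared arguments land
in `_extra` and become config overrides. A hedged usage sketch (paths, epoch
number and the override key are illustrative):

from spacy.cli._util import cli

# Resume pretraining from saved weights; "--pretraining.dropout" is unknown
# to the command signature, so it flows into _extra and then into
# config_overrides.
cli.run([
    "spacy", "pretrain", "config.cfg", "pretrain_output/",
    "--resume-path", "pretrain_output/model5.bin",
    "--epoch-resume", "6",
    "--pretraining.dropout", "0.2",
])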
View File

@ -7,23 +7,25 @@ import pstats
import sys
import itertools
from wasabi import msg, Printer
import typer
from radicli import Arg, ExistingPathOrDash
from ._util import app, debug_cli, Arg, Opt, NAME
from ._util import cli
from ..language import Language
from ..util import load_model
@debug_cli.command("profile")
@app.command("profile", hidden=True)
def profile_cli(
@cli.subcommand(
"debug",
"profile",
# fmt: off
ctx: typer.Context, # This is only used to read current calling context
model: str = Arg(..., help="Trained pipeline to load"),
inputs: Optional[Path] = Arg(None, help="Location of input file. '-' for stdin.", exists=True, allow_dash=True),
n_texts: int = Opt(10000, "--n-texts", "-n", help="Maximum number of texts to use if available"),
model=Arg(help="Trained pipeline to load"),
inputs=Arg(help="Location of input file. '-' for stdin."),
n_texts=Arg("--n-texts", "-n", help="Maximum number of texts to use if available"),
# fmt: on
):
)
def profile(
model: str, inputs: Optional[ExistingPathOrDash] = None, n_texts: int = 10000
) -> None:
"""
Profile which functions take the most time in a spaCy pipeline.
Input should be formatted as one JSON object per line with a key "text".
@ -32,16 +34,6 @@ def profile_cli(
DOCS: https://spacy.io/api/cli#debug-profile
"""
if ctx.parent.command.name == NAME: # type: ignore[union-attr] # called as top-level command
msg.warn(
"The profile command is now available via the 'debug profile' "
"subcommand. You can run python -m spacy debug --help for an "
"overview of the other available debugging commands."
)
profile(model, inputs=inputs, n_texts=n_texts)
def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> None:
if inputs is not None:
texts = _read_inputs(inputs, msg)
texts = list(itertools.islice(texts, n_texts))

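The Typer-era split between `profile_cli` and `profile` is gone: the command
function is the implementation, so it can also be called directly from Python.
A sketch (model name and input file are placeholders):

from pathlib import Path
from spacy.cli.profile import profile

# Expects one JSON object per line with a "text" key; "-" would read stdin.
profile("en_core_web_sm", inputs=Path("texts.jsonl"), n_texts=500)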
View File

@ -1,14 +1,14 @@
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, List
from pathlib import Path
from wasabi import msg
import os
import re
import shutil
import requests
import typer
from radicli import Arg, ExistingDirPath
from ...util import ensure_path, working_dir
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
from .._util import cli, PROJECT_FILE, load_project_config
from .._util import get_checksum, download_file, git_checkout, get_git_version
from .._util import SimpleFrozenDict, parse_config_overrides
@ -16,17 +16,20 @@ from .._util import SimpleFrozenDict, parse_config_overrides
EXTRA_DEFAULT = False
@project_cli.command(
@cli.subcommand_with_extra(
"project",
"assets",
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
# fmt: off
project_dir=Arg(help="Path to cloned project. Defaults to current working directory"),
sparse_checkout=Arg("--sparse", "-S", help="Use sparse checkout for assets provided via Git, to only check out and clone the files needed. Requires Git v2.22+"),
extra=Arg("--extra", "-e", help="Download all assets, including those marked as 'extra'"),
# fmt: on
)
def project_assets_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse checkout for assets provided via Git, to only check out and clone the files needed. Requires Git v2.22+."),
extra: bool = Opt(False, "--extra", "-e", help="Download all assets, including those marked as 'extra'.")
# fmt: on
project_dir: ExistingDirPath = Path.cwd(),
sparse_checkout: bool = False,
extra: bool = False,
_extra: List[str] = [],
):
"""Fetch project assets like datasets and pretrained weights. Assets are
defined in the "assets" section of the project.yml. If a checksum is
@ -35,7 +38,7 @@ def project_assets_cli(
DOCS: https://spacy.io/api/cli#project-assets
"""
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
project_assets(
project_dir,
overrides=overrides,

View File

@ -3,10 +3,11 @@ from pathlib import Path
from wasabi import msg
import subprocess
import re
from radicli import Arg
from ... import about
from ...util import ensure_path
from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
from .._util import cli, COMMAND, PROJECT_FILE
from .._util import git_checkout, get_git_version, git_repo_branch_exists
DEFAULT_REPO = about.__projects__
@ -14,15 +15,23 @@ DEFAULT_PROJECTS_BRANCH = about.__projects_branch__
DEFAULT_BRANCHES = ["main", "master"]
@project_cli.command("clone")
def project_clone_cli(
@cli.subcommand(
"project",
"clone",
# fmt: off
name: str = Arg(..., help="The name of the template to clone"),
dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
repo: str = Opt(DEFAULT_REPO, "--repo", "-r", help="The repository to clone from"),
branch: Optional[str] = Opt(None, "--branch", "-b", help=f"The branch to clone from. If not provided, will attempt {', '.join(DEFAULT_BRANCHES)}"),
sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse Git checkout to only check out and clone the files needed. Requires Git v2.22+.")
name=Arg(help="The name of the template to clone"),
dest=Arg(help="Where to clone the project. Defaults to current working directory"),
repo=Arg("--repo", "-r", help="The repository to clone from"),
branch=Arg("--branch", "-b", help=f"The branch to clone from. If not provided, will attempt {', '.join(DEFAULT_BRANCHES)}"),
sparse_checkout=Arg("--sparse", "-S", help="Use sparse Git checkout to only check out and clone the files needed. Requires Git v2.22+"),
# fmt: on
)
def project_clone_cli(
name: str,
dest: Optional[Path] = None,
repo: str = DEFAULT_REPO,
branch: Optional[str] = None,
sparse_checkout: bool = False,
):
"""Clone a project template from a repository. Calls into "git" and will
only download the files from the given subdirectory. The GitHub repo

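Behavior is unchanged; cloning a template from the default projects repo still
looks like this (template name is illustrative):

from spacy.cli._util import cli

cli.run(["spacy", "project", "clone", "pipelines/tagger_parser_ud"])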
View File

@ -1,8 +1,9 @@
from pathlib import Path
from wasabi import msg, MarkdownRenderer
from radicli import Arg, ExistingDirPath, PathOrDash
from ...util import working_dir
from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
from .._util import cli, PROJECT_FILE, load_project_config
DOCS_URL = "https://spacy.io"
@ -27,14 +28,20 @@ MARKER_END = "<!-- SPACY PROJECT: AUTO-GENERATED DOCS END (do not remove) -->"
MARKER_IGNORE = "<!-- SPACY PROJECT: IGNORE -->"
@project_cli.command("document")
def project_document_cli(
@cli.subcommand(
"project",
"document",
# fmt: off
project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
output_file: Path = Opt("-", "--output", "-o", help="Path to output Markdown file for output. Defaults to - for standard output"),
no_emoji: bool = Opt(False, "--no-emoji", "-NE", help="Don't use emoji")
project_dir=Arg(help="Path to cloned project. Defaults to current working directory."),
output_file=Arg("--output", "-o", help="Path to output Markdown file for output. Defaults to - for standard output"),
no_emoji=Arg("--no-emoji", "-NE", help="Don't use emoji"),
# fmt: on
):
)
def project_document(
project_dir: ExistingDirPath = Path.cwd(),
output_file: PathOrDash = "-",
no_emoji: bool = False,
) -> None:
"""
Auto-generate a README.md for a project. If the content is saved to a file,
hidden markers are added so you can add custom content before or after the
@ -43,13 +50,7 @@ def project_document_cli(
DOCS: https://spacy.io/api/cli#project-document
"""
project_document(project_dir, output_file, no_emoji=no_emoji)
def project_document(
project_dir: Path, output_file: Path, *, no_emoji: bool = False
) -> None:
is_stdout = str(output_file) == "-"
is_stdout = output_file == "-"
config = load_project_config(project_dir)
md = MarkdownRenderer(no_emoji=no_emoji)
md.add(MARKER_START)

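`PathOrDash` means `output_file` arrives either as a `Path` or as the literal
string "-", which is why the `str()` call around the comparison could be
dropped. A sketch of the pattern (the helper name is hypothetical):

import sys
from pathlib import Path

def open_output(output_file):
    # "-" selects stdout; anything else is treated as a real path.
    if output_file == "-":
        return sys.stdout
    return Path(output_file).open("w", encoding="utf8")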
View File

@ -1,207 +0,0 @@
"""This module contains helpers and subcommands for integrating spaCy projects
with Data Version Control (DVC). https://dvc.org"""
from typing import Dict, Any, List, Optional, Iterable
import subprocess
from pathlib import Path
from wasabi import msg
from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
from .._util import Arg, Opt, NAME, COMMAND
from ...util import working_dir, split_command, join_command, run_command
from ...util import SimpleFrozenList
DVC_CONFIG = "dvc.yaml"
DVC_DIR = ".dvc"
UPDATE_COMMAND = "dvc"
DVC_CONFIG_COMMENT = f"""# This file is auto-generated by spaCy based on your {PROJECT_FILE}. If you've
# edited your {PROJECT_FILE}, you can regenerate this file by running:
# {COMMAND} project {UPDATE_COMMAND}"""
@project_cli.command(UPDATE_COMMAND)
def project_update_dvc_cli(
# fmt: off
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
workflow: Optional[str] = Arg(None, help=f"Name of workflow defined in {PROJECT_FILE}. Defaults to first workflow if not set."),
verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"),
quiet: bool = Opt(False, "--quiet", "-q", help="Print less info"),
force: bool = Opt(False, "--force", "-F", help="Force update DVC config"),
# fmt: on
):
"""Auto-generate Data Version Control (DVC) config. A DVC
project can only define one pipeline, so you need to specify one workflow
defined in the project.yml. If no workflow is specified, the first defined
workflow is used. The DVC config will only be updated if the project.yml
changed.
DOCS: https://spacy.io/api/cli#project-dvc
"""
project_update_dvc(project_dir, workflow, verbose=verbose, quiet=quiet, force=force)
def project_update_dvc(
project_dir: Path,
workflow: Optional[str] = None,
*,
verbose: bool = False,
quiet: bool = False,
force: bool = False,
) -> None:
"""Update the auto-generated Data Version Control (DVC) config file. A DVC
project can only define one pipeline, so you need to specify one workflow
defined in the project.yml. Will only update the file if the checksum changed.
project_dir (Path): The project directory.
workflow (Optional[str]): Optional name of workflow defined in project.yml.
If not set, the first workflow will be used.
verbose (bool): Print more info.
quiet (bool): Print less info.
force (bool): Force update DVC config.
"""
config = load_project_config(project_dir)
updated = update_dvc_config(
project_dir, config, workflow, verbose=verbose, quiet=quiet, force=force
)
help_msg = "To execute the workflow with DVC, run: dvc repro"
if updated:
msg.good(f"Updated DVC config from {PROJECT_FILE}", help_msg)
else:
msg.info(f"No changes found in {PROJECT_FILE}, no update needed", help_msg)
def update_dvc_config(
path: Path,
config: Dict[str, Any],
workflow: Optional[str] = None,
verbose: bool = False,
quiet: bool = False,
force: bool = False,
) -> bool:
"""Re-run the DVC commands in dry mode and update dvc.yaml file in the
project directory. The file is auto-generated based on the config. The
first line of the auto-generated file specifies the hash of the config
dict, so if any of the config values change, the DVC config is regenerated.
path (Path): The path to the project directory.
config (Dict[str, Any]): The loaded project.yml.
verbose (bool): Whether to print additional info (via DVC).
quiet (bool): Don't output anything (via DVC).
force (bool): Force update, even if hashes match.
RETURNS (bool): Whether the DVC config file was updated.
"""
ensure_dvc(path)
workflows = config.get("workflows", {})
workflow_names = list(workflows.keys())
check_workflows(workflow_names, workflow)
if not workflow:
workflow = workflow_names[0]
config_hash = get_hash(config)
path = path.resolve()
dvc_config_path = path / DVC_CONFIG
if dvc_config_path.exists():
# Check if the file was generated using the current config, if not, redo
with dvc_config_path.open("r", encoding="utf8") as f:
ref_hash = f.readline().strip().replace("# ", "")
if ref_hash == config_hash and not force:
return False # Nothing has changed in project.yml, don't need to update
dvc_config_path.unlink()
dvc_commands = []
config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
# some flags that apply to every command
flags = []
if verbose:
flags.append("--verbose")
if quiet:
flags.append("--quiet")
for name in workflows[workflow]:
command = config_commands[name]
deps = command.get("deps", [])
outputs = command.get("outputs", [])
outputs_no_cache = command.get("outputs_no_cache", [])
if not deps and not outputs and not outputs_no_cache:
continue
# Default to the working dir as the project path since dvc.yaml is auto-generated
# and we don't want arbitrary paths in there
project_cmd = ["python", "-m", NAME, "project", "run", name]
deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl]
outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl]
outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl]
dvc_cmd = ["run", *flags, "-n", name, "-w", str(path), "--no-exec"]
if command.get("no_skip"):
dvc_cmd.append("--always-changed")
full_cmd = [*dvc_cmd, *deps_cmd, *outputs_cmd, *outputs_nc_cmd, *project_cmd]
dvc_commands.append(join_command(full_cmd))
if not dvc_commands:
# If we don't check for this, then there will be an error when reading the
# config, since DVC wouldn't create it.
msg.fail(
"No usable commands for DVC found. This can happen if none of your "
"commands have dependencies or outputs.",
exits=1,
)
with working_dir(path):
for c in dvc_commands:
dvc_command = "dvc " + c
run_command(dvc_command)
with dvc_config_path.open("r+", encoding="utf8") as f:
content = f.read()
f.seek(0, 0)
f.write(f"# {config_hash}\n{DVC_CONFIG_COMMENT}\n{content}")
return True
def check_workflows(workflows: List[str], workflow: Optional[str] = None) -> None:
"""Validate workflows provided in project.yml and check that a given
workflow can be used to generate a DVC config.
workflows (List[str]): Names of the available workflows.
workflow (Optional[str]): The name of the workflow to convert.
"""
if not workflows:
msg.fail(
f"No workflows defined in {PROJECT_FILE}. To generate a DVC config, "
f"define at least one list of commands.",
exits=1,
)
if workflow is not None and workflow not in workflows:
msg.fail(
f"Workflow '{workflow}' not defined in {PROJECT_FILE}. "
f"Available workflows: {', '.join(workflows)}",
exits=1,
)
if not workflow:
msg.warn(
f"No workflow specified for DVC pipeline. Using the first workflow "
f"defined in {PROJECT_FILE}: '{workflows[0]}'"
)
def ensure_dvc(project_dir: Path) -> None:
"""Ensure that the "dvc" command is available and that the current project
directory is an initialized DVC project.
"""
try:
subprocess.run(["dvc", "--version"], stdout=subprocess.DEVNULL)
except Exception:
msg.fail(
"To use spaCy projects with DVC (Data Version Control), DVC needs "
"to be installed and the 'dvc' command needs to be available",
"You can install the Python package from pip (pip install dvc) or "
"conda (conda install -c conda-forge dvc). For more details, see the "
"documentation: https://dvc.org/doc/install",
exits=1,
)
if not (project_dir / ".dvc").exists():
msg.fail(
"Project not initialized as a DVC project",
"To initialize a DVC project, you can run 'dvc init' in the project "
"directory. For more details, see the documentation: "
"https://dvc.org/doc/command-reference/init",
exits=1,
)

View File

@ -1,18 +1,23 @@
from pathlib import Path
from wasabi import msg
from radicli import Arg, ExistingDirPath
from .remote_storage import RemoteStorage
from .remote_storage import get_command_hash
from .._util import project_cli, Arg, logger
from .._util import load_project_config
from .._util import cli, load_project_config, logger
from .run import update_lockfile
@project_cli.command("pull")
def project_pull_cli(
@cli.subcommand(
"project",
"pull",
# fmt: off
remote: str = Arg("default", help="Name or path of remote storage"),
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
remote=Arg(help="Name or path of remote storage"),
project_dir=Arg(help="Location of project directory. Defaults to current working directory."),
# fmt: on
)
def project_pull_cli(
remote: str = "default", project_dir: ExistingDirPath = Path.cwd()
):
"""Retrieve available precomputed outputs from a remote storage.
You can alias remotes in your project.yml by mapping them to storage paths.

View File

@ -1,19 +1,25 @@
from pathlib import Path
from wasabi import msg
from radicli import Arg, ExistingDirPath
from .remote_storage import RemoteStorage
from .remote_storage import get_content_hash, get_command_hash
from .._util import load_project_config
from .._util import project_cli, Arg, logger
from .._util import cli, load_project_config, logger
@project_cli.command("push")
def project_push_cli(
@cli.subcommand(
"project",
"push",
# fmt: off
remote: str = Arg("default", help="Name or path of remote storage"),
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
remote=Arg(help="Name or path of remote storage"),
project_dir=Arg(help="Location of project directory. Defaults to current working directory."),
# fmt: on
)
def project_push_cli(
remote: str = "default", project_dir: ExistingDirPath = Path.cwd()
):
"""Persist outputs to a remote storage. You can alias remotes in your
"""
Persist outputs to a remote storage. You can alias remotes in your
project.yml by mapping them to storage paths. A storage can be anything that
the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
local directories etc.

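Both remote-storage commands keep their defaults as plain Python defaults.
Usage is unchanged; the remote name is resolved against the "remotes" section
of the project.yml (names here are illustrative):

from spacy.cli._util import cli

cli.run(["spacy", "project", "push", "default"])
cli.run(["spacy", "project", "pull", "default"])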
View File

@ -1,13 +1,12 @@
from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
import os.path
from pathlib import Path
import pkg_resources
from wasabi import msg
from wasabi.util import locale_escape
import sys
import srsly
import typer
from radicli import Arg, ExistingDirPath
from ... import about
from ...git_info import GIT_VERSION
@ -15,21 +14,27 @@ from ...util import working_dir, run_command, split_command, is_cwd, join_comman
from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
from ...util import check_bool_env_var, SimpleFrozenDict
from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
from .._util import get_checksum, project_cli, Arg, Opt, COMMAND, parse_config_overrides
from .._util import cli, get_checksum, COMMAND, parse_config_overrides
@project_cli.command(
"run", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
@cli.subcommand_with_extra(
"project",
"run",
# fmt: off
subcommand=Arg(help=f"Name of command defined in the {PROJECT_FILE}"),
project_dir=Arg(help="Location of project directory. Defaults to current working directory."),
force=Arg("--force", "-F", help="Force re-running steps, even if nothing changed"),
dry=Arg("--dry", "-D", help="Perform a dry run and don't execute scripts"),
show_help=Arg("--help", help="Show help message and available subcommands"),
# fmt: on
)
def project_run_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
subcommand: str = Arg(None, help=f"Name of command defined in the {PROJECT_FILE}"),
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
force: bool = Opt(False, "--force", "-F", help="Force re-running steps, even if nothing changed"),
dry: bool = Opt(False, "--dry", "-D", help="Perform a dry run and don't execute scripts"),
show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
# fmt: on
subcommand: Optional[str] = None,
project_dir: ExistingDirPath = Path.cwd(),
force: bool = False,
dry: bool = False,
show_help: bool = False,
_extra: List[str] = [],
):
"""Run a named command or workflow defined in the project.yml. If a workflow
name is specified, all commands in the workflow are run, in order. If
@ -41,7 +46,7 @@ def project_run_cli(
if show_help or not subcommand:
print_run_help(project_dir, subcommand)
else:
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
project_run(project_dir, subcommand, overrides=overrides, force=force, dry=dry)

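`subcommand_with_extra` plays the same role for subcommands as
`command_with_extra` does for top-level commands: trailing unknown flags are
collected into `_extra` and parsed into overrides, e.g. for variables defined
in the project.yml (variable name is illustrative):

from spacy.cli._util import cli

# Equivalent to: python -m spacy project run train . --vars.gpu_id 0
cli.run(["spacy", "project", "run", "train", ".", "--vars.gpu_id", "0"])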
View File

@ -1,29 +1,34 @@
from typing import Optional, Dict, Any, Union
from typing import Optional, Dict, Any, Union, List
from pathlib import Path
from wasabi import msg
import typer
import logging
import sys
from radicli import Arg, ExistingFilePathOrDash
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import cli, parse_config_overrides, show_validation_error
from ._util import import_code, setup_gpu
from ..training.loop import train as train_nlp
from ..training.initialize import init_nlp
from .. import util
@app.command(
"train", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
@cli.command_with_extra(
"train",
# fmt: off
config_path=Arg(help="Path to config file"),
output_path=Arg("--output", "-o", help="Output directory to store trained pipeline in"),
code_path=Arg("--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose=Arg("--verbose", "-V", help="Display more information for debugging purposes"),
use_gpu=Arg("--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
)
def train_cli(
# fmt: off
ctx: typer.Context, # This is only used to read additional arguments
config_path: Path = Arg(..., help="Path to config file", exists=True, allow_dash=True),
output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store trained pipeline in"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU")
# fmt: on
config_path: ExistingFilePathOrDash,
output_path: Optional[Path] = None,
code_path: Optional[Path] = None,
verbose: bool = False,
use_gpu: int = -1,
_extra: List[str] = [],
):
"""
Train or update a spaCy pipeline. Requires data in spaCy's binary format. To
@ -40,7 +45,7 @@ def train_cli(
DOCS: https://spacy.io/api/cli#train
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.INFO)
overrides = parse_config_overrides(ctx.args)
overrides = parse_config_overrides(_extra)
import_code(code_path)
train(config_path, output_path, use_gpu=use_gpu, overrides=overrides)

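End to end, the new entry point is driven the same way the updated tests below
drive it. A hedged sketch (paths are placeholders):

from spacy.cli._util import cli

cli.run([
    "spacy", "train", "config.cfg",
    "--output", "training/",
    "--paths.train", "corpus/train.spacy",
    "--paths.dev", "corpus/dev.spacy",
])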
View File

@ -5,14 +5,14 @@ import requests
from wasabi import msg, Printer
import warnings
from ._util import app
from ._util import cli
from .. import about
from ..util import get_package_version, get_installed_models, get_minor_version
from ..util import get_package_path, get_model_meta, is_compatible_version
@app.command("validate")
def validate_cli():
@cli.command("validate")
def validate() -> None:
"""
Validate the currently installed pipeline packages and spaCy version. Checks
if the installed packages are compatible and shows upgrade instructions if
@ -20,10 +20,6 @@ def validate_cli():
DOCS: https://spacy.io/api/cli#validate
"""
validate()
def validate() -> None:
model_pkgs, compat = get_model_pkgs()
spacy_version = get_minor_version(about.__version__)
current_compat = compat.get(spacy_version, {})

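With the wrapper removed, `validate` is registered directly and takes no
arguments; invocation is unchanged:

from spacy.cli._util import cli

cli.run(["spacy", "validate"])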
View File

@ -1,20 +1,39 @@
import pytest
import os
from pathlib import Path
from typer.testing import CliRunner
from spacy.tokens import DocBin, Doc
from spacy.cli._util import cli
from spacy.cli._util import app
from .util import make_tempdir, normalize_whitespace
def test_convert_auto():
@pytest.fixture(scope="session")
def all_commands():
result = [*cli.commands.values()]
for subcommands in cli.subcommands.values():
result.extend(subcommands.values())
return result
def test_help_texts(all_commands):
"""Test that all commands provide docstrings and argument help texts."""
for command in all_commands:
assert command.description, f"no docstring for {command.display_name}"
for arg in command.args:
if arg.id == cli.extra_key:
continue
assert arg.arg.help, f"no help text for {command.display_name} -> {arg.id}"
def test_convert_auto(capsys):
with make_tempdir() as d_in, make_tempdir() as d_out:
for f in ["data1.iob", "data2.iob", "data3.iob"]:
Path(d_in / f).touch()
# ensure that "automatic" suffix detection works
result = CliRunner().invoke(app, ["convert", str(d_in), str(d_out)])
assert "Generated output file" in result.stdout
cli.run(["spacy", "convert", str(d_in), str(d_out)])
captured = capsys.readouterr()
assert "Generated output file" in captured.out
out_files = os.listdir(d_out)
assert len(out_files) == 3
assert "data1.spacy" in out_files
@ -22,28 +41,36 @@ def test_convert_auto():
assert "data3.spacy" in out_files
def test_convert_auto_conflict():
def test_convert_auto_conflict(capsys):
with make_tempdir() as d_in, make_tempdir() as d_out:
for f in ["data1.iob", "data2.iob", "data3.json"]:
Path(d_in / f).touch()
# ensure that "automatic" suffix detection warns when there are different file types
result = CliRunner().invoke(app, ["convert", str(d_in), str(d_out)])
assert "All input files must be same type" in result.stdout
with pytest.raises(SystemExit):
cli.run(["spacy", "convert", str(d_in), str(d_out)])
captured = capsys.readouterr()
assert "All input files must be same type" in captured.out
out_files = os.listdir(d_out)
assert len(out_files) == 0
def test_benchmark_accuracy_alias():
def test_benchmark_accuracy_alias(capsys):
# Verify that the `evaluate` alias works correctly.
result_benchmark = CliRunner().invoke(app, ["benchmark", "accuracy", "--help"])
result_evaluate = CliRunner().invoke(app, ["evaluate", "--help"])
assert normalize_whitespace(result_benchmark.stdout) == normalize_whitespace(
result_evaluate.stdout.replace("spacy evaluate", "spacy benchmark accuracy")
with pytest.raises(SystemExit):
cli.run(["spacy", "benchmark", "accuracy", "--help"])
captured = capsys.readouterr()
result_benchmark = normalize_whitespace(str(captured.out))
with pytest.raises(SystemExit):
cli.run(["spacy", "evaluate", "--help"])
captured = capsys.readouterr()
result_evaluate = normalize_whitespace(str(captured.out))
assert result_benchmark == result_evaluate.replace(
"spacy evaluate", "spacy benchmark accuracy"
)
def test_debug_data_trainable_lemmatizer_cli(en_vocab):
def test_debug_data_trainable_lemmatizer_cli(en_vocab, capsys):
train_docs = [
Doc(en_vocab, words=["I", "like", "cats"], lemmas=["I", "like", "cat"]),
Doc(
@ -62,30 +89,30 @@ def test_debug_data_trainable_lemmatizer_cli(en_vocab):
dev_bin = DocBin(docs=dev_docs)
dev_bin.to_disk(d_in / "dev.spacy")
# `debug data` requires an input pipeline config
CliRunner().invoke(
app,
[
"init",
"config",
f"{d_in}/config.cfg",
"--lang",
"en",
"--pipeline",
"trainable_lemmatizer",
],
)
result_debug_data = CliRunner().invoke(
app,
[
"debug",
"data",
f"{d_in}/config.cfg",
"--paths.train",
f"{d_in}/train.spacy",
"--paths.dev",
f"{d_in}/dev.spacy",
],
)
args = [
"spacy",
"init",
"config",
f"{d_in}/config.cfg",
"--lang",
"en",
"--pipeline",
"trainable_lemmatizer",
]
cli.run(args)
args = [
"spacy",
"debug",
"data",
f"{d_in}/config.cfg",
"--paths.train",
f"{d_in}/train.spacy",
"--paths.dev",
f"{d_in}/dev.spacy",
]
with pytest.raises(SystemExit):
cli.run(args)
captured = capsys.readouterr()
# Instead of checking specific wording of the output, which may change,
# we'll check that this section of the debug output is present.
assert "= Trainable Lemmatizer =" in result_debug_data.stdout
assert "= Trainable Lemmatizer =" in captured.out