diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 664fc2aaf..cc22cbba6 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -1,4 +1,4 @@
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union
 from pathlib import Path
 from wasabi import msg
 import typer
@@ -46,12 +46,14 @@ def train_cli(
 
 
 def train(
-    config_path: Path,
-    output_path: Optional[Path] = None,
+    config_path: Union[str, Path],
+    output_path: Optional[Union[str, Path]] = None,
     *,
     use_gpu: int = -1,
     overrides: Dict[str, Any] = util.SimpleFrozenDict(),
 ):
+    config_path = util.ensure_path(config_path)
+    output_path = util.ensure_path(output_path)
     # Make sure all files and paths exists if they are needed
     if not config_path or (str(config_path) != "-" and not config_path.exists()):
         msg.fail("Config file not found", config_path, exits=1)
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index 268ea0703..a4462af56 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -819,6 +819,29 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [--gpu-id]
 | overrides   | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--paths.train ./train.spacy`. ~~Any (option/flag)~~ |
 | **CREATES** | The final trained pipeline and the best trained pipeline.                                                                                                                                  |
 
+### Calling the training function from Python {#train-function new="3.2"}
+
+The training CLI exposes a `train` helper function that lets you run the
+training just like `spacy train`. Usually it's easier to use the command line
+directly, but if you need to kick off training from code this is how to do it.
+
+> #### Example
+>
+> ```python
+> from spacy.cli.train import train
+>
+> train("./config.cfg", overrides={"paths.train": "./train.spacy", "paths.dev": "./dev.spacy"})
+>
+> ```
+
+| Name           | Description                                                                                                                    |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------ |
+| `config_path`  | Path to the config to use for training. ~~Union[str, Path]~~                                                                   |
+| `output_path`  | Optional name of directory to save output model in. If not provided a model will not be saved. ~~Optional[Union[str, Path]]~~  |
+| _keyword-only_ |                                                                                                                                |
+| `use_gpu`      | Which GPU to use. Defaults to -1 for no GPU. ~~int~~                                                                           |
+| `overrides`    | Values to override config settings. ~~Dict[str, Any]~~                                                                         |
+
 ## pretrain {#pretrain new="2.1" tag="command,experimental"}
 
 Pretrain the "token to vector" ([`Tok2vec`](/api/tok2vec)) layer of pipeline
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index f6910bd5b..c78a1de03 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -826,17 +826,17 @@ from the specified model. Intended for use in `[initialize.before_init]`.
 > after_pipeline_creation = {"@callbacks":"spacy.models_with_nvtx_range.v1"}
 > ```
 
-Recursively wrap the models in each pipe using [NVTX](https://nvidia.github.io/NVTX/)
-range markers. These markers aid in GPU profiling by attributing specific operations
-to a ~~Model~~'s forward or backprop passes.
+Recursively wrap the models in each pipe using
+[NVTX](https://nvidia.github.io/NVTX/) range markers. These markers aid in GPU
+profiling by attributing specific operations to a ~~Model~~'s forward or
+backprop passes.
 
 | Name             | Description                                                                                                                   |
-|------------------|-------------------------------------------------------------------------------------------------------------------------------|
+| ---------------- | ----------------------------------------------------------------------------------------------------------------------------- |
 | `forward_color`  | Color identifier for forward passes. Defaults to `-1`. ~~int~~                                                                |
 | `backprop_color` | Color identifier for backpropagation passes. Defaults to `-1`. ~~int~~                                                        |
 | **CREATES**      | A function that takes the current `nlp` and wraps forward/backprop passes in NVTX ranges. ~~Callable[[Language], Language]~~  |
-
 
 ## Training data and alignment {#gold source="spacy/training"}
 
 ### training.offsets_to_biluo_tags {#offsets_to_biluo_tags tag="function"}
diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 94fdad209..bd5ea7751 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -301,8 +301,6 @@ fly without having to save to and load from disk.
 $ python -m spacy init config - --lang en --pipeline ner,textcat --optimize accuracy | python -m spacy train - --paths.train ./corpus/train.spacy --paths.dev ./corpus/dev.spacy
 ```
 
-
-
 ### Using variable interpolation {#config-interpolation}
 
 Another very useful feature of the config system is that it supports variable
@@ -1647,7 +1645,7 @@ workers are stuck waiting for it to complete before they can continue.
 
 ## Internal training API {#api}
 
-<Infobox variant="warning">
+<Infobox variant="danger">
 
 spaCy gives you full control over the training loop. However, for most use
 cases, it's recommended to train your pipelines via the
@@ -1659,6 +1657,32 @@ typically give you everything you need to train fully custom pipelines with
 
 </Infobox>
 
+### Training from a Python script {#api-train new="3.2"}
+
+If you want to run the training from a Python script instead of using the
+[`spacy train`](/api/cli#train) CLI command, you can call into the
+[`train`](/api/cli#train-function) helper function directly. It takes the path
+to the config file, an optional output directory and an optional dictionary of
+[config overrides](#config-overrides).
+
+```python
+from spacy.cli.train import train
+
+train("./config.cfg", overrides={"paths.train": "./train.spacy", "paths.dev": "./dev.spacy"})
+```
+
+### Internal training loop API {#api-loop}
+
+<Infobox>
+
+This section documents how the training loop and updates to the `nlp` object
+work internally. You typically shouldn't have to implement this in Python unless
+you're writing your own trainable components. To train a pipeline, use
+[`spacy train`](/api/cli#train) or the [`train`](/api/cli#train-function) helper
+function instead.
+
+</Infobox>
+
 The [`Example`](/api/example) object contains annotated training data, also
 called the **gold standard**. It's initialized with a [`Doc`](/api/doc) object
 that will hold the predictions, and another `Doc` object that holds the
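
Below is a quick usage sketch of the patched helper, for reviewers who want to try the change end to end. It is not part of the diff: the config and corpus paths are placeholders, and it assumes a spaCy install that includes this patch.

```python
from spacy.cli.train import train

# After this patch, plain strings are accepted wherever a Path was required
# before: train() normalizes both path arguments via util.ensure_path().
train(
    "./config.cfg",   # str or pathlib.Path both work now
    "./output",       # optional; omit it and no model is saved
    use_gpu=-1,       # default: -1 trains on CPU
    overrides={"paths.train": "./train.spacy", "paths.dev": "./dev.spacy"},
)
```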
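
The two new `util.ensure_path` calls are what make the `Union[str, Path]` annotations safe. For reviewers without the spaCy source at hand, the helper behaves roughly like the sketch below, which is an assumption based on how it's used in this diff: strings become `Path` objects, and any other value (including the `None` default for `output_path`) passes through unchanged.

```python
from pathlib import Path

def ensure_path(path):
    # Rough sketch of spacy.util.ensure_path: convert a str to a Path and
    # return any other value (Path, None, ...) unchanged.
    if isinstance(path, str):
        return Path(path)
    return path
```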