diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml index 9d57219ca..cc0247b3a 100644 --- a/.github/azure-steps.yml +++ b/.github/azure-steps.yml @@ -10,6 +10,7 @@ steps: inputs: versionSpec: ${{ parameters.python_version }} architecture: ${{ parameters.architecture }} + allowUnstable: true - bash: | echo "##vso[task.setvariable variable=python_version]${{ parameters.python_version }}" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b959262e3..df59697b1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,7 +6,7 @@ repos: language_version: python3.7 additional_dependencies: ['click==8.0.4'] - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.2 + rev: 5.0.4 hooks: - id: flake8 args: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2f5201614..357cce835 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -85,6 +85,15 @@ jobs: Python310Mac: imageName: "macos-latest" python.version: "3.10" + Python311Linux: + imageName: 'ubuntu-latest' + python.version: '3.11.0-rc.2' + Python311Windows: + imageName: 'windows-latest' + python.version: '3.11.0-rc.2' + Python311Mac: + imageName: 'macos-latest' + python.version: '3.11.0-rc.2' maxParallel: 4 pool: vmImage: $(imageName) diff --git a/requirements.txt b/requirements.txt index 446560c06..9d6bbb2c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,7 @@ pathy>=0.3.5 numpy>=1.15.0 requests>=2.13.0,<3.0.0 tqdm>=4.38.0,<5.0.0 -pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0 +pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0 jinja2 langcodes>=3.2.0,<4.0.0 # Official Python utilities @@ -28,7 +28,7 @@ cython>=0.25,<3.0 pytest>=5.2.0,!=7.1.0 pytest-timeout>=1.3.0,<2.0.0 mock>=2.0.0,<3.0.0 -flake8>=3.8.0,<3.10.0 +flake8>=3.8.0,<6.0.0 hypothesis>=3.27.0,<7.0.0 mypy>=0.980,<0.990; platform_machine != "aarch64" and python_version >= "3.7" types-dataclasses>=0.1.3; python_version < "3.7" diff --git a/setup.cfg b/setup.cfg index 74bffaa0f..587af7e64 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,7 +48,7 @@ install_requires = tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 requests>=2.13.0,<3.0.0 - pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0 + pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0 jinja2 # Official Python utilities setuptools diff --git a/spacy/about.py b/spacy/about.py index 843c15aba..ce86e6294 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,6 +1,6 @@ # fmt: off __title__ = "spacy" -__version__ = "3.4.1" +__version__ = "3.4.2" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __projects__ = "https://github.com/explosion/projects" diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py index 83dc5efbf..a15353855 100644 --- a/spacy/cli/project/dvc.py +++ b/spacy/cli/project/dvc.py @@ -25,6 +25,7 @@ def project_update_dvc_cli( project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), workflow: Optional[str] = Arg(None, help=f"Name of workflow defined in {PROJECT_FILE}. 
Defaults to first workflow if not set."), verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"), + quiet: bool = Opt(False, "--quiet", "-q", help="Print less info"), force: bool = Opt(False, "--force", "-F", help="Force update DVC config"), # fmt: on ): @@ -36,7 +37,7 @@ def project_update_dvc_cli( DOCS: https://spacy.io/api/cli#project-dvc """ - project_update_dvc(project_dir, workflow, verbose=verbose, force=force) + project_update_dvc(project_dir, workflow, verbose=verbose, quiet=quiet, force=force) def project_update_dvc( @@ -44,6 +45,7 @@ def project_update_dvc( workflow: Optional[str] = None, *, verbose: bool = False, + quiet: bool = False, force: bool = False, ) -> None: """Update the auto-generated Data Version Control (DVC) config file. A DVC @@ -54,11 +56,12 @@ def project_update_dvc( workflow (Optional[str]): Optional name of workflow defined in project.yml. If not set, the first workflow will be used. verbose (bool): Print more info. + quiet (bool): Print less info. force (bool): Force update DVC config. """ config = load_project_config(project_dir) updated = update_dvc_config( - project_dir, config, workflow, verbose=verbose, force=force + project_dir, config, workflow, verbose=verbose, quiet=quiet, force=force ) help_msg = "To execute the workflow with DVC, run: dvc repro" if updated: @@ -72,7 +75,7 @@ def update_dvc_config( config: Dict[str, Any], workflow: Optional[str] = None, verbose: bool = False, - silent: bool = False, + quiet: bool = False, force: bool = False, ) -> bool: """Re-run the DVC commands in dry mode and update dvc.yaml file in the @@ -83,7 +86,7 @@ def update_dvc_config( path (Path): The path to the project directory. config (Dict[str, Any]): The loaded project.yml. verbose (bool): Whether to print additional info (via DVC). - silent (bool): Don't output anything (via DVC). + quiet (bool): Don't output anything (via DVC). force (bool): Force update, even if hashes match. RETURNS (bool): Whether the DVC config file was updated. """ @@ -105,6 +108,14 @@ def update_dvc_config( dvc_config_path.unlink() dvc_commands = [] config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])} + + # some flags that apply to every command + flags = [] + if verbose: + flags.append("--verbose") + if quiet: + flags.append("--quiet") + for name in workflows[workflow]: command = config_commands[name] deps = command.get("deps", []) @@ -118,14 +129,26 @@ def update_dvc_config( deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl] outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl] outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl] - dvc_cmd = ["run", "-n", name, "-w", str(path), "--no-exec"] + + dvc_cmd = ["run", *flags, "-n", name, "-w", str(path), "--no-exec"] if command.get("no_skip"): dvc_cmd.append("--always-changed") full_cmd = [*dvc_cmd, *deps_cmd, *outputs_cmd, *outputs_nc_cmd, *project_cmd] dvc_commands.append(join_command(full_cmd)) + + if not dvc_commands: + # If we don't check for this, then there will be an error when reading the + # config, since DVC wouldn't create it. + msg.fail( + "No usable commands for DVC found. 
This can happen if none of your " + "commands have dependencies or outputs.", + exits=1, + ) + with working_dir(path): - dvc_flags = {"--verbose": verbose, "--quiet": silent} - run_dvc_commands(dvc_commands, flags=dvc_flags) + for c in dvc_commands: + dvc_command = "dvc " + c + run_command(dvc_command) with dvc_config_path.open("r+", encoding="utf8") as f: content = f.read() f.seek(0, 0) @@ -133,26 +156,6 @@ def update_dvc_config( return True -def run_dvc_commands( - commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {} -) -> None: - """Run a sequence of DVC commands in a subprocess, in order. - - commands (List[str]): The string commands without the leading "dvc". - flags (Dict[str, bool]): Conditional flags to be added to command. Makes it - easier to pass flags like --quiet that depend on a variable or - command-line setting while avoiding lots of nested conditionals. - """ - for c in commands: - command = split_command(c) - dvc_command = ["dvc", *command] - # Add the flags if they are set to True - for flag, is_active in flags.items(): - if is_active: - dvc_command.append(flag) - run_command(dvc_command) - - def check_workflows(workflows: List[str], workflow: Optional[str] = None) -> None: """Validate workflows provided in project.yml and check that a given workflow can be used to generate a DVC config. diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py index 720d3a8cb..c37a3a91a 100644 --- a/spacy/lang/ru/lemmatizer.py +++ b/spacy/lang/ru/lemmatizer.py @@ -23,7 +23,7 @@ class RussianLemmatizer(Lemmatizer): overwrite: bool = False, scorer: Optional[Callable] = lemmatizer_score, ) -> None: - if mode == "pymorphy2": + if mode in {"pymorphy2", "pymorphy2_lookup"}: try: from pymorphy2 import MorphAnalyzer except ImportError: diff --git a/spacy/lang/uk/lemmatizer.py b/spacy/lang/uk/lemmatizer.py index 97ee80479..8337e7328 100644 --- a/spacy/lang/uk/lemmatizer.py +++ b/spacy/lang/uk/lemmatizer.py @@ -18,7 +18,7 @@ class UkrainianLemmatizer(RussianLemmatizer): overwrite: bool = False, scorer: Optional[Callable] = lemmatizer_score, ) -> None: - if mode == "pymorphy2": + if mode in {"pymorphy2", "pymorphy2_lookup"}: try: from pymorphy2 import MorphAnalyzer except ImportError: diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index 37aa9663b..9676e2194 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -1,7 +1,6 @@ -from typing import cast, Any, Callable, Dict, Iterable, List, Optional -from typing import Sequence, Tuple, Union +from typing import cast, Any, Callable, Dict, Iterable, List, Optional, Union +from typing import Tuple from collections import Counter -from copy import deepcopy from itertools import islice import numpy as np diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index c517991f5..88f50d964 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -30,17 +30,17 @@ scorer = {"@layers": "spacy.LinearLogistic.v1"} hidden_size = 128 [model.tok2vec] -@architectures = "spacy.Tok2Vec.v1" +@architectures = "spacy.Tok2Vec.v2" [model.tok2vec.embed] -@architectures = "spacy.MultiHashEmbed.v1" +@architectures = "spacy.MultiHashEmbed.v2" width = 96 rows = [5000, 2000, 1000, 1000] attrs = ["ORTH", "PREFIX", "SUFFIX", "SHAPE"] include_static_vectors = false [model.tok2vec.encode] -@architectures = "spacy.MaxoutWindowEncoder.v1" +@architectures = "spacy.MaxoutWindowEncoder.v2" width = ${model.tok2vec.embed.width} window_size = 1 
maxout_pieces = 3
@@ -139,6 +139,9 @@ def make_spancat(
     spans_key (str): Key of the doc.spans dict to save the spans under. During
         initialization and training, the component will look for spans on the
         reference document under the same key.
+    scorer (Optional[Callable]): The scoring method. Defaults to
+        Scorer.score_spans for the Doc.spans[spans_key] with overlapping
+        spans allowed.
     threshold (float): Minimum probability to consider a prediction
         positive. Spans with a positive prediction will be saved on the Doc.
         Defaults to 0.5.
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 3a6dd0b0c..a69937a0c 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -19,7 +19,7 @@ multi_label_default_config = """
 @architectures = "spacy.TextCatEnsemble.v2"
 
 [model.tok2vec]
-@architectures = "spacy.Tok2Vec.v1"
+@architectures = "spacy.Tok2Vec.v2"
 
 [model.tok2vec.embed]
 @architectures = "spacy.MultiHashEmbed.v2"
@@ -29,7 +29,7 @@
 attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
 include_static_vectors = false
 
 [model.tok2vec.encode]
-@architectures = "spacy.MaxoutWindowEncoder.v1"
+@architectures = "spacy.MaxoutWindowEncoder.v2"
 width = ${model.tok2vec.embed.width}
 window_size = 1
 maxout_pieces = 3
@@ -98,7 +98,7 @@ def make_multilabel_textcat(
     threshold: float,
     scorer: Optional[Callable],
     save_activations: bool,
-) -> "TextCategorizer":
+) -> "MultiLabel_TextCategorizer":
     """Create a TextCategorizer component. The text categorizer predicts categories
     over a whole document. It can learn one or more labels, and the labels are considered
     to be non-mutually exclusive, which means that there can be zero or more labels
@@ -107,6 +107,7 @@ def make_multilabel_textcat(
     model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
         scores for each category.
     threshold (float): Cutoff to consider a prediction "positive".
+    scorer (Optional[Callable]): The scoring method.
     """
     return MultiLabel_TextCategorizer(
         nlp.vocab,
@@ -155,7 +156,8 @@ class MultiLabel_TextCategorizer(TextCategorizer):
     name (str): The component instance name, used to add entries to the
         losses during training.
     threshold (float): Cutoff to consider a prediction "positive".
     save_activations (bool): save model activations in Doc when annotating.
+    scorer (Optional[Callable]): The scoring method.
 
     DOCS: https://spacy.io/api/textcategorizer#init
     """
diff --git a/spacy/schemas.py b/spacy/schemas.py
index a38421fa0..69ce3a396 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -181,12 +181,12 @@ class TokenPatternNumber(BaseModel):
     IS_SUBSET: Optional[List[StrictInt]] = Field(None, alias="is_subset")
     IS_SUPERSET: Optional[List[StrictInt]] = Field(None, alias="is_superset")
     INTERSECTS: Optional[List[StrictInt]] = Field(None, alias="intersects")
-    EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
-    NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
-    GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
-    LEQ: Union[StrictInt, StrictFloat] = Field(None, alias="<=")
-    GT: Union[StrictInt, StrictFloat] = Field(None, alias=">")
-    LT: Union[StrictInt, StrictFloat] = Field(None, alias="<")
+    EQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="==")
+    NEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="!=")
+    GEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">=")
+    LEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<=")
+    GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
+    LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
 
     class Config:
         extra = "forbid"
@@ -430,7 +430,7 @@ class ProjectConfigAssetURL(BaseModel):
     # fmt: off
     dest: StrictStr = Field(..., title="Destination of downloaded asset")
     url: Optional[StrictStr] = Field(None, title="URL of asset")
-    checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+    checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
     description: StrictStr = Field("", title="Description of asset")
     # fmt: on
@@ -438,7 +438,7 @@ class ProjectConfigAssetGit(BaseModel):
     # fmt: off
     git: ProjectConfigAssetGitItem = Field(..., title="Git repo information")
-    checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+    checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
     description: Optional[StrictStr] = Field(None, title="Description of asset")
     # fmt: on
@@ -508,9 +508,9 @@ class DocJSONSchema(BaseModel):
         None, title="Indices of sentences' start and end indices"
     )
     text: StrictStr = Field(..., title="Document text")
-    spans: Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]] = Field(
-        None, title="Span information - end/start indices, label, KB ID"
-    )
+    spans: Optional[
+        Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]]
+    ] = Field(None, title="Span information - end/start indices, label, KB ID")
     tokens: List[Dict[StrictStr, Union[StrictStr, StrictInt]]] = Field(
         ..., title="Token information - ID, start, annotations"
     )
@@ -519,9 +519,9 @@ class DocJSONSchema(BaseModel):
         title="Any custom data stored in the document's _ attribute",
         alias="_",
     )
-    underscore_token: Optional[Dict[StrictStr, Dict[StrictStr, Any]]] = Field(
+    underscore_token: Optional[Dict[StrictStr, List[Dict[StrictStr, Any]]]] = Field(
         None, title="Any custom data stored in the token's _ attribute"
     )
-    underscore_span: Optional[Dict[StrictStr, Dict[StrictStr, Any]]] = Field(
+    underscore_span: Optional[Dict[StrictStr, List[Dict[StrictStr, Any]]]] = Field(
         None, title="Any custom data stored in the span's _ attribute"
     )
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index c4f059dde..b1dc77ef0 100644
--- a/spacy/tests/conftest.py
+++ 
b/spacy/tests/conftest.py @@ -357,6 +357,14 @@ def ru_lemmatizer(): return get_lang_class("ru")().add_pipe("lemmatizer") +@pytest.fixture +def ru_lookup_lemmatizer(): + pytest.importorskip("pymorphy2") + return get_lang_class("ru")().add_pipe( + "lemmatizer", config={"mode": "pymorphy2_lookup"} + ) + + @pytest.fixture(scope="session") def sa_tokenizer(): return get_lang_class("sa")().tokenizer @@ -436,6 +444,15 @@ def uk_lemmatizer(): return get_lang_class("uk")().add_pipe("lemmatizer") +@pytest.fixture +def uk_lookup_lemmatizer(): + pytest.importorskip("pymorphy2") + pytest.importorskip("pymorphy2_dicts_uk") + return get_lang_class("uk")().add_pipe( + "lemmatizer", config={"mode": "pymorphy2_lookup"} + ) + + @pytest.fixture(scope="session") def ur_tokenizer(): return get_lang_class("ur")().tokenizer diff --git a/spacy/tests/doc/test_json_doc_conversion.py b/spacy/tests/doc/test_json_doc_conversion.py index 0d7c061c9..19698cfb2 100644 --- a/spacy/tests/doc/test_json_doc_conversion.py +++ b/spacy/tests/doc/test_json_doc_conversion.py @@ -128,7 +128,9 @@ def test_doc_to_json_with_token_span_attributes(doc): doc._.json_test1 = "hello world" doc._.json_test2 = [1, 2, 3] doc[0:1]._.span_test = "span_attribute" + doc[0:2]._.span_test = "span_attribute_2" doc[0]._.token_test = 117 + doc[1]._.token_test = 118 doc.spans["span_group"] = [doc[0:1]] json_doc = doc.to_json( underscore=["json_test1", "json_test2", "token_test", "span_test"] @@ -139,8 +141,10 @@ def test_doc_to_json_with_token_span_attributes(doc): assert json_doc["_"]["json_test2"] == [1, 2, 3] assert "underscore_token" in json_doc assert "underscore_span" in json_doc - assert json_doc["underscore_token"]["token_test"]["value"] == 117 - assert json_doc["underscore_span"]["span_test"]["value"] == "span_attribute" + assert json_doc["underscore_token"]["token_test"][0]["value"] == 117 + assert json_doc["underscore_token"]["token_test"][1]["value"] == 118 + assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute" + assert json_doc["underscore_span"]["span_test"][1]["value"] == "span_attribute_2" assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0 assert srsly.json_loads(srsly.json_dumps(json_doc)) == json_doc @@ -161,8 +165,8 @@ def test_doc_to_json_with_custom_user_data(doc): assert json_doc["_"]["json_test"] == "hello world" assert "underscore_token" in json_doc assert "underscore_span" in json_doc - assert json_doc["underscore_token"]["token_test"]["value"] == 117 - assert json_doc["underscore_span"]["span_test"]["value"] == "span_attribute" + assert json_doc["underscore_token"]["token_test"][0]["value"] == 117 + assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute" assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0 assert srsly.json_loads(srsly.json_dumps(json_doc)) == json_doc @@ -181,8 +185,8 @@ def test_doc_to_json_with_token_span_same_identifier(doc): assert json_doc["_"]["my_ext"] == "hello world" assert "underscore_token" in json_doc assert "underscore_span" in json_doc - assert json_doc["underscore_token"]["my_ext"]["value"] == 117 - assert json_doc["underscore_span"]["my_ext"]["value"] == "span_attribute" + assert json_doc["underscore_token"]["my_ext"][0]["value"] == 117 + assert json_doc["underscore_span"]["my_ext"][0]["value"] == "span_attribute" assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0 assert srsly.json_loads(srsly.json_dumps(json_doc)) == json_doc @@ -195,10 +199,9 @@ def 
test_doc_to_json_with_token_attributes_missing(doc): doc[0]._.token_test = 117 json_doc = doc.to_json(underscore=["span_test"]) - assert "underscore_token" in json_doc assert "underscore_span" in json_doc - assert json_doc["underscore_span"]["span_test"]["value"] == "span_attribute" - assert "token_test" not in json_doc["underscore_token"] + assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute" + assert "underscore_token" not in json_doc assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0 @@ -283,7 +286,9 @@ def test_json_to_doc_with_token_span_attributes(doc): doc._.json_test1 = "hello world" doc._.json_test2 = [1, 2, 3] doc[0:1]._.span_test = "span_attribute" + doc[0:2]._.span_test = "span_attribute_2" doc[0]._.token_test = 117 + doc[1]._.token_test = 118 json_doc = doc.to_json( underscore=["json_test1", "json_test2", "token_test", "span_test"] @@ -295,7 +300,9 @@ def test_json_to_doc_with_token_span_attributes(doc): assert new_doc._.json_test1 == "hello world" assert new_doc._.json_test2 == [1, 2, 3] assert new_doc[0]._.token_test == 117 + assert new_doc[1]._.token_test == 118 assert new_doc[0:1]._.span_test == "span_attribute" + assert new_doc[0:2]._.span_test == "span_attribute_2" assert new_doc.user_data == doc.user_data assert new_doc.to_bytes(exclude=["user_data"]) == doc.to_bytes( exclude=["user_data"] diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py index 9ca7f441b..e82fd4f8c 100644 --- a/spacy/tests/lang/ru/test_lemmatizer.py +++ b/spacy/tests/lang/ru/test_lemmatizer.py @@ -78,3 +78,17 @@ def test_ru_lemmatizer_punct(ru_lemmatizer): assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"'] doc = Doc(ru_lemmatizer.vocab, words=["»"], pos=["PUNCT"]) assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"'] + + +def test_ru_doc_lookup_lemmatization(ru_lookup_lemmatizer): + words = ["мама", "мыла", "раму"] + pos = ["NOUN", "VERB", "NOUN"] + morphs = [ + "Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing", + "Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act", + "Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing", + ] + doc = Doc(ru_lookup_lemmatizer.vocab, words=words, pos=pos, morphs=morphs) + doc = ru_lookup_lemmatizer(doc) + lemmas = [token.lemma_ for token in doc] + assert lemmas == ["мама", "мыла", "раму"] diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py index 57dd4198a..788744aa1 100644 --- a/spacy/tests/lang/uk/test_lemmatizer.py +++ b/spacy/tests/lang/uk/test_lemmatizer.py @@ -9,3 +9,11 @@ def test_uk_lemmatizer(uk_lemmatizer): """Check that the default uk lemmatizer runs.""" doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"]) uk_lemmatizer(doc) + assert [token.lemma for token in doc] + + +def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer): + """Check that the lookup uk lemmatizer runs.""" + doc = Doc(uk_lookup_lemmatizer.vocab, words=["a", "b", "c"]) + uk_lookup_lemmatizer(doc) + assert [token.lemma for token in doc] diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 65c0ff197..2451f72dd 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1619,24 +1619,20 @@ cdef class Doc: Doc.set_extension(attr) self._.set(attr, doc_json["_"][attr]) - if doc_json.get("underscore_token", {}): - for token_attr in doc_json["underscore_token"]: - token_start = doc_json["underscore_token"][token_attr]["token_start"] - value = doc_json["underscore_token"][token_attr]["value"] - - if not Token.has_extension(token_attr): - 
Token.set_extension(token_attr) - self[token_start]._.set(token_attr, value) + for token_attr in doc_json.get("underscore_token", {}): + if not Token.has_extension(token_attr): + Token.set_extension(token_attr) + for token_data in doc_json["underscore_token"][token_attr]: + start = token_by_char(self.c, self.length, token_data["start"]) + value = token_data["value"] + self[start]._.set(token_attr, value) - if doc_json.get("underscore_span", {}): - for span_attr in doc_json["underscore_span"]: - token_start = doc_json["underscore_span"][span_attr]["token_start"] - token_end = doc_json["underscore_span"][span_attr]["token_end"] - value = doc_json["underscore_span"][span_attr]["value"] - - if not Span.has_extension(span_attr): - Span.set_extension(span_attr) - self[token_start:token_end]._.set(span_attr, value) + for span_attr in doc_json.get("underscore_span", {}): + if not Span.has_extension(span_attr): + Span.set_extension(span_attr) + for span_data in doc_json["underscore_span"][span_attr]: + value = span_data["value"] + self.char_span(span_data["start"], span_data["end"])._.set(span_attr, value) return self def to_json(self, underscore=None): @@ -1684,30 +1680,34 @@ cdef class Doc: if underscore: user_keys = set() if self.user_data: - data["_"] = {} - data["underscore_token"] = {} - data["underscore_span"] = {} - for data_key in self.user_data: + for data_key, value in self.user_data.copy().items(): if type(data_key) == tuple and len(data_key) >= 4 and data_key[0] == "._.": attr = data_key[1] start = data_key[2] end = data_key[3] if attr in underscore: user_keys.add(attr) - value = self.user_data[data_key] if not srsly.is_json_serializable(value): raise ValueError(Errors.E107.format(attr=attr, value=repr(value))) # Check if doc attribute if start is None: + if "_" not in data: + data["_"] = {} data["_"][attr] = value # Check if token attribute elif end is None: + if "underscore_token" not in data: + data["underscore_token"] = {} if attr not in data["underscore_token"]: - data["underscore_token"][attr] = {"token_start": start, "value": value} + data["underscore_token"][attr] = [] + data["underscore_token"][attr].append({"start": start, "value": value}) # Else span attribute else: + if "underscore_span" not in data: + data["underscore_span"] = {} if attr not in data["underscore_span"]: - data["underscore_span"][attr] = {"token_start": start, "token_end": end, "value": value} + data["underscore_span"][attr] = [] + data["underscore_span"][attr].append({"start": start, "end": end, "value": value}) for attr in underscore: if attr not in user_keys: diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index e5cd3089b..fc2c46022 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -1482,7 +1482,7 @@ You'll also need to add the assets you want to track with ```cli -$ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose] +$ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose] [--quiet] ``` > #### Example @@ -1499,6 +1499,7 @@ $ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose] | `workflow` | Name of workflow defined in `project.yml`. Defaults to first workflow if not set. ~~Optional[str] \(option)~~ | | `--force`, `-F` | Force-updating config file. ~~bool (flag)~~ | | `--verbose`, `-V` | Print more output generated by DVC. ~~bool (flag)~~ | +| `--quiet`, `-q` | Print no output generated by DVC. ~~bool (flag)~~ | | `--help`, `-h` | Show help message and available arguments. 
~~bool (flag)~~ | | **CREATES** | A `dvc.yaml` file in the project directory, based on the steps defined in the given workflow. | diff --git a/website/docs/api/kb_in_memory.md b/website/docs/api/kb_in_memory.md index c9ce624f0..9e3279e6a 100644 --- a/website/docs/api/kb_in_memory.md +++ b/website/docs/api/kb_in_memory.md @@ -21,9 +21,9 @@ Create the knowledge base. > #### Example > > ```python -> from spacy.kb import KnowledgeBase +> from spacy.kb import InMemoryLookupKB > vocab = nlp.vocab -> kb = KnowledgeBase(vocab=vocab, entity_vector_length=64) +> kb = InMemoryLookupKB(vocab=vocab, entity_vector_length=64) > ``` | Name | Description | diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md index 4797bbfe3..90b612358 100644 --- a/website/docs/usage/projects.md +++ b/website/docs/usage/projects.md @@ -243,6 +243,27 @@ pipelines. > python -m spacy project run test . --vars.foo bar > ``` +> #### Tip: Environment Variables +> +> Commands in a project file are not executed in a shell, so they don't have +> direct access to environment variables. But you can insert environment +> variables using the `env` dictionary to make values available for +> interpolation, just like values in `vars`. Here's an example `env` dict that +> makes `$PATH` available as `ENV_PATH`: +> +> ```yaml +> env: +> ENV_PATH: PATH +> ``` +> +> This can be used in a project command like so: +> +> ```yaml +> - name: "echo-path" +> script: +> - "echo ${env.ENV_PATH}" +> ``` + | Section | Description | | --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `title` | An optional project title used in `--help` message and [auto-generated docs](#custom-docs). | diff --git a/website/meta/universe.json b/website/meta/universe.json index a6a1a0fc7..d7c99956b 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1,5 +1,46 @@ { "resources": [ + { + "id": "spacy-cleaner", + "title": "spacy-cleaner", + "slogan": "Easily clean text with spaCy!", + "description": "**spacy-cleaner** utilises spaCy `Language` models to replace, remove, and \n mutate spaCy tokens. Cleaning actions available are:\n\n* Remove/replace stopwords.\n* Remove/replace punctuation.\n* Remove/replace numbers.\n* Remove/replace emails.\n* Remove/replace URLs.\n* Perform lemmatisation.\n\nSee our [docs](https://ce11an.github.io/spacy-cleaner/) for more information.", + "github": "Ce11an/spacy-cleaner", + "pip": "spacy-cleaner", + "code_example": [ + "import spacy", + "import spacy_cleaner", + "from spacy_cleaner.processing import removers, replacers, mutators", + "", + "model = spacy.load(\"en_core_web_sm\")", + "pipeline = spacy_cleaner.Pipeline(", + " model,", + " removers.remove_stopword_token,", + " replacers.replace_punctuation_token,", + " mutators.mutate_lemma_token,", + ")", + "", + "texts = [\"Hello, my name is Cellan! 
I love to swim!\"]",
+        "",
+        "pipeline.clean(texts)",
+        "# ['hello _IS_PUNCT_ Cellan _IS_PUNCT_ love swim _IS_PUNCT_']"
+      ],
+      "code_language": "python",
+      "url": "https://ce11an.github.io/spacy-cleaner/",
+      "image": "https://raw.githubusercontent.com/Ce11an/spacy-cleaner/main/docs/assets/images/spacemen.png",
+      "author": "Cellan Hall",
+      "author_links": {
+        "twitter": "Ce11an",
+        "github": "Ce11an",
+        "website": "https://www.linkedin.com/in/cellan-hall/"
+      },
+      "category": [
+        "extension"
+      ],
+      "tags": [
+        "text-processing"
+      ]
+    },
     {
       "id": "Zshot",
       "title": "Zshot",
@@ -2460,20 +2501,20 @@
         "import spacy",
         "from spacy_wordnet.wordnet_annotator import WordnetAnnotator ",
         "",
-        "# Load an spacy model (supported models are \"es\" and \"en\") ",
-        "nlp = spacy.load('en')",
-        "# Spacy 3.x",
-        "nlp.add_pipe(\"spacy_wordnet\", after='tagger', config={'lang': nlp.lang})",
-        "# Spacy 2.x",
+        "# Load a spaCy model (supported languages are \"es\" and \"en\") ",
+        "nlp = spacy.load('en_core_web_sm')",
+        "# spaCy 3.x",
+        "nlp.add_pipe(\"spacy_wordnet\", after='tagger')",
+        "# spaCy 2.x",
         "# nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')",
         "token = nlp('prices')[0]",
         "",
-        "# wordnet object link spacy token with nltk wordnet interface by giving acces to",
+        "# The WordNet object links the spaCy token with the NLTK WordNet interface, giving access to",
         "# synsets and lemmas ",
         "token._.wordnet.synsets()",
         "token._.wordnet.lemmas()",
         "",
-        "# And automatically tags with wordnet domains",
+        "# And automatically adds info about WordNet domains",
         "token._.wordnet.wordnet_domains()"
       ],
       "author": "recognai",
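
For reference, the reworked `underscore` serialization in `spacy/tokens/doc.pyx` above round-trips as shown below. This is a minimal sketch, not part of the patch: it assumes a spaCy build with these changes applied (v3.4.2), and the extension names `token_test`/`span_test` are illustrative, borrowed from the tests in this diff.

```python
import spacy
from spacy.tokens import Doc, Span, Token

# Illustrative extension names, mirroring the test fixtures above.
Token.set_extension("token_test", default=None)
Span.set_extension("span_test", default=None)

nlp = spacy.blank("en")
doc = nlp("c d e")
doc[0]._.token_test = 117
doc[1]._.token_test = 118
doc[0:1]._.span_test = "span_attribute"

json_doc = doc.to_json(underscore=["token_test", "span_test"])
# Each attribute now maps to a *list* of records keyed by character
# offsets, so one extension can appear on several tokens or spans:
assert json_doc["underscore_token"]["token_test"][0]["value"] == 117
assert json_doc["underscore_token"]["token_test"][1]["value"] == 118
assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute"

# Doc.from_json() restores the attributes from the list-based format.
new_doc = Doc(nlp.vocab).from_json(json_doc, validate=True)
assert new_doc[1]._.token_test == 118
assert new_doc[0:1]._.span_test == "span_attribute"
```

The switch to a list of records per attribute is what allows the same extension to be serialized for several tokens or spans; the previous one-record-per-attribute format kept only the first occurrence.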