diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index 80c88b0b8..7e3f94df6 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -1,9 +1,6 @@
 parameters:
   python_version: ''
-  architecture: ''
-  prefix: ''
-  gpu: false
-  num_build_jobs: 1
+  architecture: 'x64'
 
 steps:
   - task: UsePythonVersion@0
@@ -16,16 +13,16 @@ steps:
     displayName: 'Set variables'
 
   - script: |
-      ${{ parameters.prefix }} python -m pip install -U pip setuptools
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+      python -m pip install -U build pip setuptools
+      python -m pip install -U -r requirements.txt
     displayName: "Install dependencies"
 
   - script: |
-      ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
-      ${{ parameters.prefix }} python setup.py sdist --formats=gztar
-    displayName: "Compile and build sdist"
+      python -m build --sdist
+    displayName: "Build sdist"
 
-  - script: python -m mypy spacy
+  - script: |
+      python -m mypy spacy
     displayName: 'Run mypy'
     condition: ne(variables['python_version'], '3.10')
 
@@ -34,35 +31,24 @@ steps:
       contents: "spacy"
     displayName: "Delete source directory"
 
+  - task: DeleteFiles@1
+    inputs:
+      contents: "*.egg-info"
+    displayName: "Delete egg-info directory"
+
   - script: |
-      ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
-      ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
+      python -m pip freeze > installed.txt
+      python -m pip uninstall -y -r installed.txt
     displayName: "Uninstall all packages"
 
   - bash: |
-      ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-      ${{ parameters.prefix }} python -m pip install dist/$SDIST
+      SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+      python -m pip install dist/$SDIST
     displayName: "Install from sdist"
 
   - script: |
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
-    displayName: "Install test requirements"
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0
-      ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
-    displayName: "Install GPU requirements"
-    condition: eq(${{ parameters.gpu }}, true)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy
-    displayName: "Run CPU tests"
-    condition: eq(${{ parameters.gpu }}, false)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy -p spacy.tests.enable_gpu
-    displayName: "Run GPU tests"
-    condition: eq(${{ parameters.gpu }}, true)
+      python -W error -c "import spacy"
+    displayName: "Test import"
 
   - script: |
       python -m spacy download ca_core_news_sm
@@ -105,13 +91,21 @@ steps:
     displayName: 'Test assemble CLI vectors warning'
     condition: eq(variables['python_version'], '3.8')
 
+  - script: |
+      python -m pip install -U -r requirements.txt
+    displayName: "Install test requirements"
+
+  - script: |
+      python -m pytest --pyargs spacy -W error
+    displayName: "Run CPU tests"
+
+  - script: |
+      python -m pip install 'spacy[apple]'
+      python -m pytest --pyargs spacy
+    displayName: "Run CPU tests with thinc-apple-ops"
+    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10'))
+
   - script: |
       python .github/validate_universe_json.py website/meta/universe.json
     displayName: 'Test website/meta/universe.json'
     condition: eq(variables['python_version'], '3.8')
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install thinc-apple-ops
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy
-    displayName: "Run CPU tests with thinc-apple-ops"
-    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.9'))
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b959262e3..df59697b1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
     language_version: python3.7
     additional_dependencies: ['click==8.0.4']
 - repo: https://gitlab.com/pycqa/flake8
-  rev: 3.9.2
+  rev: 5.0.4
   hooks:
   - id: flake8
     args:
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 4624b2eb2..a26d26974 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -31,7 +31,7 @@ jobs:
       inputs:
         versionSpec: "3.7"
     - script: |
-        pip install flake8==3.9.2
+        pip install flake8==5.0.4
         python -m flake8 spacy --count --select=E901,E999,F821,F822,F823 --show-source --statistics
       displayName: "flake8"
@@ -41,7 +41,7 @@ jobs:
       matrix:
         # We're only running one platform per Python version to speed up builds
         Python36Linux:
-          imageName: "ubuntu-latest"
+          imageName: "ubuntu-20.04"
           python.version: "3.6"
         # Python36Windows:
         #   imageName: "windows-latest"
        #   python.version: "3.6"
        # Python36Mac:
        #   imageName: "macos-latest"
        #   python.version: "3.6"
        # Python37Linux:
-        #   imageName: "ubuntu-latest"
+        #   imageName: "ubuntu-20.04"
        #   python.version: "3.7"
        Python37Windows:
          imageName: "windows-latest"
@@ -92,20 +92,3 @@ jobs:
     - template: .github/azure-steps.yml
       parameters:
         python_version: '$(python.version)'
-        architecture: 'x64'
-
-# - job: "TestGPU"
-#   dependsOn: "Validate"
-#   strategy:
-#     matrix:
-#       Python38LinuxX64_GPU:
-#         python.version: '3.8'
-#   pool:
-#     name: "LinuxX64_GPU"
-#   steps:
-#     - template: .github/azure-steps.yml
-#       parameters:
-#         python_version: '$(python.version)'
-#         architecture: 'x64'
-#         gpu: true
-#         num_build_jobs: 24
diff --git a/requirements.txt b/requirements.txt
index 69cefa3f6..783e6f0f8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,6 +12,7 @@ srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<0.5.0
 pathy>=0.3.5
+smart-open>=5.2.1,<7.0.0
 # Third party dependencies
 numpy>=1.15.0
 requests>=2.13.0,<3.0.0
diff --git a/setup.cfg b/setup.cfg
index f2c0c6958..97e6efc21 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -51,9 +51,10 @@ install_requires =
     wasabi>=0.9.1,<1.1.0
     srsly>=2.4.3,<3.0.0
     catalogue>=2.0.6,<2.1.0
+    # Third-party dependencies
     typer>=0.3.0,<0.5.0
     pathy>=0.3.5
-    # Third-party dependencies
+    smart-open>=5.2.1,<7.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
diff --git a/spacy/about.py b/spacy/about.py
index 42f059d16..b4ef29260 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.3.1"
+__version__ = "3.3.2"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 __projects__ = "https://github.com/explosion/projects"
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index df98e711f..2f8e492a7 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
     if dest.exists() and not force:
         return None
     src = str(src)
-    with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
+    with smart_open.open(src, mode="rb", compression="disable") as input_file:
        with dest.open(mode="wb") as output_file:
            shutil.copyfileobj(input_file, output_file)
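For background on the `ignore_ext` -> `compression` switch above: smart-open 6.x removed the `ignore_ext` keyword, and the requirement bump to smart-open>=5.2.1 picks up the releases where compression handling is selected explicitly instead. A minimal sketch of the new call, not part of the patch (the URL is a placeholder):

    import smart_open

    # "disable" copies the raw bytes, matching the old ignore_ext=True behaviour;
    # the default "infer_from_extension" would transparently decompress .gz/.bz2 files.
    url = "https://example.com/files/vectors.tar.gz"  # placeholder
    with smart_open.open(url, mode="rb", compression="disable") as input_file:
        data = input_file.read()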
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index 5d49b6eb7..8bf84981b 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -227,12 +227,13 @@ def parse_spans(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
             "kb_id": span.kb_id_ if span.kb_id_ else "",
             "kb_url": kb_url_template.format(span.kb_id_) if kb_url_template else "#",
         }
-        for span in doc.spans[spans_key]
+        for span in doc.spans.get(spans_key, [])
     ]
     tokens = [token.text for token in doc]
 
     if not spans:
-        warnings.warn(Warnings.W117.format(spans_key=spans_key))
+        keys = list(doc.spans.keys())
+        warnings.warn(Warnings.W117.format(spans_key=spans_key, keys=keys))
     title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
     settings = get_doc_settings(doc)
     return {
diff --git a/spacy/errors.py b/spacy/errors.py
index 60985fbd7..1f1c73f5f 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -195,7 +195,7 @@ class Warnings(metaclass=ErrorsWithCodes):
     W117 = ("No spans to visualize found in Doc object with spans_key: '{spans_key}'. If this is "
             "surprising to you, make sure the Doc was processed using a model "
             "that supports span categorization, and check the `doc.spans[spans_key]` "
-            "property manually if necessary.")
+            "property manually if necessary.\n\nAvailable keys: {keys}")
     W118 = ("Term '{term}' not found in glossary. It may however be explained in documentation "
             "for the corpora used to train the language. Please check "
             "`nlp.meta[\"sources\"]` for any relevant links.")
@@ -335,6 +335,11 @@ class Errors(metaclass=ErrorsWithCodes):
             "clear the existing vectors and resize the table.")
     E074 = ("Error interpreting compiled match pattern: patterns are expected "
             "to end with the attribute {attr}. Got: {bad_attr}.")
+    E079 = ("Error computing states in beam: number of predicted beams "
+            "({pbeams}) does not equal number of gold beams ({gbeams}).")
+    E080 = ("Duplicate state found in beam: {key}.")
+    E081 = ("Error getting gradient in beam: number of histories ({n_hist}) "
+            "does not equal number of losses ({losses}).")
     E082 = ("Error deprojectivizing parse: number of heads ({n_heads}), "
             "projective heads ({n_proj_heads}) and labels ({n_labels}) do not "
             "match.")
diff --git a/spacy/lang/ko/punctuation.py b/spacy/lang/ko/punctuation.py
index 7f7b40c5b..f5f1c51da 100644
--- a/spacy/lang/ko/punctuation.py
+++ b/spacy/lang/ko/punctuation.py
@@ -3,7 +3,7 @@ from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
 
 
 _infixes = (
-    ["·", "ㆍ", "\(", "\)"]
+    ["·", "ㆍ", r"\(", r"\)"]
     + [r"(?<=[0-9])~(?=[0-9-])"]
     + LIST_QUOTES
     + BASE_TOKENIZER_INFIXES
diff --git a/spacy/ml/_precomputable_affine.py b/spacy/ml/_precomputable_affine.py
index b99de2d2b..88c415754 100644
--- a/spacy/ml/_precomputable_affine.py
+++ b/spacy/ml/_precomputable_affine.py
@@ -22,9 +22,15 @@ def forward(model, X, is_train):
     nP = model.get_dim("nP")
     nI = model.get_dim("nI")
     W = model.get_param("W")
-    Yf = model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True)
+    # Preallocate array for layer output, including padding.
+    Yf = model.ops.alloc2f(X.shape[0] + 1, nF * nO * nP)
+    model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True, out=Yf[1:])
     Yf = Yf.reshape((Yf.shape[0], nF, nO, nP))
-    Yf = model.ops.xp.vstack((model.get_param("pad"), Yf))
+
+    # Set padding. Padding has shape (1, nF, nO, nP). Unfortunately, we cannot
+    # change its shape to (nF, nO, nP) without breaking existing models. So
+    # we'll squeeze the first dimension here.
+    Yf[0] = model.ops.xp.squeeze(model.get_param("pad"), 0)
 
     def backward(dY_ids):
         # This backprop is particularly tricky, because we get back a different
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index 54a7030dc..7507eb35b 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -331,9 +331,9 @@ class EditTreeLemmatizer(TrainablePipe):
             tree = dict(tree)
             if "orig" in tree:
-                tree["orig"] = self.vocab.strings[tree["orig"]]
+                tree["orig"] = self.vocab.strings.add(tree["orig"])
             if "orig" in tree:
-                tree["subst"] = self.vocab.strings[tree["subst"]]
+                tree["subst"] = self.vocab.strings.add(tree["subst"])
             trees.append(tree)
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 0a6138fbc..e3fea5946 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -269,7 +269,10 @@ class SpanCategorizer(TrainablePipe):
         DOCS: https://spacy.io/api/spancategorizer#predict
         """
         indices = self.suggester(docs, ops=self.model.ops)
-        scores = self.model.predict((docs, indices))  # type: ignore
+        if indices.lengths.sum() == 0:
+            scores = self.model.ops.alloc2f(0, 0)
+        else:
+            scores = self.model.predict((docs, indices))  # type: ignore
         return indices, scores
 
     def set_candidates(
diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py
index c334cc6eb..1f2d7d999 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -123,14 +123,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
 
     # head before start
     arr = doc.to_array(["HEAD"])
-    arr[0] = -1
+    arr[0] = numpy.int32(-1).astype(numpy.uint64)
     doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
 
     # head after end
     arr = doc.to_array(["HEAD"])
-    arr[0] = 5
+    arr[0] = numpy.int32(5).astype(numpy.uint64)
     doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index dd4942989..a64ab2ba8 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -3,6 +3,7 @@ import weakref
 import numpy
 from numpy.testing import assert_array_equal
 import pytest
+import warnings
 from thinc.api import NumpyOps, get_current_ops
 
 from spacy.attrs import DEP, ENT_IOB, ENT_TYPE, HEAD, IS_ALPHA, MORPH, POS
@@ -529,9 +530,9 @@ def test_doc_from_array_sent_starts(en_vocab):
     # no warning using default attrs
     attrs = doc._get_array_attrs()
     arr = doc.to_array(attrs)
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         new_doc.from_array(attrs, arr)
-        assert len(record) == 0
     # only SENT_START uses SENT_START
     attrs = [SENT_START]
     arr = doc.to_array(attrs)
diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py
index 3810323bf..9ca7f441b 100644
--- a/spacy/tests/lang/ru/test_lemmatizer.py
+++ b/spacy/tests/lang/ru/test_lemmatizer.py
@@ -2,6 +2,9 @@ import pytest
 from spacy.tokens import Doc
 
 
+pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
+
+
 def test_ru_doc_lemmatization(ru_lemmatizer):
     words = ["мама", "мыла", "раму"]
     pos = ["NOUN", "VERB", "NOUN"]
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
index 4a787b2a6..57dd4198a 100644
--- a/spacy/tests/lang/uk/test_lemmatizer.py
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -1,6 +1,10 @@
+import pytest
 from spacy.tokens import Doc
 
 
+pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
+
+
 def test_uk_lemmatizer(uk_lemmatizer):
     """Check that the default uk lemmatizer runs."""
     doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
diff --git a/spacy/tests/matcher/test_phrase_matcher.py b/spacy/tests/matcher/test_phrase_matcher.py
index 3b24f3ba8..8a8d9eb84 100644
--- a/spacy/tests/matcher/test_phrase_matcher.py
+++ b/spacy/tests/matcher/test_phrase_matcher.py
@@ -1,4 +1,5 @@
 import pytest
+import warnings
 import srsly
 from mock import Mock
 
@@ -344,13 +345,13 @@ def test_phrase_matcher_validation(en_vocab):
     matcher.add("TEST1", [doc1])
     with pytest.warns(UserWarning):
         matcher.add("TEST2", [doc2])
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         matcher.add("TEST3", [doc3])
-        assert not record.list
     matcher = PhraseMatcher(en_vocab, attr="POS", validate=True)
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         matcher.add("TEST4", [doc2])
-        assert not record.list
 
 
 def test_attr_validation(en_vocab):
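A note on the recurring test change above: `pytest.warns(None)` is deprecated in pytest 7, so "no warning is raised" is now asserted by escalating warnings to errors. A standalone sketch of the pattern outside the patch (the helper name and arguments are illustrative):

    import warnings

    def assert_adds_without_warning(matcher, key, docs):
        # Any warning raised inside this block becomes an error and fails the test,
        # replacing `with pytest.warns(None) as record: ... assert not record.list`.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            matcher.add(key, docs)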
diff --git a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
index cf541e301..b12ca5dd4 100644
--- a/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
+++ b/spacy/tests/pipeline/test_edit_tree_lemmatizer.py
@@ -60,10 +60,45 @@ def test_initialize_from_labels():
     nlp2 = Language()
     lemmatizer2 = nlp2.add_pipe("trainable_lemmatizer")
     lemmatizer2.initialize(
-        get_examples=lambda: train_examples,
+        # We want to check that the strings in replacement nodes are
+        # added to the string store. Avoid that they get added through
+        # the examples.
+        get_examples=lambda: train_examples[:1],
         labels=lemmatizer.label_data,
     )
     assert lemmatizer2.tree2label == {1: 0, 3: 1, 4: 2, 6: 3}
+    assert lemmatizer2.label_data == {
+        "trees": [
+            {"orig": "S", "subst": "s"},
+            {
+                "prefix_len": 1,
+                "suffix_len": 0,
+                "prefix_tree": 0,
+                "suffix_tree": 4294967295,
+            },
+            {"orig": "s", "subst": ""},
+            {
+                "prefix_len": 0,
+                "suffix_len": 1,
+                "prefix_tree": 4294967295,
+                "suffix_tree": 2,
+            },
+            {
+                "prefix_len": 0,
+                "suffix_len": 0,
+                "prefix_tree": 4294967295,
+                "suffix_tree": 4294967295,
+            },
+            {"orig": "E", "subst": "e"},
+            {
+                "prefix_len": 1,
+                "suffix_len": 0,
+                "prefix_tree": 5,
+                "suffix_tree": 4294967295,
+            },
+        ],
+        "labels": (1, 3, 4, 6),
+    }
 
 
 def test_no_data():
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index a6cfead77..a45679b63 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -1048,6 +1048,10 @@ def test_no_gold_ents(patterns):
     for eg in train_examples:
         eg.predicted = ruler(eg.predicted)
 
+    # Entity ruler is no longer needed (initialization below wipes out the
+    # patterns and causes warnings)
+    nlp.remove_pipe("entity_ruler")
+
     def create_kb(vocab):
         # create artificial KB
         mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index 15256a763..e9db983d3 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -372,24 +372,39 @@ def test_overfitting_IO_overlapping():
 
 
 def test_zero_suggestions():
-    # Test with a suggester that returns 0 suggestions
+    # Test with a suggester that can return 0 suggestions
 
-    @registry.misc("test_zero_suggester")
-    def make_zero_suggester():
-        def zero_suggester(docs, *, ops=None):
+    @registry.misc("test_mixed_zero_suggester")
+    def make_mixed_zero_suggester():
+        def mixed_zero_suggester(docs, *, ops=None):
             if ops is None:
                 ops = get_current_ops()
-            return Ragged(
-                ops.xp.zeros((0, 0), dtype="i"), ops.xp.zeros((len(docs),), dtype="i")
-            )
+            spans = []
+            lengths = []
+            for doc in docs:
+                if len(doc) > 0 and len(doc) % 2 == 0:
+                    spans.append((0, 1))
+                    lengths.append(1)
+                else:
+                    lengths.append(0)
+            spans = ops.asarray2i(spans)
+            lengths_array = ops.asarray1i(lengths)
+            if len(spans) > 0:
+                output = Ragged(ops.xp.vstack(spans), lengths_array)
+            else:
+                output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)
+            return output
 
-        return zero_suggester
+        return mixed_zero_suggester
 
     fix_random_seed(0)
     nlp = English()
     spancat = nlp.add_pipe(
         "spancat",
-        config={"suggester": {"@misc": "test_zero_suggester"}, "spans_key": SPAN_KEY},
+        config={
+            "suggester": {"@misc": "test_mixed_zero_suggester"},
+            "spans_key": SPAN_KEY,
+        },
     )
     train_examples = make_examples(nlp)
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
@@ -397,6 +412,16 @@ def test_zero_suggestions():
     assert set(spancat.labels) == {"LOC", "PERSON"}
 
     nlp.update(train_examples, sgd=optimizer)
+    # empty doc
+    nlp("")
+    # single doc with zero suggestions
+    nlp("one")
+    # single doc with one suggestion
+    nlp("two two")
+    # batch with mixed zero/one suggestions
+    list(nlp.pipe(["one", "two two", "three three three", "", "four four four four"]))
+    # batch with no suggestions
+    list(nlp.pipe(["", "one", "three three three"]))
 
 
 def test_set_candidates():
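For context on the spancat changes: a suggester returns a thinc `Ragged` whose `lengths` array has one entry per doc, so the new guard in `SpanCategorizer.predict` can detect an empty batch from `lengths.sum() == 0`. A rough sketch of that contract, assuming the thinc API and not taken from the patch:

    from thinc.api import get_current_ops
    from thinc.types import Ragged

    ops = get_current_ops()
    # Two docs, no candidate spans for either: empty data, per-doc lengths of 0.
    indices = Ragged(ops.xp.zeros((0, 0), dtype="i"), ops.asarray1i([0, 0]))
    if indices.lengths.sum() == 0:
        scores = ops.alloc2f(0, 0)  # nothing to score, so skip the model call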
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 838e00369..b04c49f47 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -15,6 +15,7 @@ from spacy.cli._util import is_subpath_of, load_project_config
 from spacy.cli._util import parse_config_overrides, string_to_list
 from spacy.cli._util import substitute_project_variables
 from spacy.cli._util import validate_project_commands
+from spacy.cli._util import upload_file, download_file
 from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
 from spacy.cli.debug_data import _get_labels_from_spancat
 from spacy.cli.debug_data import _get_distribution, _get_kl_divergence
@@ -855,3 +856,18 @@ def test_span_length_freq_dist_output_must_be_correct():
     span_freqs = _get_spans_length_freq_dist(sample_span_lengths, threshold)
     assert sum(span_freqs.values()) >= threshold
     assert list(span_freqs.keys()) == [3, 1, 4, 5, 2]
+
+
+def test_upload_download_local_file():
+    with make_tempdir() as d1, make_tempdir() as d2:
+        filename = "f.txt"
+        content = "content"
+        local_file = d1 / filename
+        remote_file = d2 / filename
+        with local_file.open(mode="w") as file_:
+            file_.write(content)
+        upload_file(local_file, remote_file)
+        local_file.unlink()
+        download_file(remote_file, local_file)
+        with local_file.open(mode="r") as file_:
+            assert file_.read() == content
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index ccc145b44..f298b38e0 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -203,6 +203,16 @@ def test_displacy_parse_spans_different_spans_key(en_vocab):
     ]
 
 
+def test_displacy_parse_empty_spans_key(en_vocab):
+    """Test that having an unset spans key doesn't raise an error"""
+    doc = Doc(en_vocab, words=["Welcome", "to", "the", "Bank", "of", "China"])
+    doc.spans["custom"] = [Span(doc, 3, 6, "BANK")]
+    with pytest.warns(UserWarning, match="W117"):
+        spans = displacy.parse_spans(doc)
+
+    assert isinstance(spans, dict)
+
+
 def test_displacy_parse_ents(en_vocab):
     """Test that named entities on a Doc are converted into displaCy's format."""
     doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 2306cabb7..d91ed1201 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -23,7 +23,7 @@ def get_textcat_bow_kwargs():
 
 
 def get_textcat_cnn_kwargs():
-    return {"tok2vec": test_tok2vec(), "exclusive_classes": False, "nO": 13}
+    return {"tok2vec": make_test_tok2vec(), "exclusive_classes": False, "nO": 13}
 
 
 def get_all_params(model):
@@ -65,7 +65,7 @@ def get_tok2vec_kwargs():
     }
 
 
-def test_tok2vec():
+def make_test_tok2vec():
     return build_Tok2Vec_model(**get_tok2vec_kwargs())
diff --git a/spacy/tests/vocab_vectors/test_similarity.py b/spacy/tests/vocab_vectors/test_similarity.py
index 47cd1f060..e1f95b28b 100644
--- a/spacy/tests/vocab_vectors/test_similarity.py
+++ b/spacy/tests/vocab_vectors/test_similarity.py
@@ -7,7 +7,7 @@ from ..util import get_cosine, add_vecs_to_vocab
 
 @pytest.fixture
 def vectors():
-    return [("apple", [1, 2, 3]), ("orange", [-1, -2, -3])]
+    return [("apple", [1, 2, 3]), ("orange", [-1, -2, -5])]
 
 
 @pytest.fixture()
@@ -71,19 +71,17 @@ def test_vectors_similarity_DD(vocab, vectors):
 def test_vectors_similarity_TD(vocab, vectors):
     [(word1, vec1), (word2, vec2)] = vectors
     doc = Doc(vocab, words=[word1, word2])
-    with pytest.warns(UserWarning):
-        assert isinstance(doc.similarity(doc[0]), float)
-        assert isinstance(doc[0].similarity(doc), float)
-        assert doc.similarity(doc[0]) == doc[0].similarity(doc)
+    assert isinstance(doc.similarity(doc[0]), float)
+    assert isinstance(doc[0].similarity(doc), float)
+    assert doc.similarity(doc[0]) == doc[0].similarity(doc)
 
 
 def test_vectors_similarity_TS(vocab, vectors):
     [(word1, vec1), (word2, vec2)] = vectors
     doc = Doc(vocab, words=[word1, word2])
-    with pytest.warns(UserWarning):
-        assert isinstance(doc[:2].similarity(doc[0]), float)
-        assert isinstance(doc[0].similarity(doc[-2]), float)
-        assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
+    assert isinstance(doc[:2].similarity(doc[0]), float)
+    assert isinstance(doc[0].similarity(doc[-2]), float)
+    assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
 
 
 def test_vectors_similarity_DS(vocab, vectors):
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index e38de02b4..9d3bd5b69 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -359,6 +359,7 @@ cdef class Doc:
             for annot in annotations:
                 if annot:
                     if annot is heads or annot is sent_starts or annot is ent_iobs:
+                        annot = numpy.array(annot, dtype=numpy.int32).astype(numpy.uint64)
                         for i in range(len(words)):
                             if attrs.ndim == 1:
                                 attrs[i] = annot[i]
@@ -1557,6 +1558,7 @@ cdef class Doc:
         for j, (attr, annot) in enumerate(token_annotations.items()):
             if attr is HEAD:
+                annot = numpy.array(annot, dtype=numpy.int32).astype(numpy.uint64)
                 for i in range(len(words)):
                     array[i, j] = annot[i]
             elif attr is MORPH:
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index ab888ae95..fd96f03f0 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -299,7 +299,7 @@ cdef class Span:
             for ancestor in ancestors:
                 ancestor_i = ancestor.i - self.c.start
                 if ancestor_i in range(length):
-                    array[i, head_col] = ancestor_i - i
+                    array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64)
 
             # if there is no appropriate ancestor, define a new artificial root
             value = array[i, head_col]
@@ -307,7 +307,7 @@ cdef class Span:
                 new_root = old_to_new_root.get(ancestor_i, None)
                 if new_root is not None:
                     # take the same artificial root as a previous token from the same sentence
-                    array[i, head_col] = new_root - i
+                    array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64)
                 else:
                     # set this token as the new artificial root
                     array[i, head_col] = 0
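The `numpy.int32(...).astype(numpy.uint64)` casts in the doc.pyx and span.pyx hunks above (and the matching conversion in training/example.pyx below) store signed head offsets in the uint64 attribute arrays via their two's-complement bit pattern, because recent NumPy versions no longer silently wrap negative values assigned to unsigned arrays. A small illustration, not taken from the patch:

    import numpy

    heads = numpy.zeros((3,), dtype=numpy.uint64)
    # Explicit cast: a relative head offset of -1 becomes the wrapped uint64 value.
    heads[0] = numpy.int32(-1).astype(numpy.uint64)
    assert int(heads[0]) == 2**64 - 1
    # By contrast, `heads[0] = -1` is deprecated or rejected by newer NumPy releases.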
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 3035388a6..ce4f746ac 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -353,26 +353,27 @@ def _annot2array(vocab, tok_annot, doc_annot):
             if key not in IDS:
                 raise ValueError(Errors.E974.format(obj="token", key=key))
             elif key in ["ORTH", "SPACY"]:
-                pass
+                continue
             elif key == "HEAD":
                 attrs.append(key)
-                values.append([h-i if h is not None else 0 for i, h in enumerate(value)])
+                row = [h-i if h is not None else 0 for i, h in enumerate(value)]
             elif key == "DEP":
                 attrs.append(key)
-                values.append([vocab.strings.add(h) if h is not None else MISSING_DEP for h in value])
+                row = [vocab.strings.add(h) if h is not None else MISSING_DEP for h in value]
             elif key == "SENT_START":
                 attrs.append(key)
-                values.append([to_ternary_int(v) for v in value])
+                row = [to_ternary_int(v) for v in value]
            elif key == "MORPH":
                attrs.append(key)
-                values.append([vocab.morphology.add(v) for v in value])
+                row = [vocab.morphology.add(v) for v in value]
            else:
                attrs.append(key)
                if not all(isinstance(v, str) for v in value):
                    types = set([type(v) for v in value])
                    raise TypeError(Errors.E969.format(field=key, types=types)) from None
-                values.append([vocab.strings.add(v) for v in value])
-    array = numpy.asarray(values, dtype="uint64")
+                row = [vocab.strings.add(v) for v in value]
+            values.append([numpy.array(v, dtype=numpy.int32).astype(numpy.uint64) if v < 0 else v for v in row])
+    array = numpy.array(values, dtype=numpy.uint64)
     return attrs, array.T
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 48ff7b589..6304e4a84 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -337,3 +337,5 @@ def ensure_shape(vectors_loc):
         # store all the results in a list in memory
         lines2 = open_file(vectors_loc)
         yield from lines2
+        lines2.close()
+        lines.close()