diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index 80c88b0b8..7e3f94df6 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -1,9 +1,6 @@
 parameters:
   python_version: ''
-  architecture: ''
-  prefix: ''
-  gpu: false
-  num_build_jobs: 1
+  architecture: 'x64'
 
 steps:
   - task: UsePythonVersion@0
@@ -16,16 +13,16 @@ steps:
     displayName: 'Set variables'
 
   - script: |
-      ${{ parameters.prefix }} python -m pip install -U pip setuptools
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+      python -m pip install -U build pip setuptools
+      python -m pip install -U -r requirements.txt
     displayName: "Install dependencies"
 
   - script: |
-      ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
-      ${{ parameters.prefix }} python setup.py sdist --formats=gztar
-    displayName: "Compile and build sdist"
+      python -m build --sdist
+    displayName: "Build sdist"
 
-  - script: python -m mypy spacy
+  - script: |
+      python -m mypy spacy
     displayName: 'Run mypy'
     condition: ne(variables['python_version'], '3.10')
 
@@ -34,35 +31,24 @@ steps:
       contents: "spacy"
     displayName: "Delete source directory"
 
+  - task: DeleteFiles@1
+    inputs:
+      contents: "*.egg-info"
+    displayName: "Delete egg-info directory"
+
   - script: |
-      ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
-      ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
+      python -m pip freeze > installed.txt
+      python -m pip uninstall -y -r installed.txt
     displayName: "Uninstall all packages"
 
   - bash: |
-      ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-      ${{ parameters.prefix }} python -m pip install dist/$SDIST
+      SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+      python -m pip install dist/$SDIST
     displayName: "Install from sdist"
 
   - script: |
-      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
-    displayName: "Install test requirements"
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install -U cupy-cuda110 -f https://github.com/cupy/cupy/releases/v9.0.0
-      ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
-    displayName: "Install GPU requirements"
-    condition: eq(${{ parameters.gpu }}, true)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy
-    displayName: "Run CPU tests"
-    condition: eq(${{ parameters.gpu }}, false)
-
-  - script: |
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy -p spacy.tests.enable_gpu
-    displayName: "Run GPU tests"
-    condition: eq(${{ parameters.gpu }}, true)
+      python -W error -c "import spacy"
+    displayName: "Test import"
 
   - script: |
       python -m spacy download ca_core_news_sm
@@ -105,13 +91,21 @@ steps:
     displayName: 'Test assemble CLI vectors warning'
     condition: eq(variables['python_version'], '3.8')
 
+  - script: |
+      python -m pip install -U -r requirements.txt
+    displayName: "Install test requirements"
+
+  - script: |
+      python -m pytest --pyargs spacy -W error
+    displayName: "Run CPU tests"  # the script passes -W error, so warnings raised during the run fail the step
+
+  - script: |
+      python -m pip install 'spacy[apple]'
+      python -m pytest --pyargs spacy
+    displayName: "Run CPU tests with thinc-apple-ops"
+    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.10'))  # only on macOS images with Python 3.10
+
   - script: |
       python .github/validate_universe_json.py website/meta/universe.json
     displayName: 'Test website/meta/universe.json'
     condition: eq(variables['python_version'], '3.8')
-
-  - script: |
-      ${{ parameters.prefix }} python -m pip install thinc-apple-ops
-      ${{ parameters.prefix }} python -m pytest --pyargs spacy
-    displayName: "Run CPU tests with thinc-apple-ops"
-    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.9'))
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a7a12fd24..2612c9a81 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -5,7 +5,7 @@ repos:
     - id: black
       language_version: python3.7
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+    rev: 5.0.4
     hooks:
     - id: flake8
       args:
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 71a793911..57f5c8727 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -29,7 +29,7 @@ jobs:
         inputs:
           versionSpec: "3.7"
       - script: |
-          pip install flake8==3.9.2
+          pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823 --show-source --statistics
         displayName: "flake8"
 
@@ -39,7 +39,7 @@ jobs:
       matrix:
         # We're only running one platform per Python version to speed up builds
         Python36Linux:
-          imageName: "ubuntu-latest"
+          imageName: "ubuntu-20.04"
           python.version: "3.6"
         #        Python36Windows:
         #          imageName: "windows-latest"
@@ -48,7 +48,7 @@ jobs:
         #          imageName: "macos-latest"
         #          python.version: "3.6"
         #        Python37Linux:
-        #          imageName: "ubuntu-latest"
+        #          imageName: "ubuntu-20.04"
         #          python.version: "3.7"
         Python37Windows:
           imageName: "windows-latest"
@@ -90,20 +90,3 @@ jobs:
       - template: .github/azure-steps.yml
         parameters:
           python_version: '$(python.version)'
-          architecture: 'x64'
-
-#  - job: "TestGPU"
-#    dependsOn: "Validate"
-#    strategy:
-#      matrix:
-#        Python38LinuxX64_GPU:
-#          python.version: '3.8'
-#    pool:
-#      name: "LinuxX64_GPU"
-#    steps:
-#      - template: .github/azure-steps.yml
-#        parameters:
-#          python_version: '$(python.version)'
-#          architecture: 'x64'
-#          gpu: true
-#          num_build_jobs: 24
diff --git a/requirements.txt b/requirements.txt
index d76135c71..d36f38d15 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,8 +10,8 @@ wasabi>=0.8.1,<1.1.0
 srsly>=2.4.1,<3.0.0
 catalogue>=2.0.6,<2.1.0
 typer>=0.3.0,<0.5.0
-click<8.1.0
 pathy>=0.3.5
+smart-open>=5.2.1,<7.0.0
 # Third party dependencies
 numpy>=1.15.0
 requests>=2.13.0,<3.0.0
@@ -28,7 +28,7 @@ cython>=0.25,<3.0
 pytest>=5.2.0
 pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
-flake8>=3.8.0,<3.10.0
+flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
 mypy==0.910
 types-dataclasses>=0.1.3; python_version < "3.7"
diff --git a/setup.cfg b/setup.cfg
index b146577c4..8517b77b2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -50,10 +50,10 @@ install_requires =
     wasabi>=0.8.1,<1.1.0
     srsly>=2.4.1,<3.0.0
     catalogue>=2.0.6,<2.1.0
-    typer>=0.3.0,<0.5.0
-    click<8.1.0
-    pathy>=0.3.5
     # Third-party dependencies
+    typer>=0.3.0,<0.5.0
+    pathy>=0.3.5
+    smart-open>=5.2.1,<7.0.0
     tqdm>=4.38.0,<5.0.0
     numpy>=1.15.0
     requests>=2.13.0,<3.0.0
diff --git a/spacy/about.py b/spacy/about.py
index 37b9f2556..ef41b112b 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.1.6"
+__version__ = "3.1.7"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 __projects__ = "https://github.com/explosion/projects"
diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py
index fb680d888..1b9740d0b 100644
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
     if dest.exists() and not force:
         return None
     src = str(src)
-    with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
+    with smart_open.open(src, mode="rb", compression="disable") as input_file:
         with dest.open(mode="wb") as output_file:
             output_file.write(input_file.read())
 
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index b78806fec..7304cab5b 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -1,7 +1,7 @@
 {# This is a template for training configs used for the quickstart widget in
 the docs and the init config command. It encodes various best practices and
 can help generate the best possible configuration, given a user's requirements. #}
-{%- set use_transformer = hardware != "cpu" -%}
+{%- set use_transformer = hardware != "cpu" and transformer_data -%}
 {%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
 [paths]
 train = null
diff --git a/spacy/errors.py b/spacy/errors.py
index 2da52e3b8..a29d62d75 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -316,6 +316,11 @@ class Errors(metaclass=ErrorsWithCodes):
             "clear the existing vectors and resize the table.")
     E074 = ("Error interpreting compiled match pattern: patterns are expected "
             "to end with the attribute {attr}. Got: {bad_attr}.")
+    E079 = ("Error computing states in beam: number of predicted beams "
+            "({pbeams}) does not equal number of gold beams ({gbeams}).")
+    E080 = ("Duplicate state found in beam: {key}.")
+    E081 = ("Error getting gradient in beam: number of histories ({n_hist}) "
+            "does not equal number of losses ({losses}).")
     E082 = ("Error deprojectivizing parse: number of heads ({n_heads}), "
             "projective heads ({n_proj_heads}) and labels ({n_labels}) do not "
             "match.")
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 7c5d3386e..b4e08502b 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -229,7 +229,10 @@ class SpanCategorizer(TrainablePipe):
         DOCS: https://spacy.io/api/spancategorizer#predict
         """
         indices = self.suggester(docs, ops=self.model.ops)
-        scores = self.model.predict((docs, indices))  # type: ignore
+        if indices.lengths.sum() == 0:
+            scores = self.model.ops.alloc2f(0, 0)
+        else:
+            scores = self.model.predict((docs, indices))  # type: ignore
         return indices, scores
 
     def set_annotations(self, docs: Iterable[Doc], indices_scores) -> None:
diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py
index ef54c581c..94f4cb140 100644
--- a/spacy/tests/doc/test_array.py
+++ b/spacy/tests/doc/test_array.py
@@ -1,4 +1,6 @@
 import pytest
+import numpy
+
 from spacy.tokens import Doc
 from spacy.attrs import ORTH, SHAPE, POS, DEP, MORPH
 
@@ -100,14 +102,14 @@ def test_doc_from_array_heads_in_bounds(en_vocab):
 
     # head before start
     arr = doc.to_array(["HEAD"])
-    arr[0] = -1
+    arr[0] = numpy.int32(-1).astype(numpy.uint64)
     doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
 
     # head after end
     arr = doc.to_array(["HEAD"])
-    arr[0] = 5
+    arr[0] = numpy.int32(5).astype(numpy.uint64)
     doc_from_array = Doc(en_vocab, words=words)
     with pytest.raises(ValueError):
         doc_from_array.from_array(["HEAD"], arr)
diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py
index 57df87642..316131848 100644
--- a/spacy/tests/doc/test_doc_api.py
+++ b/spacy/tests/doc/test_doc_api.py
@@ -2,6 +2,7 @@ import weakref
 
 import pytest
 import numpy
+import warnings
 
 from spacy.lang.xx import MultiLanguage
 from spacy.tokens import Doc, Span, Token
@@ -311,9 +312,9 @@ def test_doc_from_array_sent_starts(en_vocab):
     # no warning using default attrs
     attrs = doc._get_array_attrs()
     arr = doc.to_array(attrs)
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         new_doc.from_array(attrs, arr)
-        assert len(record) == 0
     # only SENT_START uses SENT_START
     attrs = [SENT_START]
     arr = doc.to_array(attrs)
diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py
index 3810323bf..9ca7f441b 100644
--- a/spacy/tests/lang/ru/test_lemmatizer.py
+++ b/spacy/tests/lang/ru/test_lemmatizer.py
@@ -2,6 +2,9 @@ import pytest
 from spacy.tokens import Doc
 
 
+pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
+
+
 def test_ru_doc_lemmatization(ru_lemmatizer):
     words = ["мама", "мыла", "раму"]
     pos = ["NOUN", "VERB", "NOUN"]
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
index 4a787b2a6..57dd4198a 100644
--- a/spacy/tests/lang/uk/test_lemmatizer.py
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -1,6 +1,10 @@
+import pytest
 from spacy.tokens import Doc
 
 
+pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning")
+
+
 def test_uk_lemmatizer(uk_lemmatizer):
     """Check that the default uk lemmatizer runs."""
     doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
diff --git a/spacy/tests/matcher/test_phrase_matcher.py b/spacy/tests/matcher/test_phrase_matcher.py
index 478949601..ffcd206d5 100644
--- a/spacy/tests/matcher/test_phrase_matcher.py
+++ b/spacy/tests/matcher/test_phrase_matcher.py
@@ -1,4 +1,5 @@
 import pytest
+import warnings
 import srsly
 from mock import Mock
 from spacy.matcher import PhraseMatcher
@@ -197,13 +198,13 @@ def test_phrase_matcher_validation(en_vocab):
         matcher.add("TEST1", [doc1])
     with pytest.warns(UserWarning):
         matcher.add("TEST2", [doc2])
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         matcher.add("TEST3", [doc3])
-        assert not record.list
     matcher = PhraseMatcher(en_vocab, attr="POS", validate=True)
-    with pytest.warns(None) as record:
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
         matcher.add("TEST4", [doc2])
-        assert not record.list
 
 
 def test_attr_validation(en_vocab):
diff --git a/spacy/tests/pipeline/test_spancat.py b/spacy/tests/pipeline/test_spancat.py
index 2f7e952d3..53d997400 100644
--- a/spacy/tests/pipeline/test_spancat.py
+++ b/spacy/tests/pipeline/test_spancat.py
@@ -369,24 +369,39 @@ def test_overfitting_IO_overlapping():
 
 
 def test_zero_suggestions():
-    # Test with a suggester that returns 0 suggestions
+    # Test with a suggester that can return 0 suggestions
 
-    @registry.misc("test_zero_suggester")
-    def make_zero_suggester():
-        def zero_suggester(docs, *, ops=None):
+    @registry.misc("test_mixed_zero_suggester")
+    def make_mixed_zero_suggester():
+        def mixed_zero_suggester(docs, *, ops=None):  # suggests one span for some docs and none for others
             if ops is None:
                 ops = get_current_ops()
-            return Ragged(
-                ops.xp.zeros((0, 0), dtype="i"), ops.xp.zeros((len(docs),), dtype="i")
-            )
+            spans = []
+            lengths = []  # one entry per doc: how many spans were suggested for it
+            for doc in docs:
+                if len(doc) > 0 and len(doc) % 2 == 0:  # non-empty doc whose length is even
+                    spans.append((0, 1))  # presumably (start, end) offsets, i.e. the first token
+                    lengths.append(1)
+                else:
+                    lengths.append(0)  # empty or odd-length doc: no suggestion
+            spans = ops.asarray2i(spans)
+            lengths_array = ops.asarray1i(lengths)
+            if len(spans) > 0:
+                output = Ragged(ops.xp.vstack(spans), lengths_array)
+            else:  # nothing suggested for the whole batch: fall back to an empty (0, 0) int array
+                output = Ragged(ops.xp.zeros((0, 0), dtype="i"), lengths_array)
+            return output
 
-        return zero_suggester
+        return mixed_zero_suggester
 
     fix_random_seed(0)
     nlp = English()
     spancat = nlp.add_pipe(
         "spancat",
-        config={"suggester": {"@misc": "test_zero_suggester"}, "spans_key": SPAN_KEY},
+        config={
+            "suggester": {"@misc": "test_mixed_zero_suggester"},
+            "spans_key": SPAN_KEY,
+        },
     )
     train_examples = make_examples(nlp)
     optimizer = nlp.initialize(get_examples=lambda: train_examples)
@@ -394,3 +409,13 @@ def test_zero_suggestions():
     assert set(spancat.labels) == {"LOC", "PERSON"}
 
     nlp.update(train_examples, sgd=optimizer)
+    # empty doc
+    nlp("")
+    # single doc with zero suggestions
+    nlp("one")
+    # single doc with one suggestion
+    nlp("two two")
+    # batch with mixed zero/one suggestions
+    list(nlp.pipe(["one", "two two", "three three three", "", "four four four four"]))
+    # batch with no suggestions
+    list(nlp.pipe(["", "one", "three three three"]))
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 72bbe04e5..894a41b73 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -11,6 +11,7 @@ from spacy.cli._util import validate_project_commands, parse_config_overrides
 from spacy.cli._util import load_project_config, substitute_project_variables
 from spacy.cli._util import is_subpath_of
 from spacy.cli._util import string_to_list
+from spacy.cli._util import upload_file, download_file
 from spacy import about
 from spacy.util import get_minor_version
 from spacy.cli.validate import get_model_pkgs
@@ -574,3 +575,18 @@ def test_get_third_party_dependencies():
 )
 def test_is_subpath_of(parent, child, expected):
     assert is_subpath_of(parent, child) == expected
+
+
+def test_upload_download_local_file():  # round-trips a file through upload_file/download_file using local paths only
+    with make_tempdir() as d1, make_tempdir() as d2:
+        filename = "f.txt"
+        content = "content"
+        local_file = d1 / filename
+        remote_file = d2 / filename  # a second temp dir stands in for the remote location
+        with local_file.open(mode="w") as file_:
+            file_.write(content)
+        upload_file(local_file, remote_file)
+        local_file.unlink()  # remove the original so the download has to recreate it
+        download_file(remote_file, local_file)
+        with local_file.open(mode="r") as file_:
+            assert file_.read() == content  # the content must come back unchanged
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 2306cabb7..d91ed1201 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -23,7 +23,7 @@ def get_textcat_bow_kwargs():
 
 
 def get_textcat_cnn_kwargs():
-    return {"tok2vec": test_tok2vec(), "exclusive_classes": False, "nO": 13}
+    return {"tok2vec": make_test_tok2vec(), "exclusive_classes": False, "nO": 13}
 
 
 def get_all_params(model):
@@ -65,7 +65,7 @@ def get_tok2vec_kwargs():
     }
 
 
-def test_tok2vec():
+def make_test_tok2vec():  # helper, not a test case: returns a Tok2Vec model built from get_tok2vec_kwargs()
     return build_Tok2Vec_model(**get_tok2vec_kwargs())
 
 
diff --git a/spacy/tests/vocab_vectors/test_similarity.py b/spacy/tests/vocab_vectors/test_similarity.py
index b5f7303b5..91b2f5b05 100644
--- a/spacy/tests/vocab_vectors/test_similarity.py
+++ b/spacy/tests/vocab_vectors/test_similarity.py
@@ -7,7 +7,7 @@ from ..util import get_cosine, add_vecs_to_vocab
 
 @pytest.fixture
 def vectors():
-    return [("apple", [1, 2, 3]), ("orange", [-1, -2, -3])]
+    return [("apple", [1, 2, 3]), ("orange", [-1, -2, -5])]
 
 
 @pytest.fixture()
@@ -44,8 +44,7 @@ def test_vectors_similarity_TT(vocab, vectors):
 def test_vectors_similarity_TD(vocab, vectors):
     [(word1, vec1), (word2, vec2)] = vectors
     doc = Doc(vocab, words=[word1, word2])
-    with pytest.warns(UserWarning):
-        assert doc.similarity(doc[0]) == doc[0].similarity(doc)
+    assert doc.similarity(doc[0]) == doc[0].similarity(doc)  # doc-vs-token similarity must match in both directions
 
 
 def test_vectors_similarity_DS(vocab, vectors):
@@ -57,5 +56,4 @@ def test_vectors_similarity_DS(vocab, vectors):
 def test_vectors_similarity_TS(vocab, vectors):
     [(word1, vec1), (word2, vec2)] = vectors
     doc = Doc(vocab, words=[word1, word2])
-    with pytest.warns(UserWarning):
-        assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])
+    assert doc[:2].similarity(doc[0]) == doc[0].similarity(doc[:2])  # slice-vs-token similarity must match in both directions
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 1ee845934..111d35974 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -356,6 +356,7 @@ cdef class Doc:
             for annot in annotations:
                 if annot:
                     if annot is heads or annot is sent_starts or annot is ent_iobs:
+                        annot = numpy.array(annot, dtype=numpy.int32).astype(numpy.uint64)
                         for i in range(len(words)):
                             if attrs.ndim == 1:
                                 attrs[i] = annot[i]
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index c9c807d7d..f2a41fa3c 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -307,7 +307,7 @@ cdef class Span:
                     for ancestor in ancestors:
                         ancestor_i = ancestor.i - self.c.start
                         if ancestor_i in range(length):
-                            array[i, head_col] = ancestor_i - i
+                            array[i, head_col] = numpy.int32(ancestor_i - i).astype(numpy.uint64)
 
                 # if there is no appropriate ancestor, define a new artificial root
                 value = array[i, head_col]
@@ -315,7 +315,7 @@ cdef class Span:
                     new_root = old_to_new_root.get(ancestor_i, None)
                     if new_root is not None:
                         # take the same artificial root as a previous token from the same sentence
-                        array[i, head_col] = new_root - i
+                        array[i, head_col] = numpy.int32(new_root - i).astype(numpy.uint64)
                     else:
                         # set this token as the new artificial root
                         array[i, head_col] = 0
diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx
index 732203e7b..7e5ff32dd 100644
--- a/spacy/training/example.pyx
+++ b/spacy/training/example.pyx
@@ -333,26 +333,27 @@ def _annot2array(vocab, tok_annot, doc_annot):
         if key not in IDS:
             raise ValueError(Errors.E974.format(obj="token", key=key))
         elif key in ["ORTH", "SPACY"]:
-            pass
+            continue
         elif key == "HEAD":
             attrs.append(key)
-            values.append([h-i if h is not None else 0 for i, h in enumerate(value)])
+            row = [h-i if h is not None else 0 for i, h in enumerate(value)]
         elif key == "DEP":
             attrs.append(key)
-            values.append([vocab.strings.add(h) if h is not None else MISSING_DEP for h in value])
+            row = [vocab.strings.add(h) if h is not None else MISSING_DEP for h in value]
         elif key == "SENT_START":
             attrs.append(key)
-            values.append([to_ternary_int(v) for v in value])
+            row = [to_ternary_int(v) for v in value]
         elif key == "MORPH":
             attrs.append(key)
-            values.append([vocab.morphology.add(v) for v in value])
+            row = [vocab.morphology.add(v) for v in value]
         else:
             attrs.append(key)
             if not all(isinstance(v, str) for v in value):
                 types = set([type(v) for v in value])
                 raise TypeError(Errors.E969.format(field=key, types=types)) from None
-            values.append([vocab.strings.add(v) for v in value])
-    array = numpy.asarray(values, dtype="uint64")
+            row = [vocab.strings.add(v) for v in value]
+        values.append([numpy.array(v, dtype=numpy.int32).astype(numpy.uint64) if v < 0 else v for v in row])
+    array = numpy.array(values, dtype=numpy.uint64)
     return attrs, array.T
 
 
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index 96abcc7cd..159df4ab9 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -290,3 +290,5 @@ def ensure_shape(vectors_loc):
         # store all the results in a list in memory
         lines2 = open_file(vectors_loc)
         yield from lines2
+        lines2.close()
+    lines.close()