From 36ecba224e2111d39a0b45b3019c63d2dd1529f0 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Thu, 22 Apr 2021 14:58:29 +0200
Subject: [PATCH] Set up GPU CI testing (#7293)

* Set up CI for tests with GPU agent

* Update tests for enabled GPU

* Fix steps filename

* Add parallel build jobs as a setting

* Fix test requirements

* Fix install test requirements condition

* Fix pipeline models test

* Reset current ops in prefer/require testing

* Fix more tests

* Remove separate test_models test

* Fix regression 5551

* fix StaticVectors for GPU use

* fix vocab tests

* Fix regression test 5082

* Move azure steps to .github and reenable default pool jobs

* Consolidate/rename azure steps

Co-authored-by: svlandeg
---
 .github/azure-steps.yml                       | 57 ++++++++++++++++++
 azure-pipelines.yml                           | 53 ++++++-----------
 spacy/ml/staticvectors.py                     |  2 +-
 spacy/tests/enable_gpu.py                     |  3 +
 spacy/tests/pipeline/test_entity_ruler.py     | 16 ++---
 spacy/tests/pipeline/test_models.py           | 12 ++--
 spacy/tests/pipeline/test_textcat.py          | 18 ++++--
 spacy/tests/pipeline/test_tok2vec.py          |  7 ++-
 spacy/tests/regression/test_issue4501-5000.py | 50 ++++++++--------
 spacy/tests/regression/test_issue5001-5500.py | 16 ++---
 spacy/tests/regression/test_issue5501-6000.py | 10 ++--
 spacy/tests/test_language.py                  | 31 ++++++----
 spacy/tests/test_misc.py                      |  9 ++-
 spacy/tests/test_models.py                    | 26 ++++++--
 spacy/tests/util.py                           |  6 +-
 spacy/tests/vocab_vectors/test_vectors.py     | 59 ++++++++++---------
 16 files changed, 238 insertions(+), 137 deletions(-)
 create mode 100644 .github/azure-steps.yml
 create mode 100644 spacy/tests/enable_gpu.py

diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
new file mode 100644
index 000000000..750e096d0
--- /dev/null
+++ b/.github/azure-steps.yml
@@ -0,0 +1,57 @@
+parameters:
+  python_version: ''
+  architecture: ''
+  prefix: ''
+  gpu: false
+  num_build_jobs: 1
+
+steps:
+  - task: UsePythonVersion@0
+    inputs:
+      versionSpec: ${{ parameters.python_version }}
+      architecture: ${{ parameters.architecture }}
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip install -U pip setuptools
+      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+    displayName: "Install dependencies"
+
+  - script: |
+      ${{ parameters.prefix }} python setup.py build_ext --inplace -j ${{ parameters.num_build_jobs }}
+      ${{ parameters.prefix }} python setup.py sdist --formats=gztar
+    displayName: "Compile and build sdist"
+
+  - task: DeleteFiles@1
+    inputs:
+      contents: "spacy"
+    displayName: "Delete source directory"
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip freeze --exclude torch --exclude cupy-cuda110 > installed.txt
+      ${{ parameters.prefix }} python -m pip uninstall -y -r installed.txt
+    displayName: "Uninstall all packages"
+
+  - bash: |
+      ${{ parameters.prefix }} SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
+      ${{ parameters.prefix }} python -m pip install dist/$SDIST
+    displayName: "Install from sdist"
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip install -U -r requirements.txt
+    displayName: "Install test requirements"
+
+  - script: |
+      ${{ parameters.prefix }} python -m pip install -U cupy-cuda110
+      ${{ parameters.prefix }} python -m pip install "torch==1.7.1+cu110" -f https://download.pytorch.org/whl/torch_stable.html
+    displayName: "Install GPU requirements"
+    condition: eq(${{ parameters.gpu }}, true)
+
+  - script: |
+      ${{ parameters.prefix }} python -m pytest --pyargs spacy
+    displayName: "Run CPU tests"
+    condition: eq(${{ parameters.gpu }}, false)
+
+  - script: |
+      ${{ parameters.prefix }} python -m pytest --pyargs spacy -p spacy.tests.enable_gpu
+    displayName: "Run GPU tests"
+    condition: eq(${{ parameters.gpu }}, true)
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index bb259dded..bea65cae2 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -76,39 +76,24 @@ jobs:
       maxParallel: 4
     pool:
       vmImage: $(imageName)
-
     steps:
-      - task: UsePythonVersion@0
-        inputs:
-          versionSpec: "$(python.version)"
-          architecture: "x64"
+      - template: .github/azure-steps.yml
+        parameters:
+          python_version: '$(python.version)'
+          architecture: 'x64'

-      - script: |
-          python -m pip install -U setuptools
-          pip install -r requirements.txt
-        displayName: "Install dependencies"
-
-      - script: |
-          python setup.py build_ext --inplace
-          python setup.py sdist --formats=gztar
-        displayName: "Compile and build sdist"
-
-      - task: DeleteFiles@1
-        inputs:
-          contents: "spacy"
-        displayName: "Delete source directory"
-
-      - script: |
-          pip freeze > installed.txt
-          pip uninstall -y -r installed.txt
-        displayName: "Uninstall all packages"
-
-      - bash: |
-          SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
-          pip install dist/$SDIST
-        displayName: "Install from sdist"
-
-      - script: |
-          pip install -r requirements.txt
-          python -m pytest --pyargs spacy
-        displayName: "Run tests"
+  - job: "TestGPU"
+    dependsOn: "Validate"
+    strategy:
+      matrix:
+        Python38LinuxX64_GPU:
+          python.version: '3.8'
+    pool:
+      name: "LinuxX64_GPU"
+    steps:
+      - template: .github/azure-steps.yml
+        parameters:
+          python_version: '$(python.version)'
+          architecture: 'x64'
+          gpu: true
+          num_build_jobs: 24
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index cfd25c24b..4e7262e7d 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -38,7 +38,7 @@ def forward(
         return _handle_empty(model.ops, model.get_dim("nO"))
     key_attr = model.attrs["key_attr"]
     W = cast(Floats2d, model.ops.as_contig(model.get_param("W")))
-    V = cast(Floats2d, docs[0].vocab.vectors.data)
+    V = cast(Floats2d, model.ops.asarray(docs[0].vocab.vectors.data))
     rows = model.ops.flatten(
         [doc.vocab.vectors.find(keys=doc.to_array(key_attr)) for doc in docs]
     )
diff --git a/spacy/tests/enable_gpu.py b/spacy/tests/enable_gpu.py
new file mode 100644
index 000000000..3d4fded10
--- /dev/null
+++ b/spacy/tests/enable_gpu.py
@@ -0,0 +1,3 @@
+from spacy import require_gpu
+
+require_gpu()
diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py
index 3f998d78d..2f6da79d6 100644
--- a/spacy/tests/pipeline/test_entity_ruler.py
+++ b/spacy/tests/pipeline/test_entity_ruler.py
@@ -5,6 +5,7 @@ from spacy.tokens import Span
 from spacy.language import Language
 from spacy.pipeline import EntityRuler
 from spacy.errors import MatchPatternError
+from thinc.api import NumpyOps, get_current_ops


 @pytest.fixture
@@ -201,13 +202,14 @@ def test_entity_ruler_overlapping_spans(nlp):

 @pytest.mark.parametrize("n_process", [1, 2])
 def test_entity_ruler_multiprocessing(nlp, n_process):
-    texts = ["I enjoy eating Pizza Hut pizza."]
+    if isinstance(get_current_ops(), NumpyOps) or n_process < 2:
+        texts = ["I enjoy eating Pizza Hut pizza."]

-    patterns = [{"label": "FASTFOOD", "pattern": "Pizza Hut", "id": "1234"}]
+        patterns = [{"label": "FASTFOOD", "pattern": "Pizza Hut", "id": "1234"}]

-    ruler = nlp.add_pipe("entity_ruler")
-    ruler.add_patterns(patterns)
+        ruler = nlp.add_pipe("entity_ruler")
+        ruler.add_patterns(patterns)

-    for doc in nlp.pipe(texts, n_process=2):
-        for ent in doc.ents:
-            assert ent.ent_id_ == "1234"
+        for doc in nlp.pipe(texts, n_process=2):
+            for ent in doc.ents:
+                assert ent.ent_id_ == "1234"
diff --git a/spacy/tests/pipeline/test_models.py b/spacy/tests/pipeline/test_models.py
index d04ac9cd4..302c307e2 100644
--- a/spacy/tests/pipeline/test_models.py
+++ b/spacy/tests/pipeline/test_models.py
@@ -4,7 +4,7 @@ import numpy
 import pytest
 from numpy.testing import assert_almost_equal
 from spacy.vocab import Vocab
-from thinc.api import NumpyOps, Model, data_validation
+from thinc.api import Model, data_validation, get_current_ops
 from thinc.types import Array2d, Ragged

 from spacy.lang.en import English
@@ -13,7 +13,7 @@ from spacy.ml._character_embed import CharacterEmbed
 from spacy.tokens import Doc


-OPS = NumpyOps()
+OPS = get_current_ops()

 texts = ["These are 4 words", "Here just three"]
 l0 = [[1, 2], [3, 4], [5, 6], [7, 8]]
@@ -82,7 +82,7 @@ def util_batch_unbatch_docs_list(
         Y_batched = model.predict(in_data)
         Y_not_batched = [model.predict([u])[0] for u in in_data]
         for i in range(len(Y_batched)):
-            assert_almost_equal(Y_batched[i], Y_not_batched[i], decimal=4)
+            assert_almost_equal(OPS.to_numpy(Y_batched[i]), OPS.to_numpy(Y_not_batched[i]), decimal=4)


 def util_batch_unbatch_docs_array(
@@ -91,7 +91,7 @@ def util_batch_unbatch_docs_array(
 ):
     with data_validation(True):
         model.initialize(in_data, out_data)
         Y_batched = model.predict(in_data).tolist()
-        Y_not_batched = [model.predict([u])[0] for u in in_data]
+        Y_not_batched = [model.predict([u])[0].tolist() for u in in_data]
         assert_almost_equal(Y_batched, Y_not_batched, decimal=4)


@@ -100,8 +100,8 @@ def util_batch_unbatch_docs_ragged(
 ):
     with data_validation(True):
         model.initialize(in_data, out_data)
-        Y_batched = model.predict(in_data)
+        Y_batched = model.predict(in_data).data.tolist()
         Y_not_batched = []
         for u in in_data:
             Y_not_batched.extend(model.predict([u]).data.tolist())
-        assert_almost_equal(Y_batched.data, Y_not_batched, decimal=4)
+        assert_almost_equal(Y_batched, Y_not_batched, decimal=4)
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 61af16eb5..43dfff147 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -1,7 +1,7 @@
 import pytest
 import random
 import numpy.random
-from numpy.testing import assert_equal
+from numpy.testing import assert_almost_equal
 from thinc.api import fix_random_seed
 from spacy import util
 from spacy.lang.en import English
@@ -222,8 +222,12 @@ def test_overfitting_IO():
     batch_cats_1 = [doc.cats for doc in nlp.pipe(texts)]
     batch_cats_2 = [doc.cats for doc in nlp.pipe(texts)]
     no_batch_cats = [doc.cats for doc in [nlp(text) for text in texts]]
-    assert_equal(batch_cats_1, batch_cats_2)
-    assert_equal(batch_cats_1, no_batch_cats)
+    for cats_1, cats_2 in zip(batch_cats_1, batch_cats_2):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)
+    for cats_1, cats_2 in zip(batch_cats_1, no_batch_cats):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)


 def test_overfitting_IO_multi():
@@ -270,8 +274,12 @@ def test_overfitting_IO_multi():
     batch_deps_1 = [doc.cats for doc in nlp.pipe(texts)]
     batch_deps_2 = [doc.cats for doc in nlp.pipe(texts)]
     no_batch_deps = [doc.cats for doc in [nlp(text) for text in texts]]
-    assert_equal(batch_deps_1, batch_deps_2)
-    assert_equal(batch_deps_1, no_batch_deps)
+    for cats_1, cats_2 in zip(batch_deps_1, batch_deps_2):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)
+    for cats_1, cats_2 in zip(batch_deps_1, no_batch_deps):
+        for cat in cats_1:
+            assert_almost_equal(cats_1[cat], cats_2[cat], decimal=5)


 # fmt: off
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index ac5428de6..e3b71c502 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -8,8 +8,8 @@ from spacy.tokens import Doc
 from spacy.training import Example
 from spacy import util
 from spacy.lang.en import English
-from thinc.api import Config
-from numpy.testing import assert_equal
+from thinc.api import Config, get_current_ops
+from numpy.testing import assert_array_equal

 from ..util import get_batch, make_tempdir
@@ -160,7 +160,8 @@ def test_tok2vec_listener():

     doc = nlp("Running the pipeline as a whole.")
     doc_tensor = tagger_tok2vec.predict([doc])[0]
-    assert_equal(doc.tensor, doc_tensor)
+    ops = get_current_ops()
+    assert_array_equal(ops.to_numpy(doc.tensor), ops.to_numpy(doc_tensor))

     # TODO: should this warn or error?
     nlp.select_pipes(disable="tok2vec")
diff --git a/spacy/tests/regression/test_issue4501-5000.py b/spacy/tests/regression/test_issue4501-5000.py
index 6dbbc233b..f5fcb53fd 100644
--- a/spacy/tests/regression/test_issue4501-5000.py
+++ b/spacy/tests/regression/test_issue4501-5000.py
@@ -9,6 +9,7 @@ from spacy.language import Language
 from spacy.util import ensure_path, load_model_from_path
 import numpy
 import pickle
+from thinc.api import NumpyOps, get_current_ops

 from ..util import make_tempdir
@@ -169,21 +170,22 @@ def test_issue4725_1():


 def test_issue4725_2():
-    # ensures that this runs correctly and doesn't hang or crash because of the global vectors
-    # if it does crash, it's usually because of calling 'spawn' for multiprocessing (e.g. on Windows),
-    # or because of issues with pickling the NER (cf test_issue4725_1)
-    vocab = Vocab(vectors_name="test_vocab_add_vector")
-    data = numpy.ndarray((5, 3), dtype="f")
-    data[0] = 1.0
-    data[1] = 2.0
-    vocab.set_vector("cat", data[0])
-    vocab.set_vector("dog", data[1])
-    nlp = English(vocab=vocab)
-    nlp.add_pipe("ner")
-    nlp.initialize()
-    docs = ["Kurt is in London."] * 10
-    for _ in nlp.pipe(docs, batch_size=2, n_process=2):
-        pass
+    if isinstance(get_current_ops(), NumpyOps):
+        # ensures that this runs correctly and doesn't hang or crash because of the global vectors
+        # if it does crash, it's usually because of calling 'spawn' for multiprocessing (e.g. on Windows),
+        # or because of issues with pickling the NER (cf test_issue4725_1)
+        vocab = Vocab(vectors_name="test_vocab_add_vector")
+        data = numpy.ndarray((5, 3), dtype="f")
+        data[0] = 1.0
+        data[1] = 2.0
+        vocab.set_vector("cat", data[0])
+        vocab.set_vector("dog", data[1])
+        nlp = English(vocab=vocab)
+        nlp.add_pipe("ner")
+        nlp.initialize()
+        docs = ["Kurt is in London."] * 10
+        for _ in nlp.pipe(docs, batch_size=2, n_process=2):
+            pass


 def test_issue4849():
@@ -204,10 +206,11 @@ def test_issue4849():
         count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
     assert count_ents == 2
     # USING 2 PROCESSES
-    count_ents = 0
-    for doc in nlp.pipe([text], n_process=2):
-        count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
-    assert count_ents == 2
+    if isinstance(get_current_ops(), NumpyOps):
+        count_ents = 0
+        for doc in nlp.pipe([text], n_process=2):
+            count_ents += len([ent for ent in doc.ents if ent.ent_id > 0])
+        assert count_ents == 2


 @Language.factory("my_pipe")
@@ -239,10 +242,11 @@ def test_issue4903():
     nlp.add_pipe("sentencizer")
     nlp.add_pipe("my_pipe", after="sentencizer")
     text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
-    docs = list(nlp.pipe(text, n_process=2))
-    assert docs[0].text == "I like bananas."
-    assert docs[1].text == "Do you like them?"
-    assert docs[2].text == "No, I prefer wasabi."
+    if isinstance(get_current_ops(), NumpyOps):
+        docs = list(nlp.pipe(text, n_process=2))
+        assert docs[0].text == "I like bananas."
+        assert docs[1].text == "Do you like them?"
+        assert docs[2].text == "No, I prefer wasabi."


 def test_issue4924():
diff --git a/spacy/tests/regression/test_issue5001-5500.py b/spacy/tests/regression/test_issue5001-5500.py
index dbfe78679..0575c8270 100644
--- a/spacy/tests/regression/test_issue5001-5500.py
+++ b/spacy/tests/regression/test_issue5001-5500.py
@@ -6,6 +6,7 @@ from spacy.language import Language
 from spacy.lang.en.syntax_iterators import noun_chunks
 from spacy.vocab import Vocab
 import spacy
+from thinc.api import get_current_ops
 import pytest

 from ...util import make_tempdir
@@ -54,16 +55,17 @@ def test_issue5082():
     ruler.add_patterns(patterns)
     parsed_vectors_1 = [t.vector for t in nlp(text)]
     assert len(parsed_vectors_1) == 4
-    numpy.testing.assert_array_equal(parsed_vectors_1[0], array1)
-    numpy.testing.assert_array_equal(parsed_vectors_1[1], array2)
-    numpy.testing.assert_array_equal(parsed_vectors_1[2], array3)
-    numpy.testing.assert_array_equal(parsed_vectors_1[3], array4)
+    ops = get_current_ops()
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[0]), array1)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[1]), array2)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[2]), array3)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_1[3]), array4)
     nlp.add_pipe("merge_entities")
     parsed_vectors_2 = [t.vector for t in nlp(text)]
     assert len(parsed_vectors_2) == 3
-    numpy.testing.assert_array_equal(parsed_vectors_2[0], array1)
-    numpy.testing.assert_array_equal(parsed_vectors_2[1], array2)
-    numpy.testing.assert_array_equal(parsed_vectors_2[2], array34)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[0]), array1)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[1]), array2)
+    numpy.testing.assert_array_equal(ops.to_numpy(parsed_vectors_2[2]), array34)


 def test_issue5137():
diff --git a/spacy/tests/regression/test_issue5501-6000.py b/spacy/tests/regression/test_issue5501-6000.py
index 8d1199e98..a35de92fa 100644
--- a/spacy/tests/regression/test_issue5501-6000.py
+++ b/spacy/tests/regression/test_issue5501-6000.py
@@ -1,5 +1,6 @@
 import pytest
-from thinc.api import Config, fix_random_seed
+from numpy.testing import assert_almost_equal
+from thinc.api import Config, fix_random_seed, get_current_ops

 from spacy.lang.en import English
 from spacy.pipeline.textcat import single_label_default_config, single_label_bow_config
@@ -44,11 +45,12 @@ def test_issue5551(textcat_config):
         nlp.update([Example.from_dict(doc, annots)])
         # Store the result of each iteration
         result = pipe.model.predict([doc])
-        results.append(list(result[0]))
+        results.append(result[0])
     # All results should be the same because of the fixed seed
     assert len(results) == 3
-    assert results[0] == results[1]
-    assert results[0] == results[2]
+    ops = get_current_ops()
+    assert_almost_equal(ops.to_numpy(results[0]), ops.to_numpy(results[1]))
+    assert_almost_equal(ops.to_numpy(results[0]), ops.to_numpy(results[2]))


 def test_issue5838():
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index bec85a1a2..7fb03da0c 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -10,6 +10,7 @@ from spacy.lang.en import English
 from spacy.lang.de import German
 from spacy.util import registry, ignore_error, raise_error
 import spacy
+from thinc.api import NumpyOps, get_current_ops

 from .util import add_vecs_to_vocab, assert_docs_equal
@@ -142,25 +143,29 @@ def texts():

 @pytest.mark.parametrize("n_process", [1, 2])
 def test_language_pipe(nlp2, n_process, texts):
-    texts = texts * 10
-    expecteds = [nlp2(text) for text in texts]
-    docs = nlp2.pipe(texts, n_process=n_process, batch_size=2)
+    ops = get_current_ops()
+    if isinstance(ops, NumpyOps) or n_process < 2:
+        texts = texts * 10
+        expecteds = [nlp2(text) for text in texts]
+        docs = nlp2.pipe(texts, n_process=n_process, batch_size=2)

-    for doc, expected_doc in zip(docs, expecteds):
-        assert_docs_equal(doc, expected_doc)
+        for doc, expected_doc in zip(docs, expecteds):
+            assert_docs_equal(doc, expected_doc)


 @pytest.mark.parametrize("n_process", [1, 2])
 def test_language_pipe_stream(nlp2, n_process, texts):
-    # check if nlp.pipe can handle infinite length iterator properly.
-    stream_texts = itertools.cycle(texts)
-    texts0, texts1 = itertools.tee(stream_texts)
-    expecteds = (nlp2(text) for text in texts0)
-    docs = nlp2.pipe(texts1, n_process=n_process, batch_size=2)
+    ops = get_current_ops()
+    if isinstance(ops, NumpyOps) or n_process < 2:
+        # check if nlp.pipe can handle infinite length iterator properly.
+        stream_texts = itertools.cycle(texts)
+        texts0, texts1 = itertools.tee(stream_texts)
+        expecteds = (nlp2(text) for text in texts0)
+        docs = nlp2.pipe(texts1, n_process=n_process, batch_size=2)

-    n_fetch = 20
-    for doc, expected_doc in itertools.islice(zip(docs, expecteds), n_fetch):
-        assert_docs_equal(doc, expected_doc)
+        n_fetch = 20
+        for doc, expected_doc in itertools.islice(zip(docs, expecteds), n_fetch):
+            assert_docs_equal(doc, expected_doc)


 def test_language_pipe_error_handler():
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 58bebc4ca..0d09999a9 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -8,7 +8,8 @@ from spacy import prefer_gpu, require_gpu, require_cpu
 from spacy.ml._precomputable_affine import PrecomputableAffine
 from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
 from spacy.util import dot_to_object, SimpleFrozenList, import_file
-from thinc.api import Config, Optimizer, ConfigValidationError
+from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
+from thinc.api import set_current_ops
 from spacy.training.batchers import minibatch_by_words
 from spacy.lang.en import English
 from spacy.lang.nl import Dutch
@@ -81,6 +82,7 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):


 def test_prefer_gpu():
+    current_ops = get_current_ops()
     try:
         import cupy  # noqa: F401

@@ -88,9 +90,11 @@ def test_prefer_gpu():
         assert isinstance(get_current_ops(), CupyOps)
     except ImportError:
         assert not prefer_gpu()
+    set_current_ops(current_ops)


 def test_require_gpu():
+    current_ops = get_current_ops()
     try:
         import cupy  # noqa: F401

@@ -99,9 +103,11 @@ def test_require_gpu():
     except ImportError:
         with pytest.raises(ValueError):
             require_gpu()
+    set_current_ops(current_ops)


 def test_require_cpu():
+    current_ops = get_current_ops()
     require_cpu()
     assert isinstance(get_current_ops(), NumpyOps)
     try:
@@ -113,6 +119,7 @@ def test_require_cpu():
         pass
     require_cpu()
     assert isinstance(get_current_ops(), NumpyOps)
+    set_current_ops(current_ops)


 def test_ascii_filenames():
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 200d7dcfd..45cee13ea 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -1,7 +1,7 @@
 from typing import List
 import pytest
 from thinc.api import fix_random_seed, Adam, set_dropout_rate
-from numpy.testing import assert_array_equal
+from numpy.testing import assert_array_equal, assert_array_almost_equal
 import numpy
 from spacy.ml.models import build_Tok2Vec_model, MultiHashEmbed, MaxoutWindowEncoder
 from spacy.ml.models import build_bow_text_classifier, build_simple_cnn_text_classifier
@@ -109,7 +109,7 @@ def test_models_initialize_consistently(seed, model_func, kwargs):
     model2.initialize()
     params1 = get_all_params(model1)
     params2 = get_all_params(model2)
-    assert_array_equal(params1, params2)
+    assert_array_equal(model1.ops.to_numpy(params1), model2.ops.to_numpy(params2))


 @pytest.mark.parametrize(
@@ -134,14 +134,25 @@ def test_models_predict_consistently(seed, model_func, kwargs, get_X):
         for i in range(len(tok2vec1)):
             for j in range(len(tok2vec1[i])):
                 assert_array_equal(
-                    numpy.asarray(tok2vec1[i][j]), numpy.asarray(tok2vec2[i][j])
+                    numpy.asarray(model1.ops.to_numpy(tok2vec1[i][j])),
+                    numpy.asarray(model2.ops.to_numpy(tok2vec2[i][j])),
                 )
+    try:
+        Y1 = model1.ops.to_numpy(Y1)
+        Y2 = model2.ops.to_numpy(Y2)
+    except Exception:
+        pass
     if isinstance(Y1, numpy.ndarray):
         assert_array_equal(Y1, Y2)
     elif isinstance(Y1, List):
         assert len(Y1) == len(Y2)
         for y1, y2 in zip(Y1, Y2):
+            try:
+                y1 = model1.ops.to_numpy(y1)
+                y2 = model2.ops.to_numpy(y2)
+            except Exception:
+                pass
             assert_array_equal(y1, y2)
     else:
         raise ValueError(f"Could not compare type {type(Y1)}")

@@ -169,12 +180,17 @@ def test_models_update_consistently(seed, dropout, model_func, kwargs, get_X):
             model.finish_update(optimizer)
         updated_params = get_all_params(model)
         with pytest.raises(AssertionError):
-            assert_array_equal(initial_params, updated_params)
+            assert_array_equal(
+                model.ops.to_numpy(initial_params), model.ops.to_numpy(updated_params)
+            )
         return model

     model1 = get_updated_model()
     model2 = get_updated_model()
-    assert_array_equal(get_all_params(model1), get_all_params(model2))
+    assert_array_almost_equal(
+        model1.ops.to_numpy(get_all_params(model1)),
+        model2.ops.to_numpy(get_all_params(model2)),
+    )


 @pytest.mark.parametrize("model_func,kwargs", [(StaticVectors, {"nO": 128, "nM": 300})])
diff --git a/spacy/tests/util.py b/spacy/tests/util.py
index ef7b4d00d..365ea4349 100644
--- a/spacy/tests/util.py
+++ b/spacy/tests/util.py
@@ -5,6 +5,7 @@ import srsly
 from spacy.tokens import Doc
 from spacy.vocab import Vocab
 from spacy.util import make_tempdir  # noqa: F401
+from thinc.api import get_current_ops


 @contextlib.contextmanager
@@ -58,7 +59,10 @@ def add_vecs_to_vocab(vocab, vectors):

 def get_cosine(vec1, vec2):
     """Get cosine for two given vectors"""
-    return numpy.dot(vec1, vec2) / (numpy.linalg.norm(vec1) * numpy.linalg.norm(vec2))
+    OPS = get_current_ops()
+    v1 = OPS.to_numpy(OPS.asarray(vec1))
+    v2 = OPS.to_numpy(OPS.asarray(vec2))
+    return numpy.dot(v1, v2) / (numpy.linalg.norm(v1) * numpy.linalg.norm(v2))


 def assert_docs_equal(doc1, doc2):
diff --git a/spacy/tests/vocab_vectors/test_vectors.py b/spacy/tests/vocab_vectors/test_vectors.py
index 4257022ea..37d48ad0f 100644
--- a/spacy/tests/vocab_vectors/test_vectors.py
+++ b/spacy/tests/vocab_vectors/test_vectors.py
@@ -1,6 +1,7 @@
 import pytest
 import numpy
 from numpy.testing import assert_allclose, assert_equal
+from thinc.api import get_current_ops
 from spacy.vocab import Vocab
 from spacy.vectors import Vectors
 from spacy.tokenizer import Tokenizer
@@ -9,6 +10,7 @@ from spacy.tokens import Doc

 from ..util import add_vecs_to_vocab, get_cosine, make_tempdir

+OPS = get_current_ops()

 @pytest.fixture
 def strings():
@@ -18,21 +20,21 @@ def strings():
 @pytest.fixture
 def vectors():
     return [
-        ("apple", [1, 2, 3]),
-        ("orange", [-1, -2, -3]),
-        ("and", [-1, -1, -1]),
-        ("juice", [5, 5, 10]),
-        ("pie", [7, 6.3, 8.9]),
+        ("apple", OPS.asarray([1, 2, 3])),
+        ("orange", OPS.asarray([-1, -2, -3])),
+        ("and", OPS.asarray([-1, -1, -1])),
+        ("juice", OPS.asarray([5, 5, 10])),
+        ("pie", OPS.asarray([7, 6.3, 8.9])),
     ]


 @pytest.fixture
 def ngrams_vectors():
     return [
-        ("apple", [1, 2, 3]),
-        ("app", [-0.1, -0.2, -0.3]),
-        ("ppl", [-0.2, -0.3, -0.4]),
-        ("pl", [0.7, 0.8, 0.9]),
+        ("apple", OPS.asarray([1, 2, 3])),
+        ("app", OPS.asarray([-0.1, -0.2, -0.3])),
+        ("ppl", OPS.asarray([-0.2, -0.3, -0.4])),
+        ("pl", OPS.asarray([0.7, 0.8, 0.9])),
     ]

@@ -171,8 +173,10 @@ def test_vectors_most_similar_identical():
 @pytest.mark.parametrize("text", ["apple and orange"])
 def test_vectors_token_vector(tokenizer_v, vectors, text):
     doc = tokenizer_v(text)
-    assert vectors[0] == (doc[0].text, list(doc[0].vector))
-    assert vectors[1] == (doc[2].text, list(doc[2].vector))
+    assert vectors[0][0] == doc[0].text
+    assert all([a == b for a, b in zip(vectors[0][1], doc[0].vector)])
+    assert vectors[1][0] == doc[2].text
+    assert all([a == b for a, b in zip(vectors[1][1], doc[2].vector)])


 @pytest.mark.parametrize("text", ["apple"])
@@ -301,7 +305,7 @@ def test_vectors_doc_doc_similarity(vocab, text1, text2):

 def test_vocab_add_vector():
     vocab = Vocab(vectors_name="test_vocab_add_vector")
-    data = numpy.ndarray((5, 3), dtype="f")
+    data = OPS.xp.ndarray((5, 3), dtype="f")
     data[0] = 1.0
     data[1] = 2.0
     vocab.set_vector("cat", data[0])
@@ -320,10 +324,10 @@ def test_vocab_prune_vectors():
     _ = vocab["cat"]  # noqa: F841
     _ = vocab["dog"]  # noqa: F841
     _ = vocab["kitten"]  # noqa: F841
-    data = numpy.ndarray((5, 3), dtype="f")
-    data[0] = [1.0, 1.2, 1.1]
-    data[1] = [0.3, 1.3, 1.0]
-    data[2] = [0.9, 1.22, 1.05]
+    data = OPS.xp.ndarray((5, 3), dtype="f")
+    data[0] = OPS.asarray([1.0, 1.2, 1.1])
+    data[1] = OPS.asarray([0.3, 1.3, 1.0])
+    data[2] = OPS.asarray([0.9, 1.22, 1.05])
     vocab.set_vector("cat", data[0])
     vocab.set_vector("dog", data[1])
     vocab.set_vector("kitten", data[2])
@@ -332,40 +336,41 @@ def test_vocab_prune_vectors():
     assert list(remap.keys()) == ["kitten"]
     neighbour, similarity = list(remap.values())[0]
     assert neighbour == "cat", remap
-    assert_allclose(similarity, get_cosine(data[0], data[2]), atol=1e-4, rtol=1e-3)
+    cosine = get_cosine(data[0], data[2])
+    assert_allclose(float(similarity), cosine, atol=1e-4, rtol=1e-3)


 def test_vectors_serialize():
-    data = numpy.asarray([[4, 2, 2, 2], [4, 2, 2, 2], [1, 1, 1, 1]], dtype="f")
+    data = OPS.asarray([[4, 2, 2, 2], [4, 2, 2, 2], [1, 1, 1, 1]], dtype="f")
     v = Vectors(data=data, keys=["A", "B", "C"])
     b = v.to_bytes()
     v_r = Vectors()
     v_r.from_bytes(b)
-    assert_equal(v.data, v_r.data)
+    assert_equal(OPS.to_numpy(v.data), OPS.to_numpy(v_r.data))
     assert v.key2row == v_r.key2row
     v.resize((5, 4))
     v_r.resize((5, 4))
-    row = v.add("D", vector=numpy.asarray([1, 2, 3, 4], dtype="f"))
-    row_r = v_r.add("D", vector=numpy.asarray([1, 2, 3, 4], dtype="f"))
+    row = v.add("D", vector=OPS.asarray([1, 2, 3, 4], dtype="f"))
+    row_r = v_r.add("D", vector=OPS.asarray([1, 2, 3, 4], dtype="f"))
     assert row == row_r
-    assert_equal(v.data, v_r.data)
+    assert_equal(OPS.to_numpy(v.data), OPS.to_numpy(v_r.data))
     assert v.is_full == v_r.is_full
     with make_tempdir() as d:
         v.to_disk(d)
         v_r.from_disk(d)
-        assert_equal(v.data, v_r.data)
+        assert_equal(OPS.to_numpy(v.data), OPS.to_numpy(v_r.data))
         assert v.key2row == v_r.key2row
         v.resize((5, 4))
         v_r.resize((5, 4))
-        row = v.add("D", vector=numpy.asarray([10, 20, 30, 40], dtype="f"))
-        row_r = v_r.add("D", vector=numpy.asarray([10, 20, 30, 40], dtype="f"))
+        row = v.add("D", vector=OPS.asarray([10, 20, 30, 40], dtype="f"))
+        row_r = v_r.add("D", vector=OPS.asarray([10, 20, 30, 40], dtype="f"))
         assert row == row_r
-        assert_equal(v.data, v_r.data)
+        assert_equal(OPS.to_numpy(v.data), OPS.to_numpy(v_r.data))


 def test_vector_is_oov():
     vocab = Vocab(vectors_name="test_vocab_is_oov")
-    data = numpy.ndarray((5, 3), dtype="f")
+    data = OPS.xp.ndarray((5, 3), dtype="f")
     data[0] = 1.0
     data[1] = 2.0
     vocab.set_vector("cat", data[0])