diff --git a/spacy/__init__.py b/spacy/__init__.py index 4a311ec86..2c063ce24 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -5,7 +5,7 @@ warnings.filterwarnings("ignore", message="numpy.dtype size changed") warnings.filterwarnings("ignore", message="numpy.ufunc size changed") # These are imported as part of the API -from thinc.util import prefer_gpu, require_gpu +from thinc.api import prefer_gpu, require_gpu from . import pipeline from .cli.info import info as cli_info diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py index 0f7677fd2..585eaea51 100644 --- a/spacy/cli/__init__.py +++ b/spacy/cli/__init__.py @@ -4,7 +4,7 @@ from .link import link # noqa: F401 from .package import package # noqa: F401 from .profile import profile # noqa: F401 from .train import train # noqa: F401 -from .train_from_config import train_from_config_cli # noqa: F401 +from .train_from_config import train_from_config_cli # noqa: F401 from .pretrain import pretrain # noqa: F401 from .debug_data import debug_data # noqa: F401 from .evaluate import evaluate # noqa: F401 diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index 4bcafce24..1705bf446 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -192,11 +192,7 @@ def debug_data( has_ws_ents_error = True if gold_train_data["punct_ents"]: - msg.warn( - "{} entity span(s) with punctuation".format( - gold_train_data["punct_ents"] - ) - ) + msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation") has_punct_ents_warning = True for label in new_labels: diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py index 2cef378c0..690e3107d 100644 --- a/spacy/cli/pretrain.py +++ b/spacy/cli/pretrain.py @@ -4,14 +4,12 @@ import time import re from collections import Counter from pathlib import Path -from thinc.layers import Linear, Maxout -from thinc.util import prefer_gpu +from thinc.api import Linear, Maxout, chain, list2array, prefer_gpu +from thinc.api import CosineDistance, L2Distance from wasabi import msg import srsly -from thinc.layers import chain, list2array -from thinc.loss import CosineDistance, L2Distance -from spacy.gold import Example +from ..gold import Example from ..errors import Errors from ..tokens import Doc from ..attrs import ID, HEAD @@ -85,7 +83,7 @@ def pretrain( ) if not output_dir.exists(): output_dir.mkdir() - msg.good("Created output directory: {}".format(output_dir)) + msg.good(f"Created output directory: {output_dir}") srsly.write_json(output_dir / "config.json", config) msg.good("Saved settings to config.json") diff --git a/spacy/cli/train.py b/spacy/cli/train.py index adae91ff9..d8514095b 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -1,7 +1,7 @@ import os import tqdm from pathlib import Path -from thinc.backends import use_ops +from thinc.api import use_ops from timeit import default_timer as timer import shutil import srsly @@ -89,7 +89,7 @@ def train( ) if not output_path.exists(): output_path.mkdir() - msg.good("Created output directory: {}".format(output_path)) + msg.good(f"Created output directory: {output_path}") tag_map = {} if tag_map_path is not None: @@ -125,17 +125,17 @@ def train( msg.text(f"Training pipeline: {pipeline}") disabled_pipes = None pipes_added = False - msg.text("Training pipeline: {}".format(pipeline)) + msg.text(f"Training pipeline: {pipeline}") if use_gpu >= 0: activated_gpu = None try: activated_gpu = set_gpu(use_gpu) except Exception as e: - msg.warn("Exception: {}".format(e)) + msg.warn(f"Exception: {e}") if activated_gpu is not 
None: - msg.text("Using GPU: {}".format(use_gpu)) + msg.text(f"Using GPU: {use_gpu}") else: - msg.warn("Unable to activate GPU: {}".format(use_gpu)) + msg.warn(f"Unable to activate GPU: {use_gpu}") msg.text("Using CPU only") use_gpu = -1 if base_model: @@ -158,11 +158,11 @@ def train( "positive_label": textcat_positive_label, } if pipe not in nlp.pipe_names: - msg.text("Adding component to base model '{}'".format(pipe)) + msg.text(f"Adding component to base model '{pipe}'") nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg)) pipes_added = True elif replace_components: - msg.text("Replacing component from base model '{}'".format(pipe)) + msg.text(f"Replacing component from base model '{pipe}'") nlp.replace_pipe(pipe, nlp.create_pipe(pipe, config=pipe_cfg)) pipes_added = True else: @@ -180,7 +180,7 @@ def train( f"Existing cfg: {base_cfg}, provided cfg: {pipe_cfg}", exits=1, ) - msg.text("Extending component from base model '{}'".format(pipe)) + msg.text(f"Extending component from base model '{pipe}'") disabled_pipes = nlp.disable_pipes( [p for p in nlp.pipe_names if p not in pipeline] ) @@ -377,7 +377,7 @@ def train( msg.warn( "Did you provide the same parameters during 'train' as during 'pretrain'?" ) - msg.fail("Original error message: {}".format(e), exits=1) + msg.fail(f"Original error message: {e}", exits=1) if raw_text: # If raw text is available, perform 'rehearsal' updates, # which use unlabelled data to reduce overfitting. @@ -504,11 +504,7 @@ def train( ) break except Exception as e: - msg.warn( - "Aborting and saving the final best model. Encountered exception: {}".format( - e - ) - ) + msg.warn(f"Aborting and saving final best model. Encountered exception: {e}") finally: best_pipes = nlp.pipe_names if disabled_pipes: diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train_from_config.py index 0488dd04c..9150da356 100644 --- a/spacy/cli/train_from_config.py +++ b/spacy/cli/train_from_config.py @@ -1,19 +1,20 @@ +from typing import Optional, Dict, List, Union, Sequence import plac -from thinc.util import require_gpu from wasabi import msg from pathlib import Path import thinc import thinc.schedules -from thinc.model import Model -from spacy.gold import GoldCorpus -import spacy -from spacy.pipeline.tok2vec import Tok2VecListener -from typing import Optional, Dict, List, Union, Sequence +from thinc.api import Model from pydantic import BaseModel, FilePath, StrictInt import tqdm -from ..ml import component_models -from .. import util +# TODO: relative imports? 
+import spacy +from spacy.gold import GoldCorpus +from spacy.pipeline.tok2vec import Tok2VecListener +from spacy.ml import component_models +from spacy import util + registry = util.registry @@ -153,10 +154,9 @@ def create_tb_parser_model( hidden_width: StrictInt = 64, maxout_pieces: StrictInt = 3, ): - from thinc.layers import Linear, chain, list2array + from thinc.api import Linear, chain, list2array, use_ops, zero_init from spacy.ml._layers import PrecomputableAffine from spacy.syntax._parser_model import ParserModel - from thinc.api import use_ops, zero_init token_vector_width = tok2vec.get_dim("nO") tok2vec = chain(tok2vec, list2array()) @@ -221,13 +221,9 @@ def train_from_config_cli( def train_from_config( - config_path, - data_paths, - raw_text=None, - meta_path=None, - output_path=None, + config_path, data_paths, raw_text=None, meta_path=None, output_path=None, ): - msg.info("Loading config from: {}".format(config_path)) + msg.info(f"Loading config from: {config_path}") config = util.load_from_config(config_path, create_objects=True) use_gpu = config["training"]["use_gpu"] if use_gpu >= 0: @@ -241,9 +237,7 @@ def train_from_config( msg.info("Loading training corpus") corpus = GoldCorpus(data_paths["train"], data_paths["dev"], limit=limit) msg.info("Initializing the nlp pipeline") - nlp.begin_training( - lambda: corpus.train_examples, device=use_gpu - ) + nlp.begin_training(lambda: corpus.train_examples, device=use_gpu) train_batches = create_train_batches(nlp, corpus, config["training"]) evaluate = create_evaluation_callback(nlp, optimizer, corpus, config["training"]) @@ -260,7 +254,7 @@ def train_from_config( config["training"]["eval_frequency"], ) - msg.info("Training. Initial learn rate: {}".format(optimizer.learn_rate)) + msg.info(f"Training. Initial learn rate: {optimizer.learn_rate}") print_row = setup_printer(config) try: @@ -414,7 +408,7 @@ def subdivide_batch(batch): def setup_printer(config): score_cols = config["training"]["scores"] score_widths = [max(len(col), 6) for col in score_cols] - loss_cols = ["Loss {}".format(pipe) for pipe in config["nlp"]["pipeline"]] + loss_cols = [f"Loss {pipe}" for pipe in config["nlp"]["pipeline"]] loss_widths = [max(len(col), 8) for col in loss_cols] table_header = ["#"] + loss_cols + score_cols + ["Score"] table_header = [col.upper() for col in table_header] diff --git a/spacy/compat.py b/spacy/compat.py index 6fa49353e..8c5c2930b 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -30,7 +30,7 @@ try: except ImportError: cupy = None -from thinc.optimizers import Optimizer # noqa: F401 +from thinc.api import Optimizer # noqa: F401 pickle = pickle copy_reg = copy_reg diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py index d6970aa2f..a721ce480 100644 --- a/spacy/displacy/templates.py +++ b/spacy/displacy/templates.py @@ -1,4 +1,3 @@ - # Setting explicit height and max-width: none on the SVG is required for # Jupyter to render it properly in a cell diff --git a/spacy/glossary.py b/spacy/glossary.py index 5e7e531a9..938a575cd 100644 --- a/spacy/glossary.py +++ b/spacy/glossary.py @@ -1,4 +1,3 @@ - def explain(term): """Get a description for a given POS tag, dependency label or entity type. 
diff --git a/spacy/gold.pxd b/spacy/gold.pxd index 49dba16df..aea691130 100644 --- a/spacy/gold.pxd +++ b/spacy/gold.pxd @@ -1,6 +1,6 @@ from cymem.cymem cimport Pool -from spacy.tokens import Doc +from .tokens import Doc from .typedefs cimport attr_t from .syntax.transition_system cimport Transition @@ -65,5 +65,3 @@ cdef class Example: cdef public TokenAnnotation token_annotation cdef public DocAnnotation doc_annotation cdef public object goldparse - - diff --git a/spacy/kb.pxd b/spacy/kb.pxd index d5aa382b1..518ce0f4e 100644 --- a/spacy/kb.pxd +++ b/spacy/kb.pxd @@ -6,7 +6,7 @@ from libcpp.vector cimport vector from libc.stdint cimport int32_t, int64_t from libc.stdio cimport FILE -from spacy.vocab cimport Vocab +from .vocab cimport Vocab from .typedefs cimport hash_t from .structs cimport KBEntryC, AliasC @@ -113,7 +113,7 @@ cdef class KnowledgeBase: return new_index cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil: - """ + """ Initializing the vectors and making sure the first element of each vector is a dummy, because the PreshMap maps pointing to indices in these vectors can not contain 0 as value cf. https://github.com/explosion/preshed/issues/17 @@ -169,4 +169,3 @@ cdef class Reader: cdef int read_alias(self, int64_t* entry_index, float* prob) except -1 cdef int _read(self, void* value, size_t size) except -1 - diff --git a/spacy/lang/af/stop_words.py b/spacy/lang/af/stop_words.py index dfd144de9..4b5a04a5e 100644 --- a/spacy/lang/af/stop_words.py +++ b/spacy/lang/af/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-af STOP_WORDS = set( diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py index 45a252bc9..aae7692a2 100644 --- a/spacy/lang/bg/stop_words.py +++ b/spacy/lang/bg/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Alir3z4/stop-words STOP_WORDS = set( diff --git a/spacy/lang/bn/examples.py b/spacy/lang/bn/examples.py index 051e59d84..c3be4c556 100644 --- a/spacy/lang/bn/examples.py +++ b/spacy/lang/bn/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/bn/stop_words.py b/spacy/lang/bn/stop_words.py index 6bcd06b37..bf38e3254 100644 --- a/spacy/lang/bn/stop_words.py +++ b/spacy/lang/bn/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ অতএব অথচ অথবা অনুযায়ী অনেক অনেকে অনেকেই অন্তত অবধি অবশ্য অর্থাৎ অন্য অনুযায়ী অর্ধভাগে diff --git a/spacy/lang/ca/examples.py b/spacy/lang/ca/examples.py index 3fbf1fb0a..ae6aa3e24 100644 --- a/spacy/lang/ca/examples.py +++ b/spacy/lang/ca/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/cs/stop_words.py b/spacy/lang/cs/stop_words.py index e8171a7e5..70aab030b 100644 --- a/spacy/lang/cs/stop_words.py +++ b/spacy/lang/cs/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Alir3z4/stop-words STOP_WORDS = set( diff --git a/spacy/lang/da/examples.py b/spacy/lang/da/examples.py index e5c6448f0..80b2b925b 100644 --- a/spacy/lang/da/examples.py +++ b/spacy/lang/da/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/de/examples.py b/spacy/lang/de/examples.py index 530ece629..735d1c316 100644 --- a/spacy/lang/de/examples.py +++ b/spacy/lang/de/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/de/stop_words.py b/spacy/lang/de/stop_words.py index cc5aa0f3c..f52687eb9 100644 --- a/spacy/lang/de/stop_words.py +++ b/spacy/lang/de/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ á a ab aber ach acht achte achten achter achtes ag alle allein allem allen @@ -44,7 +43,7 @@ kleines kommen kommt können könnt konnte könnte konnten kurz lang lange leicht leider lieber los machen macht machte mag magst man manche manchem manchen mancher manches mehr -mein meine meinem meinen meiner meines mich mir mit mittel mochte möchte mochten +mein meine meinem meinen meiner meines mich mir mit mittel mochte möchte mochten mögen möglich mögt morgen muss muß müssen musst müsst musste mussten na nach nachdem nahm natürlich neben nein neue neuen neun neunte neunten neunter diff --git a/spacy/lang/el/get_pos_from_wiktionary.py b/spacy/lang/el/get_pos_from_wiktionary.py index 01deb23a2..369973cc0 100644 --- a/spacy/lang/el/get_pos_from_wiktionary.py +++ b/spacy/lang/el/get_pos_from_wiktionary.py @@ -1,4 +1,3 @@ - def get_pos_from_wiktionary(): import re from gensim.corpora.wikicorpus import extract_pages diff --git a/spacy/lang/el/norm_exceptions.py b/spacy/lang/el/norm_exceptions.py index d540aae2c..aa774c19b 100644 --- a/spacy/lang/el/norm_exceptions.py +++ b/spacy/lang/el/norm_exceptions.py @@ -1,4 +1,3 @@ - # These exceptions are used to add NORM values based on a token's ORTH value. # Norms are only set if no alternative is provided in the tokenizer exceptions. diff --git a/spacy/lang/el/stop_words.py b/spacy/lang/el/stop_words.py index 8484826d1..7c436219f 100644 --- a/spacy/lang/el/stop_words.py +++ b/spacy/lang/el/stop_words.py @@ -1,4 +1,3 @@ - # Stop words # Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0 STOP_WORDS = set( diff --git a/spacy/lang/en/examples.py b/spacy/lang/en/examples.py index 0363a45e7..2cca9e05f 100644 --- a/spacy/lang/en/examples.py +++ b/spacy/lang/en/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/en/norm_exceptions.py b/spacy/lang/en/norm_exceptions.py index 431d9c049..4125cd37b 100644 --- a/spacy/lang/en/norm_exceptions.py +++ b/spacy/lang/en/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Slang and abbreviations "cos": "because", diff --git a/spacy/lang/en/stop_words.py b/spacy/lang/en/stop_words.py index 4573c9411..1ca5cbc16 100644 --- a/spacy/lang/en/stop_words.py +++ b/spacy/lang/en/stop_words.py @@ -1,4 +1,3 @@ - # Stop words STOP_WORDS = set( """ diff --git a/spacy/lang/es/examples.py b/spacy/lang/es/examples.py index 1c1ad631b..a1db41a16 100644 --- a/spacy/lang/es/examples.py +++ b/spacy/lang/es/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/es/stop_words.py b/spacy/lang/es/stop_words.py index 3d46a88cb..004df4fca 100644 --- a/spacy/lang/es/stop_words.py +++ b/spacy/lang/es/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ actualmente acuerdo adelante ademas además adrede afirmó agregó ahi ahora ahí diff --git a/spacy/lang/et/stop_words.py b/spacy/lang/et/stop_words.py index 3b600a158..e1da1f14d 100644 --- a/spacy/lang/et/stop_words.py +++ b/spacy/lang/et/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-et STOP_WORDS = set( diff --git a/spacy/lang/fa/examples.py b/spacy/lang/fa/examples.py index d89feb6c8..9c6fb0345 100644 --- a/spacy/lang/fa/examples.py +++ b/spacy/lang/fa/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/fa/generate_verbs_exc.py b/spacy/lang/fa/generate_verbs_exc.py index 61586dc3f..62094c6de 100644 --- a/spacy/lang/fa/generate_verbs_exc.py +++ b/spacy/lang/fa/generate_verbs_exc.py @@ -1,4 +1,3 @@ - verb_roots = """ #هست آخت#آهنج diff --git a/spacy/lang/fa/stop_words.py b/spacy/lang/fa/stop_words.py index 372422b67..f462f2e7a 100644 --- a/spacy/lang/fa/stop_words.py +++ b/spacy/lang/fa/stop_words.py @@ -1,4 +1,3 @@ - # Stop words from HAZM package STOP_WORDS = set( """ diff --git a/spacy/lang/fi/stop_words.py b/spacy/lang/fi/stop_words.py index 642cfc369..8e8dcfa56 100644 --- a/spacy/lang/fi/stop_words.py +++ b/spacy/lang/fi/stop_words.py @@ -1,4 +1,3 @@ - # Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt # Reformatted with some minor corrections STOP_WORDS = set( diff --git a/spacy/lang/fr/examples.py b/spacy/lang/fr/examples.py index 57d57f4a6..a74a62204 100644 --- a/spacy/lang/fr/examples.py +++ b/spacy/lang/fr/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/fr/stop_words.py b/spacy/lang/fr/stop_words.py index 9c12e49a3..a331f3c0f 100644 --- a/spacy/lang/fr/stop_words.py +++ b/spacy/lang/fr/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ a à â abord absolument afin ah ai aie ailleurs ainsi ait allaient allo allons diff --git a/spacy/lang/ga/irish_morphology_helpers.py b/spacy/lang/ga/irish_morphology_helpers.py index c8cd36835..d606da975 100644 --- a/spacy/lang/ga/irish_morphology_helpers.py +++ b/spacy/lang/ga/irish_morphology_helpers.py @@ -1,4 +1,3 @@ - # fmt: off consonants = ["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "z"] broad_vowels = ["a", "á", "o", "ó", "u", "ú"] diff --git a/spacy/lang/he/examples.py b/spacy/lang/he/examples.py index 29075c7d4..d54d2a145 100644 --- a/spacy/lang/he/examples.py +++ b/spacy/lang/he/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py index 7639ff940..ecb0b328c 100644 --- a/spacy/lang/hi/examples.py +++ b/spacy/lang/hi/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/hi/stop_words.py b/spacy/lang/hi/stop_words.py index 142fc6f47..475b07da1 100644 --- a/spacy/lang/hi/stop_words.py +++ b/spacy/lang/hi/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6 STOP_WORDS = set( diff --git a/spacy/lang/hu/examples.py b/spacy/lang/hu/examples.py index b60f752ec..711a438bd 100644 --- a/spacy/lang/hu/examples.py +++ b/spacy/lang/hu/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/hu/stop_words.py b/spacy/lang/hu/stop_words.py index 024af68f4..e39a26d35 100644 --- a/spacy/lang/hu/stop_words.py +++ b/spacy/lang/hu/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben diff --git a/spacy/lang/id/examples.py b/spacy/lang/id/examples.py index 2ce46ce5a..1069232ff 100644 --- a/spacy/lang/id/examples.py +++ b/spacy/lang/id/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/is/stop_words.py b/spacy/lang/is/stop_words.py index 5b3ff2f5a..917fb6df4 100644 --- a/spacy/lang/is/stop_words.py +++ b/spacy/lang/is/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Xangis/extra-stopwords STOP_WORDS = set( diff --git a/spacy/lang/it/examples.py b/spacy/lang/it/examples.py index 30327bd14..506721276 100644 --- a/spacy/lang/it/examples.py +++ b/spacy/lang/it/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/it/stop_words.py b/spacy/lang/it/stop_words.py index 5cd1af137..e97613912 100644 --- a/spacy/lang/it/stop_words.py +++ b/spacy/lang/it/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl diff --git a/spacy/lang/ja/examples.py b/spacy/lang/ja/examples.py index 1d532ad77..c3a011862 100644 --- a/spacy/lang/ja/examples.py +++ b/spacy/lang/ja/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/kn/stop_words.py b/spacy/lang/kn/stop_words.py index cfeb0e69d..dba9740af 100644 --- a/spacy/lang/kn/stop_words.py +++ b/spacy/lang/kn/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ ಹಲವು diff --git a/spacy/lang/lt/examples.py b/spacy/lang/lt/examples.py index b2889114c..eaf941f1a 100644 --- a/spacy/lang/lt/examples.py +++ b/spacy/lang/lt/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/lv/stop_words.py b/spacy/lang/lv/stop_words.py index a9612f949..2685c2430 100644 --- a/spacy/lang/lv/stop_words.py +++ b/spacy/lang/lv/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-lv STOP_WORDS = set( diff --git a/spacy/lang/mr/stop_words.py b/spacy/lang/mr/stop_words.py index 0d7501461..9b0cee951 100644 --- a/spacy/lang/mr/stop_words.py +++ b/spacy/lang/mr/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json STOP_WORDS = set( """ diff --git a/spacy/lang/nb/examples.py b/spacy/lang/nb/examples.py index 89e265951..b1a63ad74 100644 --- a/spacy/lang/nb/examples.py +++ b/spacy/lang/nb/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/nl/examples.py b/spacy/lang/nl/examples.py index fcefa9d62..8c8c50c60 100644 --- a/spacy/lang/nl/examples.py +++ b/spacy/lang/nl/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/norm_exceptions.py b/spacy/lang/norm_exceptions.py index c194f05c7..f35f613b1 100644 --- a/spacy/lang/norm_exceptions.py +++ b/spacy/lang/norm_exceptions.py @@ -1,4 +1,3 @@ - # These exceptions are used to add NORM values based on a token's ORTH value. # Individual languages can also add their own exceptions and overwrite them - # for example, British vs. American spelling in English. diff --git a/spacy/lang/pl/examples.py b/spacy/lang/pl/examples.py index 6eabe1843..b1ea5880f 100644 --- a/spacy/lang/pl/examples.py +++ b/spacy/lang/pl/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/pt/examples.py b/spacy/lang/pt/examples.py index 7427f8b25..13f3512cf 100644 --- a/spacy/lang/pt/examples.py +++ b/spacy/lang/pt/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/pt/stop_words.py b/spacy/lang/pt/stop_words.py index 8065fcda7..ff45ad3a7 100644 --- a/spacy/lang/pt/stop_words.py +++ b/spacy/lang/pt/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes diff --git a/spacy/lang/ro/examples.py b/spacy/lang/ro/examples.py index d472f0d6d..bfa258ffc 100644 --- a/spacy/lang/ro/examples.py +++ b/spacy/lang/ro/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/ru/examples.py b/spacy/lang/ru/examples.py index 34cf5a1eb..adb007625 100644 --- a/spacy/lang/ru/examples.py +++ b/spacy/lang/ru/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/ru/norm_exceptions.py b/spacy/lang/ru/norm_exceptions.py index c5d725031..0975bf5b8 100644 --- a/spacy/lang/ru/norm_exceptions.py +++ b/spacy/lang/ru/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Slang "прив": "привет", diff --git a/spacy/lang/si/examples.py b/spacy/lang/si/examples.py index 0ff00e76e..b34051d00 100644 --- a/spacy/lang/si/examples.py +++ b/spacy/lang/si/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/si/stop_words.py b/spacy/lang/si/stop_words.py index 49723c860..bde662bf7 100644 --- a/spacy/lang/si/stop_words.py +++ b/spacy/lang/si/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ අතර diff --git a/spacy/lang/sk/stop_words.py b/spacy/lang/sk/stop_words.py index 269bdc58b..017e7beef 100644 --- a/spacy/lang/sk/stop_words.py +++ b/spacy/lang/sk/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/Ardevop-sk/stopwords-sk STOP_WORDS = set( diff --git a/spacy/lang/sl/stop_words.py b/spacy/lang/sl/stop_words.py index c8596ad0b..6fb01a183 100644 --- a/spacy/lang/sl/stop_words.py +++ b/spacy/lang/sl/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/stopwords-iso/stopwords-sl # TODO: probably needs to be tidied up – the list seems to have month names in # it, which shouldn't be considered stop words. diff --git a/spacy/lang/sq/examples.py b/spacy/lang/sq/examples.py index e1075f70a..06ed20fa1 100644 --- a/spacy/lang/sq/examples.py +++ b/spacy/lang/sq/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/sq/stop_words.py b/spacy/lang/sq/stop_words.py index 58ee87d05..f2b1a4f4a 100644 --- a/spacy/lang/sq/stop_words.py +++ b/spacy/lang/sq/stop_words.py @@ -1,4 +1,3 @@ - # Source: https://github.com/andrixh/index-albanian STOP_WORDS = set( diff --git a/spacy/lang/sr/examples.py b/spacy/lang/sr/examples.py index 1ac867f4c..ec7f57ced 100644 --- a/spacy/lang/sr/examples.py +++ b/spacy/lang/sr/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/sr/norm_exceptions.py b/spacy/lang/sr/norm_exceptions.py index add8350a0..723ab84c0 100644 --- a/spacy/lang/sr/norm_exceptions.py +++ b/spacy/lang/sr/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Slang "ћале": "отац", diff --git a/spacy/lang/sr/stop_words.py b/spacy/lang/sr/stop_words.py index 488c82a75..5df5509d2 100644 --- a/spacy/lang/sr/stop_words.py +++ b/spacy/lang/sr/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ а diff --git a/spacy/lang/sv/examples.py b/spacy/lang/sv/examples.py index 98eee700b..bc6cd7a54 100644 --- a/spacy/lang/sv/examples.py +++ b/spacy/lang/sv/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/sv/stop_words.py b/spacy/lang/sv/stop_words.py index 4d933a76d..2422b2a9e 100644 --- a/spacy/lang/sv/stop_words.py +++ b/spacy/lang/sv/stop_words.py @@ -1,4 +1,3 @@ - STOP_WORDS = set( """ aderton adertonde adjö aldrig alla allas allt alltid alltså än andra andras diff --git a/spacy/lang/ta/examples.py b/spacy/lang/ta/examples.py index 2590163cb..a53227220 100644 --- a/spacy/lang/ta/examples.py +++ b/spacy/lang/ta/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/ta/stop_words.py b/spacy/lang/ta/stop_words.py index 83410d65e..abbff949d 100644 --- a/spacy/lang/ta/stop_words.py +++ b/spacy/lang/ta/stop_words.py @@ -1,4 +1,3 @@ - # Stop words STOP_WORDS = set( diff --git a/spacy/lang/te/examples.py b/spacy/lang/te/examples.py index 6162b231e..cff7d3cb0 100644 --- a/spacy/lang/te/examples.py +++ b/spacy/lang/te/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/th/norm_exceptions.py b/spacy/lang/th/norm_exceptions.py index 98b878308..b8ddbab16 100644 --- a/spacy/lang/th/norm_exceptions.py +++ b/spacy/lang/th/norm_exceptions.py @@ -1,4 +1,3 @@ - _exc = { # Conjugation and Diversion invalid to Tonal form (ผันอักษรและเสียงไม่ตรงกับรูปวรรณยุกต์) "สนุ๊กเกอร์": "สนุกเกอร์", diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py index fa4e347fd..ee58a7b09 100644 --- a/spacy/lang/tokenizer_exceptions.py +++ b/spacy/lang/tokenizer_exceptions.py @@ -34,7 +34,7 @@ URL_PATTERN = ( r"|" # host & domain names # mods: match is case-sensitive, so include [A-Z] - "(?:" + "(?:" # noqa: E131 "(?:" "[A-Za-z0-9\u00a1-\uffff]" "[A-Za-z0-9\u00a1-\uffff_-]{0,62}" diff --git a/spacy/lang/tr/examples.py b/spacy/lang/tr/examples.py index a14d87a46..dfb324a4e 100644 --- a/spacy/lang/tr/examples.py +++ b/spacy/lang/tr/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. >>> from spacy.lang.tr.examples import sentences diff --git a/spacy/lang/uk/examples.py b/spacy/lang/uk/examples.py index d17768ea6..f75d44488 100644 --- a/spacy/lang/uk/examples.py +++ b/spacy/lang/uk/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. 
diff --git a/spacy/lang/ur/examples.py b/spacy/lang/ur/examples.py index 7024483b5..e55b337be 100644 --- a/spacy/lang/ur/examples.py +++ b/spacy/lang/ur/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/xx/__init__.py b/spacy/lang/xx/__init__.py index 2af650703..347c624fd 100644 --- a/spacy/lang/xx/__init__.py +++ b/spacy/lang/xx/__init__.py @@ -1,4 +1,3 @@ - from ..tokenizer_exceptions import BASE_EXCEPTIONS from ..norm_exceptions import BASE_NORMS from ...language import Language diff --git a/spacy/lang/xx/examples.py b/spacy/lang/xx/examples.py index 15f5c4ff8..8d63c3c20 100644 --- a/spacy/lang/xx/examples.py +++ b/spacy/lang/xx/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/yo/examples.py b/spacy/lang/yo/examples.py index 9b875d09e..0a610f125 100644 --- a/spacy/lang/yo/examples.py +++ b/spacy/lang/yo/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/lang/zh/examples.py b/spacy/lang/zh/examples.py index d0715eb0d..8be1336d2 100644 --- a/spacy/lang/zh/examples.py +++ b/spacy/lang/zh/examples.py @@ -1,4 +1,3 @@ - """ Example sentences to test spaCy and its language models. diff --git a/spacy/language.py b/spacy/language.py index 3aaf0b327..1c6014cec 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -4,7 +4,7 @@ import weakref import functools from contextlib import contextmanager from copy import copy, deepcopy -from thinc.backends import get_current_ops +from thinc.api import get_current_ops import srsly import multiprocessing as mp from itertools import chain, cycle diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 1292a46bd..5910ebfe1 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -6,7 +6,7 @@ cimport numpy as np np.import_array() import numpy -from thinc.util import get_array_module +from thinc.api import get_array_module from .typedefs cimport attr_t, flags_t from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE diff --git a/spacy/ml/_character_embed.py b/spacy/ml/_character_embed.py index 2ff67746f..b366f67c6 100644 --- a/spacy/ml/_character_embed.py +++ b/spacy/ml/_character_embed.py @@ -3,18 +3,20 @@ from thinc.api import Model def CharacterEmbed(nM, nC): # nM: Number of dimensions per character. nC: Number of characters. 
- nO = nM*nC if (nM is not None and nC is not None) else None + nO = nM * nC if (nM is not None and nC is not None) else None return Model( "charembed", forward, init=init, dims={"nM": nM, "nC": nC, "nO": nO, "nV": 256}, - params={"E": None} + params={"E": None}, ).initialize() def init(model, X=None, Y=None): - vectors_table = model.ops.alloc3f(model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM")) + vectors_table = model.ops.alloc3f( + model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM") + ) model.set_param("E", vectors_table) diff --git a/spacy/ml/_layers.py b/spacy/ml/_layers.py index e6aa798e7..7e9150d8b 100644 --- a/spacy/ml/_layers.py +++ b/spacy/ml/_layers.py @@ -1,5 +1,4 @@ -from thinc.model import Model -from thinc.api import normal_init +from thinc.api import Model, normal_init def PrecomputableAffine(nO, nI, nF, nP): @@ -20,9 +19,7 @@ def forward(model, X, is_train): nP = model.get_dim("nP") nI = model.get_dim("nI") W = model.get_param("W") - Yf = model.ops.gemm( - X, W.reshape((nF * nO * nP, nI)), trans2=True - ) + Yf = model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True) Yf = Yf.reshape((Yf.shape[0], nF, nO, nP)) Yf = model.ops.xp.vstack((model.get_param("pad"), Yf)) @@ -37,14 +34,14 @@ def forward(model, X, is_train): # for b in range(nB): # for f in range(nF): # dYf[b, ids[b, f]] += dY[b] - # + # # However, we avoid building that array for efficiency -- and just pass # in the indices. dY, ids = dY_ids assert dY.ndim == 3 assert dY.shape[1] == nO, dY.shape assert dY.shape[2] == nP, dY.shape - nB = dY.shape[0] + # nB = dY.shape[0] model.inc_grad("pad", _backprop_precomputable_affine_padding(model, dY, ids)) Xf = X[ids] Xf = Xf.reshape((Xf.shape[0], nF * nI)) @@ -83,12 +80,12 @@ def _backprop_precomputable_affine_padding(model, dY, ids): # for f in range(nF): # if ids[b, f] < 0: # d_padding[0, f] += dY[b] - # + # # Which can be rewritten as: # # for b in range(nB): # d_pad[0, ids[b] < 0] += dY[b] - # + # # I don't know how to avoid the loop without building a whole array :(. # Cursed numpy. 
d_pad = model.ops.alloc((1, nF, nO, nP)) @@ -118,7 +115,7 @@ def init(model, X=None, Y=None): pad = model.ops.alloc4f(1, nF, nO, nP) ops = model.ops - W = normal_init(ops, W.shape, fan_in=nF*nI) + W = normal_init(ops, W.shape, fan_in=nF * nI) model.set_param("W", W) model.set_param("b", b) model.set_param("pad", pad) diff --git a/spacy/ml/component_models.py b/spacy/ml/component_models.py index a24c2bfce..8c694f950 100644 --- a/spacy/ml/component_models.py +++ b/spacy/ml/component_models.py @@ -9,7 +9,7 @@ from thinc.api import Model, Maxout, Linear, residual, reduce_mean, list2ragged from thinc.api import PyTorchLSTM, add, MultiSoftmax, HashEmbed, StaticVectors from thinc.api import expand_window, FeatureExtractor, SparseLinear, chain from thinc.api import clone, concatenate, with_array, Softmax, Logistic, uniqued -from thinc.api import zero_init, glorot_uniform_init +from thinc.api import zero_init def build_text_classifier(arch, config): @@ -33,10 +33,7 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes, **cfg output_layer = Softmax(nO=nr_class, nI=tok2vec.get_dim("nO")) else: # TODO: experiment with init_w=zero_init - output_layer = ( - Linear(nO=nr_class, nI=tok2vec.get_dim("nO")) - >> Logistic() - ) + output_layer = Linear(nO=nr_class, nI=tok2vec.get_dim("nO")) >> Logistic() model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer model.set_ref("tok2vec", tok2vec) model.set_dim("nO", nr_class) @@ -149,13 +146,21 @@ def Tok2Vec( with Model.define_operators({">>": chain, "|": concatenate, "**": clone}): norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=0.0) if subword_features: - prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0) - suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0) - shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0) + prefix = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0 + ) + suffix = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0 + ) + shape = HashEmbed( + nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0 + ) else: prefix, suffix, shape = (None, None, None) if pretrained_vectors is not None: - glove = StaticVectors(vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0) + glove = StaticVectors( + vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0 + ) if subword_features: embed = uniqued( diff --git a/spacy/ml/extract_ngrams.py b/spacy/ml/extract_ngrams.py index 1ec5b5fc1..d4195b9a4 100644 --- a/spacy/ml/extract_ngrams.py +++ b/spacy/ml/extract_ngrams.py @@ -1,5 +1,5 @@ import numpy -from thinc.model import Model +from thinc.api import Model from ..attrs import LOWER @@ -26,9 +26,7 @@ def forward(self, docs, is_train: bool): # The dtype here matches what thinc is expecting -- which differs per # platform (by int definition). This should be fixed once the problem # is fixed on Thinc's side. 
- lengths = self.ops.asarray( - [arr.shape[0] for arr in batch_keys], dtype=numpy.int_ - ) + lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_) batch_keys = self.ops.xp.concatenate(batch_keys) batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f") @@ -36,4 +34,3 @@ def forward(self, docs, is_train: bool): return dY return (batch_keys, batch_vals, lengths), backprop - diff --git a/spacy/ml/tok2vec.py b/spacy/ml/tok2vec.py index 102b88604..5e51bc47a 100644 --- a/spacy/ml/tok2vec.py +++ b/spacy/ml/tok2vec.py @@ -1,11 +1,8 @@ -from thinc.layers import chain, clone, concatenate, with_array, uniqued -from thinc.model import Model -from thinc.layers import noop, with_padded -from thinc.layers import Maxout, expand_window -from thinc.layers import HashEmbed, StaticVectors -from thinc.layers import residual, LayerNorm, FeatureExtractor +from thinc.api import Model, chain, clone, concatenate, with_array, uniqued, noop +from thinc.api import with_padded, Maxout, expand_window, HashEmbed, StaticVectors +from thinc.api import residual, LayerNorm, FeatureExtractor -from spacy.ml import _character_embed +from ..ml import _character_embed from ..util import make_layer, registry @@ -93,8 +90,10 @@ def MaxoutWindowEncoder(config): nW = config["window_size"] nP = config["pieces"] depth = config["depth"] - - cnn = expand_window(window_size=nW), Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True) + cnn = ( + expand_window(window_size=nW), + Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True), + ) model = clone(residual(cnn), depth) model.set_dim("nO", nO) model.attrs["receptive_field"] = nW * depth @@ -103,13 +102,16 @@ def MaxoutWindowEncoder(config): @registry.architectures.register("spacy.MishWindowEncoder.v1") def MishWindowEncoder(config): - from thinc.layers import Mish + from thinc.api import Mish nO = config["width"] nW = config["window_size"] depth = config["depth"] - - cnn = chain(expand_window(window_size=nW), Mish(nO=nO, nI=nO * ((nW * 2) + 1)), LayerNorm(nO)) + cnn = chain( + expand_window(window_size=nW), + Mish(nO=nO, nI=nO * ((nW * 2) + 1)), + LayerNorm(nO), + ) model = clone(residual(cnn), depth) model.set_dim("nO", nO) return model @@ -118,14 +120,20 @@ def MishWindowEncoder(config): @registry.architectures.register("spacy.PretrainedVectors.v1") def PretrainedVectors(config): # TODO: actual vectors instead of name - return StaticVectors(vectors=config["vectors_name"], nO=config["width"], column=config["column"], dropout=0.0) + return StaticVectors( + vectors=config["vectors_name"], + nO=config["width"], + column=config["column"], + dropout=0.0, + ) @registry.architectures.register("spacy.TorchBiLSTMEncoder.v1") def TorchBiLSTMEncoder(config): import torch.nn - # TODO FIX - from thinc.layers import PyTorchRNNWrapper + + # TODO: FIX + from thinc.api import PyTorchRNNWrapper width = config["width"] depth = config["depth"] diff --git a/spacy/pipeline/hooks.py b/spacy/pipeline/hooks.py index 00c328e81..d48b04bd1 100644 --- a/spacy/pipeline/hooks.py +++ b/spacy/pipeline/hooks.py @@ -1,4 +1,4 @@ -from thinc.layers import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity +from thinc.api import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity from .pipes import Pipe from ..language import component @@ -63,8 +63,7 @@ class SimilarityHook(Pipe): @classmethod def Model(cls, length): return siamese( - concatenate(reduce_max(), reduce_mean()), - CauchySimilarity(length * 2) + 
concatenate(reduce_max(), reduce_mean()), CauchySimilarity(length * 2) ) def __call__(self, doc): diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 7b9e4b04e..999132b35 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -3,8 +3,8 @@ from collections import defaultdict import numpy cimport numpy as np -from thinc.layers import chain, list2array -from thinc.util import to_categorical, copy_array, get_array_module +from thinc.api import chain, list2array, to_categorical, get_array_module +from thinc.util import copy_array from .. import util from .pipes import Pipe diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index c77281b2c..ad75d2e78 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -3,11 +3,9 @@ import numpy import srsly import random -from thinc.layers import chain, Linear, Maxout, Softmax, LayerNorm, list2array -from thinc.initializers import zero_init -from thinc.loss import CosineDistance -from thinc.util import to_categorical, get_array_module -from thinc.model import set_dropout_rate +from thinc.api import chain, Linear, Maxout, Softmax, LayerNorm, list2array +from thinc.api import zero_init, CosineDistance, to_categorical, get_array_module +from thinc.api import set_dropout_rate from ..tokens.doc cimport Doc from ..syntax.nn_parser cimport Parser diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py index 9857c87eb..8290468cf 100644 --- a/spacy/pipeline/tok2vec.py +++ b/spacy/pipeline/tok2vec.py @@ -1,3 +1,5 @@ +from thinc.api import Model, set_dropout_rate + from .pipes import Pipe from ..gold import Example from ..tokens import Doc @@ -5,8 +7,6 @@ from ..vocab import Vocab from ..language import component from ..util import link_vectors_to_models, minibatch, registry, eg2doc -from thinc.model import Model, set_dropout_rate - @component("tok2vec", assigns=["doc.tensor"]) class Tok2Vec(Pipe): @@ -39,7 +39,9 @@ class Tok2Vec(Pipe): self.listeners = [] def create_listener(self): - listener = Tok2VecListener(upstream_name="tok2vec", width=self.model.get_dim("nO")) + listener = Tok2VecListener( + upstream_name="tok2vec", width=self.model.get_dim("nO") + ) self.listeners.append(listener) def add_listener(self, listener): @@ -112,10 +114,10 @@ class Tok2Vec(Pipe): docs = [docs] set_dropout_rate(self.model, drop) tokvecs, bp_tokvecs = self.model.begin_update(docs) - + def capture_losses(d_tokvecs): """Accumulate tok2vec loss before doing backprop.""" - l2_loss = sum((d_t2v**2).sum() for d_t2v in d_tokvecs) + l2_loss = sum((d_t2v ** 2).sum() for d_t2v in d_tokvecs) if self.name in losses: losses[self.name] += l2_loss / len(d_tokvecs) else: @@ -133,7 +135,9 @@ class Tok2Vec(Pipe): def get_loss(self, docs, golds, scores): pass - def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs): + def begin_training( + self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs + ): """Allocate models and pre-process training data get_examples (function): Function returning example training data. @@ -151,6 +155,7 @@ class Tok2VecListener(Model): """A layer that gets fed its answers from an upstream connection, for instance from a component earlier in the pipeline. 
""" + name = "tok2vec-listener" def __init__(self, upstream_name, width): diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx index cb8e1d127..442233f19 100644 --- a/spacy/syntax/_parser_model.pyx +++ b/spacy/syntax/_parser_model.pyx @@ -11,9 +11,7 @@ from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free, realloc from cymem.cymem cimport Pool from thinc.extra.search cimport Beam -from thinc.layers import Linear -from thinc.model import Model -from thinc.backends import CupyOps, NumpyOps, use_ops +from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops from thinc.backends.linalg cimport Vec, VecVec cimport blis.cy diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 8e55d3873..cf57e1cf6 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -1,11 +1,8 @@ # cython: infer_types=True # cython: cdivision=True # cython: boundscheck=False -import numpy cimport cython.parallel -import numpy.random cimport numpy as np -from itertools import islice from cpython.ref cimport PyObject, Py_XDECREF from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno from libc.math cimport exp @@ -14,15 +11,16 @@ from libc.string cimport memset, memcpy from libc.stdlib cimport calloc, free from cymem.cymem cimport Pool from thinc.extra.search cimport Beam -from thinc.layers import chain, clone, Linear, list2array -from thinc.backends import NumpyOps, CupyOps, use_ops -from thinc.util import get_array_module from thinc.backends.linalg cimport Vec, VecVec -from thinc.initializers import zero_init -from thinc.model import set_dropout_rate -import srsly -from spacy.gold import Example +from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops +from thinc.api import get_array_module, zero_init, set_dropout_rate +from itertools import islice +import srsly +import numpy.random +import numpy + +from ..gold import Example from ..typedefs cimport weight_t, class_t, hash_t from ._parser_model cimport alloc_activations, free_activations from ._parser_model cimport predict_states, arg_max_if_valid diff --git a/spacy/syntax/nonproj.pyx b/spacy/syntax/nonproj.pyx index f024c1f05..27516ffd9 100644 --- a/spacy/syntax/nonproj.pyx +++ b/spacy/syntax/nonproj.pyx @@ -6,7 +6,7 @@ scheme. 
""" from copy import copy -from spacy.gold import Example +from ..gold import Example from ..tokens.doc cimport Doc, set_children_from_heads from ..errors import Errors diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index b7627b175..4323bb736 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -1,4 +1,3 @@ - import pytest import numpy from spacy.tokens import Doc, Span @@ -274,7 +273,19 @@ def test_doc_is_nered(en_vocab): def test_doc_from_array_sent_starts(en_vocab): words = ["I", "live", "in", "New", "York", ".", "I", "like", "cats", "."] heads = [0, 0, 0, 0, 0, 0, 6, 6, 6, 6] - deps = ["ROOT", "dep", "dep", "dep", "dep", "dep", "ROOT", "dep", "dep", "dep", "dep"] + deps = [ + "ROOT", + "dep", + "dep", + "dep", + "dep", + "dep", + "ROOT", + "dep", + "dep", + "dep", + "dep", + ] doc = Doc(en_vocab, words=words) for i, (dep, head) in enumerate(zip(deps, heads)): doc[i].dep_ = dep diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py index 82fb549ba..221b6f683 100644 --- a/spacy/tests/doc/test_morphanalysis.py +++ b/spacy/tests/doc/test_morphanalysis.py @@ -29,7 +29,9 @@ def test_morph_props(i_has): def test_morph_iter(i_has): assert set(i_has[0].morph) == set(["PronType=prs"]) - assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"]) + assert set(i_has[1].morph) == set( + ["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"] + ) def test_morph_get(i_has): diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py index 17bcd2c64..5e564d1f2 100644 --- a/spacy/tests/doc/test_retokenize_merge.py +++ b/spacy/tests/doc/test_retokenize_merge.py @@ -8,7 +8,12 @@ from ..util import get_doc def test_doc_retokenize_merge(en_tokenizer): text = "WKRO played songs by the beach boys all night" - attrs = {"tag": "NAMED", "lemma": "LEMMA", "ent_type": "TYPE", "morph": "Number=Plur"} + attrs = { + "tag": "NAMED", + "lemma": "LEMMA", + "ent_type": "TYPE", + "morph": "Number=Plur", + } doc = en_tokenizer(text) assert len(doc) == 9 with doc.retokenize() as retokenizer: diff --git a/spacy/tests/lang/ar/test_text.py b/spacy/tests/lang/ar/test_text.py index f4a8cc1e3..c5ab376f1 100644 --- a/spacy/tests/lang/ar/test_text.py +++ b/spacy/tests/lang/ar/test_text.py @@ -1,4 +1,3 @@ - def test_ar_tokenizer_handles_long_text(ar_tokenizer): text = """نجيب محفوظ مؤلف و كاتب روائي عربي، يعد من أهم الأدباء العرب خلال القرن العشرين. 
ولد نجيب محفوظ في مدينة القاهرة، حيث ترعرع و تلقى تعليمه الجامعي في جامعتها، diff --git a/spacy/tests/lang/en/test_indices.py b/spacy/tests/lang/en/test_indices.py index d50c75fc5..93daeec30 100644 --- a/spacy/tests/lang/en/test_indices.py +++ b/spacy/tests/lang/en/test_indices.py @@ -1,4 +1,3 @@ - def test_en_simple_punct(en_tokenizer): text = "to walk, do foo" tokens = en_tokenizer(text) diff --git a/spacy/tests/lang/fi/test_tokenizer.py b/spacy/tests/lang/fi/test_tokenizer.py index 02aa63207..bcd62f239 100644 --- a/spacy/tests/lang/fi/test_tokenizer.py +++ b/spacy/tests/lang/fi/test_tokenizer.py @@ -19,16 +19,10 @@ HYPHENATED_TESTS = [ ABBREVIATION_INFLECTION_TESTS = [ ( "VTT:ssa ennen v:ta 2010 suoritetut mittaukset", - ["VTT:ssa", "ennen", "v:ta", "2010", "suoritetut", "mittaukset"] + ["VTT:ssa", "ennen", "v:ta", "2010", "suoritetut", "mittaukset"], ), - ( - "ALV:n osuus on 24 %.", - ["ALV:n", "osuus", "on", "24", "%", "."] - ), - ( - "Hiihtäjä oli kilpailun 14:s.", - ["Hiihtäjä", "oli", "kilpailun", "14:s", "."] - ) + ("ALV:n osuus on 24 %.", ["ALV:n", "osuus", "on", "24", "%", "."]), + ("Hiihtäjä oli kilpailun 14:s.", ["Hiihtäjä", "oli", "kilpailun", "14:s", "."]), ] diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py index d0d8c2268..fd3acd0a0 100644 --- a/spacy/tests/lang/hu/test_tokenizer.py +++ b/spacy/tests/lang/hu/test_tokenizer.py @@ -294,12 +294,7 @@ WIKI_TESTS = [ ] EXTRA_TESTS = ( - DOT_TESTS - + QUOTE_TESTS - + NUMBER_TESTS - + HYPHEN_TESTS - + WIKI_TESTS - + TYPO_TESTS + DOT_TESTS + QUOTE_TESTS + NUMBER_TESTS + HYPHEN_TESTS + WIKI_TESTS + TYPO_TESTS ) # normal: default tests + 10% of extra tests @@ -308,7 +303,14 @@ TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0]) # slow: remaining 90% of extra tests SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0] -TESTS.extend([pytest.param(x[0], x[1], marks=pytest.mark.slow()) if not isinstance(x[0], tuple) else x for x in SLOW_TESTS]) +TESTS.extend( + [ + pytest.param(x[0], x[1], marks=pytest.mark.slow()) + if not isinstance(x[0], tuple) + else x + for x in SLOW_TESTS + ] +) @pytest.mark.parametrize("text,expected_tokens", TESTS) diff --git a/spacy/tests/lang/sv/test_text.py b/spacy/tests/lang/sv/test_text.py index dc4911ab6..1e26c45bc 100644 --- a/spacy/tests/lang/sv/test_text.py +++ b/spacy/tests/lang/sv/test_text.py @@ -1,4 +1,3 @@ - def test_sv_tokenizer_handles_long_text(sv_tokenizer): text = """Det var så härligt ute på landet. 
Det var sommar, majsen var gul, havren grön, höet var uppställt i stackar nere vid den gröna ängen, och där gick storken på sina långa, diff --git a/spacy/tests/lang/zh/test_text.py b/spacy/tests/lang/zh/test_text.py index d48feaee5..d9a65732e 100644 --- a/spacy/tests/lang/zh/test_text.py +++ b/spacy/tests/lang/zh/test_text.py @@ -1,4 +1,3 @@ - import pytest diff --git a/spacy/tests/morphology/test_morph_converters.py b/spacy/tests/morphology/test_morph_converters.py index 3bff4f924..9486cad45 100644 --- a/spacy/tests/morphology/test_morph_converters.py +++ b/spacy/tests/morphology/test_morph_converters.py @@ -1,4 +1,3 @@ -import pytest from spacy.morphology import Morphology diff --git a/spacy/tests/morphology/test_morph_features.py b/spacy/tests/morphology/test_morph_features.py index 0d8d7dea9..f644a5867 100644 --- a/spacy/tests/morphology/test_morph_features.py +++ b/spacy/tests/morphology/test_morph_features.py @@ -24,13 +24,20 @@ def test_add_morphology_with_int_ids(morphology): morphology.strings.add("gen") morphology.strings.add("Number") morphology.strings.add("sing") - morphology.add({get_string_id("Case"): get_string_id("gen"), get_string_id("Number"): get_string_id("sing")}) + morphology.add( + { + get_string_id("Case"): get_string_id("gen"), + get_string_id("Number"): get_string_id("sing"), + } + ) def test_add_morphology_with_mix_strings_and_ints(morphology): morphology.strings.add("PunctSide") morphology.strings.add("ini") - morphology.add({get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"}) + morphology.add( + {get_string_id("PunctSide"): get_string_id("ini"), "VerbType": "aux"} + ) def test_morphology_tags_hash_distinctly(morphology): diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py index 25892ac71..fe847a6ae 100644 --- a/spacy/tests/parser/test_add_label.py +++ b/spacy/tests/parser/test_add_label.py @@ -1,6 +1,5 @@ import pytest -from thinc.optimizers import Adam -from thinc.backends import NumpyOps +from thinc.api import Adam, NumpyOps from spacy.attrs import NORM from spacy.gold import GoldParse from spacy.vocab import Vocab diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py index 8dda1f406..9a4d21a8d 100644 --- a/spacy/tests/parser/test_ner.py +++ b/spacy/tests/parser/test_ner.py @@ -10,7 +10,7 @@ from spacy.tokens import Doc TRAIN_DATA = [ ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}), ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}), - ] +] @pytest.fixture diff --git a/spacy/tests/parser/test_preset_sbd.py b/spacy/tests/parser/test_preset_sbd.py index 5e56442b5..c6c1240a8 100644 --- a/spacy/tests/parser/test_preset_sbd.py +++ b/spacy/tests/parser/test_preset_sbd.py @@ -1,6 +1,5 @@ import pytest -from thinc.optimizers import Adam -from thinc.backends import NumpyOps +from thinc.api import Adam from spacy.attrs import NORM from spacy.gold import GoldParse from spacy.vocab import Vocab diff --git a/spacy/tests/pipeline/test_entity_ruler.py b/spacy/tests/pipeline/test_entity_ruler.py index 234603e94..b04569e22 100644 --- a/spacy/tests/pipeline/test_entity_ruler.py +++ b/spacy/tests/pipeline/test_entity_ruler.py @@ -149,10 +149,5 @@ def test_entity_ruler_validate(nlp): def test_entity_ruler_properties(nlp, patterns): ruler = EntityRuler(nlp, patterns=patterns, overwrite_ents=True) - assert sorted(ruler.labels) == sorted([ - "HELLO", - "BYE", - "COMPLEX", - "TECH_ORG" - ]) + assert sorted(ruler.labels) == sorted(["HELLO", "BYE", "COMPLEX", 
"TECH_ORG"]) assert sorted(ruler.ent_ids) == ["a1", "a2"] diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py index 6a6ec8665..366cd4f1a 100644 --- a/spacy/tests/pipeline/test_tagger.py +++ b/spacy/tests/pipeline/test_tagger.py @@ -1,5 +1,4 @@ import pytest -import srsly from spacy.language import Language diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index d9e1d663a..2bfdbd7c3 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -270,7 +270,9 @@ def test_issue1963(en_tokenizer): def test_issue1967(label): ner = EntityRecognizer(Vocab()) example = Example(doc=None) - example.set_token_annotation(ids=[0], words=["word"], tags=["tag"], heads=[0], deps=["dep"], entities=[label]) + example.set_token_annotation( + ids=[0], words=["word"], tags=["tag"], heads=[0], deps=["dep"], entities=[label] + ) ner.moves.get_actions(gold_parses=[example]) diff --git a/spacy/tests/regression/test_issue3611.py b/spacy/tests/regression/test_issue3611.py index fca884356..120cea1d2 100644 --- a/spacy/tests/regression/test_issue3611.py +++ b/spacy/tests/regression/test_issue3611.py @@ -39,8 +39,5 @@ def test_issue3611(): for batch in batches: nlp.update( - examples=batch, - sgd=optimizer, - drop=0.1, - losses=losses, + examples=batch, sgd=optimizer, drop=0.1, losses=losses, ) diff --git a/spacy/tests/regression/test_issue4030.py b/spacy/tests/regression/test_issue4030.py index 7153594db..7158d9b21 100644 --- a/spacy/tests/regression/test_issue4030.py +++ b/spacy/tests/regression/test_issue4030.py @@ -39,10 +39,7 @@ def test_issue4030(): for batch in batches: nlp.update( - examples=batch, - sgd=optimizer, - drop=0.1, - losses=losses, + examples=batch, sgd=optimizer, drop=0.1, losses=losses, ) # processing of an empty doc should result in 0.0 for all categories diff --git a/spacy/tests/test_architectures.py b/spacy/tests/test_architectures.py index 786e2cedf..31b2a2d2f 100644 --- a/spacy/tests/test_architectures.py +++ b/spacy/tests/test_architectures.py @@ -1,6 +1,6 @@ import pytest from spacy import registry -from thinc.layers import Linear +from thinc.api import Linear from catalogue import RegistryError diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 049858960..306adc881 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -65,8 +65,9 @@ def test_cli_converters_conllu2json_subtokens(): "5\t.\t$.\tPUNCT\t_\t_\t4\tpunct\t_\tname=O", ] input_data = "\n".join(lines) - converted = conllu2json(input_data, n_sents=1, merge_subtokens=True, - append_morphology=True) + converted = conllu2json( + input_data, n_sents=1, merge_subtokens=True, append_morphology=True + ) assert len(converted) == 1 assert converted[0]["id"] == 0 assert len(converted[0]["paragraphs"]) == 1 @@ -81,11 +82,16 @@ def test_cli_converters_conllu2json_subtokens(): "NOUN__Definite=Ind|Gender=Masc|Number=Sing", "PROPN_X__Gender=Fem,Masc|Tense=past", "VERB__Mood=Ind|Tense=Pres|VerbForm=Fin", - "PUNCT" + "PUNCT", ] - assert [t["pos"] for t in tokens] == ['NOUN', 'PROPN', 'VERB', 'PUNCT'] - assert [t["morph"] for t in tokens] == ['Definite=Ind|Gender=Masc|Number=Sing', 'Gender=Fem,Masc|Tense=past', 'Mood=Ind|Tense=Pres|VerbForm=Fin', ''] - assert [t["lemma"] for t in tokens] == ['dommer', 'Finn Eilertsen', 'avstå', '$.'] + assert [t["pos"] for t in tokens] == ["NOUN", "PROPN", "VERB", "PUNCT"] + assert [t["morph"] for t in tokens] == [ + 
"Definite=Ind|Gender=Masc|Number=Sing", + "Gender=Fem,Masc|Tense=past", + "Mood=Ind|Tense=Pres|VerbForm=Fin", + "", + ] + assert [t["lemma"] for t in tokens] == ["dommer", "Finn Eilertsen", "avstå", "$."] assert [t["head"] for t in tokens] == [1, 1, 0, -1] assert [t["dep"] for t in tokens] == ["appos", "nsubj", "ROOT", "punct"] assert [t["ner"] for t in tokens] == ["O", "U-PER", "O", "O"] diff --git a/spacy/tests/tokenizer/test_exceptions.py b/spacy/tests/tokenizer/test_exceptions.py index 8276d7aea..9a98e049e 100644 --- a/spacy/tests/tokenizer/test_exceptions.py +++ b/spacy/tests/tokenizer/test_exceptions.py @@ -4,7 +4,9 @@ import pytest def test_tokenizer_handles_emoticons(tokenizer): # Tweebo challenge (CMU) - text = """:o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| :> ....""" + text = ( + """:o :/ :'( >:o (: :) >.< XD -__- o.O ;D :-) @_@ :P 8D :1 >:( :D =| :> ....""" + ) tokens = tokenizer(text) assert tokens[0].text == ":o" assert tokens[1].text == ":/" diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py index 3dce1ae31..c035559b4 100644 --- a/spacy/tests/tokenizer/test_tokenizer.py +++ b/spacy/tests/tokenizer/test_tokenizer.py @@ -130,7 +130,19 @@ def test_tokenizer_special_cases_with_affixes(tokenizer): tokenizer.add_special_case("_SPECIAL_", [{"orth": "_SPECIAL_"}]) tokenizer.add_special_case("A/B", [{"orth": "A/B"}]) doc = tokenizer(text) - assert [token.text for token in doc] == ["(", "(", "(", "_SPECIAL_", "A/B", ",", "A/B", "-", "A/B", '"', ")"] + assert [token.text for token in doc] == [ + "(", + "(", + "(", + "_SPECIAL_", + "A/B", + ",", + "A/B", + "-", + "A/B", + '"', + ")", + ] def test_tokenizer_special_cases_with_period(tokenizer): diff --git a/spacy/tokens/_retokenize.pyx b/spacy/tokens/_retokenize.pyx index ec7e8a9e8..337c154a2 100644 --- a/spacy/tokens/_retokenize.pyx +++ b/spacy/tokens/_retokenize.pyx @@ -4,8 +4,8 @@ from libc.string cimport memcpy, memset from libc.stdlib cimport malloc, free from cymem.cymem cimport Pool -from thinc.util import get_array_module +from thinc.api import get_array_module import numpy from .doc cimport Doc, set_children_from_heads, token_by_start, token_by_end diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 4a18acd77..65b70d1b3 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -1,7 +1,7 @@ import numpy import zlib import srsly -from thinc.backends import NumpyOps +from thinc.api import NumpyOps from ..compat import copy_reg from ..tokens import Doc diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 16ef5f966..54d92f8b1 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -11,7 +11,8 @@ import numpy import numpy.linalg import struct import srsly -from thinc.util import get_array_module, copy_array +from thinc.api import get_array_module +from thinc.util import copy_array from .span cimport Span from .token cimport Token diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 796a5e674..d6b50b5f4 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -4,7 +4,7 @@ from libc.math cimport sqrt import numpy import numpy.linalg -from thinc.util import get_array_module +from thinc.api import get_array_module from collections import defaultdict from .doc cimport token_by_start, token_by_end, get_token_attr, _get_lca_matrix diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index c241cd5ad..379da6c77 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -7,7 +7,7 @@ 
cimport numpy as np np.import_array() import numpy -from thinc.util import get_array_module +from thinc.api import get_array_module from ..typedefs cimport hash_t from ..lexeme cimport Lexeme diff --git a/spacy/util.py b/spacy/util.py index 0cc11cef7..995ff722f 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -5,13 +5,9 @@ import re from pathlib import Path import random from typing import List - import thinc import thinc.config -from thinc.backends import NumpyOps, get_current_ops -from thinc.optimizers import Adam -from thinc.util import require_gpu - +from thinc.api import NumpyOps, get_current_ops, Adam, require_gpu import functools import itertools import numpy.random @@ -797,5 +793,13 @@ def create_default_optimizer(): eps = env_opt("optimizer_eps", 1e-8) L2 = env_opt("L2_penalty", 1e-6) grad_clip = env_opt("grad_norm_clip", 1.0) - optimizer = Adam(learn_rate, L2=L2, beta1=beta1, beta2=beta2, eps=eps, ops=ops, grad_clip=grad_clip) + optimizer = Adam( + learn_rate, + L2=L2, + beta1=beta1, + beta2=beta2, + eps=eps, + ops=ops, + grad_clip=grad_clip, + ) return optimizer diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index f812acac4..0ade8b280 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -5,8 +5,7 @@ from libcpp.set cimport set as cppset import functools import numpy import srsly -from thinc.util import get_array_module -from thinc.backends import get_current_ops +from thinc.api import get_array_module, get_current_ops from .strings cimport StringStore diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 3da9978c4..a1929559f 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -2,7 +2,7 @@ from libc.string cimport memcpy import srsly -from thinc.util import get_array_module +from thinc.api import get_array_module from .lexeme cimport EMPTY_LEXEME from .lexeme cimport Lexeme
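Note on the recurring patterns above (an illustrative sketch, not part of the patch): nearly every hunk applies one of two mechanical rewrites. First, thinc imports are consolidated from internal submodules (thinc.layers, thinc.util, thinc.backends, thinc.optimizers, thinc.loss) onto the public thinc.api surface; second, str.format() calls are replaced with f-strings, alongside black-style reflowing. A minimal Python illustration, using only names that appear in the hunks themselves (the variable value is hypothetical):

    # Before: importing from thinc internals, one submodule at a time.
    # from thinc.layers import Linear, Maxout, chain, list2array
    # from thinc.util import prefer_gpu
    # from thinc.loss import CosineDistance

    # After: the same names via the single public entry point.
    from thinc.api import Linear, Maxout, chain, list2array, prefer_gpu, CosineDistance

    # Before: "Created output directory: {}".format(output_dir)
    # After: an f-string, as in cli/train.py and cli/pretrain.py above.
    output_dir = "training-output"  # hypothetical value for illustration
    print(f"Created output directory: {output_dir}")

Both rewrites are behavior-preserving: thinc.api re-exports the same objects as the internal modules, and each f-string formats identically to the .format() call it replaces.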