mirror of https://github.com/explosion/spaCy.git
synced 2025-11-04 09:57:26 +03:00

Tidy up and auto-format

commit e3f40a6a0f
parent 1278161f47
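The hunks below apply two mechanical patterns across the codebase: imports that reached into thinc's internal submodules (thinc.util, thinc.layers, thinc.model, thinc.backends, thinc.loss, thinc.initializers) are rerouted through the public thinc.api facade, and str.format() calls become f-strings. A minimal before/after sketch of both patterns (illustrative names, not lines from this diff):

    # before
    # from thinc.util import prefer_gpu
    # msg = "Using GPU: {}".format(use_gpu)

    # after
    from thinc.api import prefer_gpu  # same function, re-exported publicly

    use_gpu = prefer_gpu()  # True if a GPU was activated, else False
    msg = f"Using GPU: {use_gpu}"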
				
			
@@ -5,7 +5,7 @@ warnings.filterwarnings("ignore", message="numpy.dtype size changed")
 warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
 
 # These are imported as part of the API
-from thinc.util import prefer_gpu, require_gpu
+from thinc.api import prefer_gpu, require_gpu
 
 from . import pipeline
 from .cli.info import info as cli_info
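Both helpers are re-exported unchanged by thinc.api; the practical difference between them is failure behavior. A short usage sketch (thinc v8 semantics):

    from thinc.api import prefer_gpu, require_gpu

    if not prefer_gpu():  # tries to activate the GPU, falls back to CPU quietly
        print("No GPU found; continuing on CPU")
    # require_gpu()       # same activation, but raises if no GPU is available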
				
			
@@ -4,7 +4,7 @@ from .link import link  # noqa: F401
 from .package import package  # noqa: F401
 from .profile import profile  # noqa: F401
 from .train import train  # noqa: F401
-from .train_from_config import train_from_config_cli # noqa: F401
+from .train_from_config import train_from_config_cli  # noqa: F401
 from .pretrain import pretrain  # noqa: F401
 from .debug_data import debug_data  # noqa: F401
 from .evaluate import evaluate  # noqa: F401
@@ -192,11 +192,7 @@ def debug_data(
             has_ws_ents_error = True
 
         if gold_train_data["punct_ents"]:
-            msg.warn(
-                "{} entity span(s) with punctuation".format(
-                    gold_train_data["punct_ents"]
-                )
-            )
+            msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
             has_punct_ents_warning = True
 
         for label in new_labels:
@@ -4,14 +4,12 @@ import time
 import re
 from collections import Counter
 from pathlib import Path
-from thinc.layers import Linear, Maxout
-from thinc.util import prefer_gpu
+from thinc.api import Linear, Maxout, chain, list2array, prefer_gpu
+from thinc.api import CosineDistance, L2Distance
 from wasabi import msg
 import srsly
-from thinc.layers import chain, list2array
-from thinc.loss import CosineDistance, L2Distance
 
-from spacy.gold import Example
+from ..gold import Example
 from ..errors import Errors
 from ..tokens import Doc
 from ..attrs import ID, HEAD
@@ -85,7 +83,7 @@ def pretrain(
         )
     if not output_dir.exists():
         output_dir.mkdir()
-        msg.good("Created output directory: {}".format(output_dir))
+        msg.good(f"Created output directory: {output_dir}")
     srsly.write_json(output_dir / "config.json", config)
     msg.good("Saved settings to config.json")
 
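The .format() to f-string conversions in these hunks are behavior-preserving. A quick check with a hypothetical value:

    from pathlib import Path

    output_dir = Path("output")
    old = "Created output directory: {}".format(output_dir)
    new = f"Created output directory: {output_dir}"
    assert old == new  # identical strings; the f-string is just shorter and faster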
				
			
@@ -1,7 +1,7 @@
 import os
 import tqdm
 from pathlib import Path
-from thinc.backends import use_ops
+from thinc.api import use_ops
 from timeit import default_timer as timer
 import shutil
 import srsly
@@ -89,7 +89,7 @@ def train(
         )
     if not output_path.exists():
         output_path.mkdir()
-        msg.good("Created output directory: {}".format(output_path))
+        msg.good(f"Created output directory: {output_path}")
 
     tag_map = {}
     if tag_map_path is not None:
@@ -125,17 +125,17 @@ def train(
     msg.text(f"Training pipeline: {pipeline}")
     disabled_pipes = None
     pipes_added = False
-    msg.text("Training pipeline: {}".format(pipeline))
+    msg.text(f"Training pipeline: {pipeline}")
     if use_gpu >= 0:
         activated_gpu = None
         try:
             activated_gpu = set_gpu(use_gpu)
         except Exception as e:
-            msg.warn("Exception: {}".format(e))
+            msg.warn(f"Exception: {e}")
         if activated_gpu is not None:
-            msg.text("Using GPU: {}".format(use_gpu))
+            msg.text(f"Using GPU: {use_gpu}")
         else:
-            msg.warn("Unable to activate GPU: {}".format(use_gpu))
+            msg.warn(f"Unable to activate GPU: {use_gpu}")
             msg.text("Using CPU only")
             use_gpu = -1
     if base_model:
@@ -158,11 +158,11 @@ def train(
                     "positive_label": textcat_positive_label,
                 }
             if pipe not in nlp.pipe_names:
-                msg.text("Adding component to base model '{}'".format(pipe))
+                msg.text(f"Adding component to base model '{pipe}'")
                 nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg))
                 pipes_added = True
             elif replace_components:
-                msg.text("Replacing component from base model '{}'".format(pipe))
+                msg.text(f"Replacing component from base model '{pipe}'")
                 nlp.replace_pipe(pipe, nlp.create_pipe(pipe, config=pipe_cfg))
                 pipes_added = True
             else:
@@ -180,7 +180,7 @@ def train(
                             f"Existing cfg: {base_cfg}, provided cfg: {pipe_cfg}",
                             exits=1,
                         )
-                msg.text("Extending component from base model '{}'".format(pipe))
+                msg.text(f"Extending component from base model '{pipe}'")
         disabled_pipes = nlp.disable_pipes(
             [p for p in nlp.pipe_names if p not in pipeline]
         )
@@ -377,7 +377,7 @@ def train(
                             msg.warn(
                                 "Did you provide the same parameters during 'train' as during 'pretrain'?"
                             )
-                        msg.fail("Original error message: {}".format(e), exits=1)
+                        msg.fail(f"Original error message: {e}", exits=1)
                     if raw_text:
                         # If raw text is available, perform 'rehearsal' updates,
                         # which use unlabelled data to reduce overfitting.
@@ -504,11 +504,7 @@ def train(
                         )
                         break
     except Exception as e:
-        msg.warn(
-            "Aborting and saving the final best model. Encountered exception: {}".format(
-                e
-            )
-        )
+        msg.warn(f"Aborting and saving final best model. Encountered exception: {e}")
     finally:
         best_pipes = nlp.pipe_names
         if disabled_pipes:
@@ -1,19 +1,20 @@
-from typing import Optional, Dict, List, Union, Sequence
 import plac
-from thinc.util import require_gpu
 from wasabi import msg
 from pathlib import Path
 import thinc
 import thinc.schedules
-from thinc.model import Model
-from spacy.gold import GoldCorpus
-import spacy
-from spacy.pipeline.tok2vec import Tok2VecListener
+from typing import Optional, Dict, List, Union, Sequence
+from thinc.api import Model
 from pydantic import BaseModel, FilePath, StrictInt
 import tqdm
 
-from ..ml import component_models
-from .. import util
+# TODO: relative imports?
+import spacy
+from spacy.gold import GoldCorpus
+from spacy.pipeline.tok2vec import Tok2VecListener
+from spacy.ml import component_models
+from spacy import util
+
 
 registry = util.registry
 
@@ -153,10 +154,9 @@ def create_tb_parser_model(
     hidden_width: StrictInt = 64,
     maxout_pieces: StrictInt = 3,
 ):
-    from thinc.layers import Linear, chain, list2array
+    from thinc.api import Linear, chain, list2array, use_ops, zero_init
     from spacy.ml._layers import PrecomputableAffine
     from spacy.syntax._parser_model import ParserModel
-    from thinc.api import use_ops, zero_init
 
     token_vector_width = tok2vec.get_dim("nO")
     tok2vec = chain(tok2vec, list2array())
@@ -221,13 +221,9 @@ def train_from_config_cli(
 
 
 def train_from_config(
-    config_path,
-    data_paths,
-    raw_text=None,
-    meta_path=None,
-    output_path=None,
+    config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
 ):
-    msg.info("Loading config from: {}".format(config_path))
+    msg.info(f"Loading config from: {config_path}")
     config = util.load_from_config(config_path, create_objects=True)
     use_gpu = config["training"]["use_gpu"]
     if use_gpu >= 0:
@@ -241,9 +237,7 @@ def train_from_config(
     msg.info("Loading training corpus")
     corpus = GoldCorpus(data_paths["train"], data_paths["dev"], limit=limit)
     msg.info("Initializing the nlp pipeline")
-    nlp.begin_training(
-        lambda: corpus.train_examples, device=use_gpu
-    )
+    nlp.begin_training(lambda: corpus.train_examples, device=use_gpu)
 
     train_batches = create_train_batches(nlp, corpus, config["training"])
     evaluate = create_evaluation_callback(nlp, optimizer, corpus, config["training"])
@@ -260,7 +254,7 @@ def train_from_config(
         config["training"]["eval_frequency"],
     )
 
-    msg.info("Training. Initial learn rate: {}".format(optimizer.learn_rate))
+    msg.info(f"Training. Initial learn rate: {optimizer.learn_rate}")
     print_row = setup_printer(config)
 
     try:
@@ -414,7 +408,7 @@ def subdivide_batch(batch):
 def setup_printer(config):
     score_cols = config["training"]["scores"]
     score_widths = [max(len(col), 6) for col in score_cols]
-    loss_cols = ["Loss {}".format(pipe) for pipe in config["nlp"]["pipeline"]]
+    loss_cols = [f"Loss {pipe}" for pipe in config["nlp"]["pipeline"]]
     loss_widths = [max(len(col), 8) for col in loss_cols]
     table_header = ["#"] + loss_cols + score_cols + ["Score"]
     table_header = [col.upper() for col in table_header]
@@ -30,7 +30,7 @@ try:
 except ImportError:
     cupy = None
 
-from thinc.optimizers import Optimizer  # noqa: F401
+from thinc.api import Optimizer  # noqa: F401
 
 pickle = pickle
 copy_reg = copy_reg
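The # noqa: F401 marker silences flake8's "imported but unused" warning (F401); the import exists purely so other modules can import Optimizer from here. A sketch of the re-export pattern (module names hypothetical):

    # mypkg/compat.py
    from thinc.api import Optimizer  # noqa: F401  - deliberate re-export

    # elsewhere:
    # from mypkg.compat import Optimizer  # resolves via the re-export above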
				
			
@@ -1,4 +1,3 @@
-
 # Setting explicit height and max-width: none on the SVG is required for
 # Jupyter to render it properly in a cell
 
@@ -1,4 +1,3 @@
-
 def explain(term):
     """Get a description for a given POS tag, dependency label or entity type.
 
@@ -1,6 +1,6 @@
 from cymem.cymem cimport Pool
 
-from spacy.tokens import Doc
+from .tokens import Doc
 from .typedefs cimport attr_t
 from .syntax.transition_system cimport Transition
 
@@ -65,5 +65,3 @@ cdef class Example:
     cdef public TokenAnnotation token_annotation
     cdef public DocAnnotation doc_annotation
     cdef public object goldparse
-
-
@@ -6,7 +6,7 @@ from libcpp.vector cimport vector
 from libc.stdint cimport int32_t, int64_t
 from libc.stdio cimport FILE
 
-from spacy.vocab cimport Vocab
+from .vocab cimport Vocab
 from .typedefs cimport hash_t
 
 from .structs cimport KBEntryC, AliasC
@@ -169,4 +169,3 @@ cdef class Reader:
     cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
 
     cdef int _read(self, void* value, size_t size) except -1
-
@@ -1,4 +1,3 @@
-
 # Source: https://github.com/stopwords-iso/stopwords-af
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/Alir3z4/stop-words
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 অতএব অথচ অথবা অনুযায়ী অনেক অনেকে অনেকেই অন্তত  অবধি অবশ্য অর্থাৎ অন্য অনুযায়ী অর্ধভাগে

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/Alir3z4/stop-words
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 á a ab aber ach acht achte achten achter achtes ag alle allein allem allen

@@ -1,4 +1,3 @@
-
 def get_pos_from_wiktionary():
     import re
     from gensim.corpora.wikicorpus import extract_pages

@@ -1,4 +1,3 @@
-
 # These exceptions are used to add NORM values based on a token's ORTH value.
 # Norms are only set if no alternative is provided in the tokenizer exceptions.
 

@@ -1,4 +1,3 @@
-
 # Stop words
 # Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 _exc = {
     # Slang and abbreviations
     "cos": "because",

@@ -1,4 +1,3 @@
-
 # Stop words
 STOP_WORDS = set(
     """

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 actualmente acuerdo adelante ademas además adrede afirmó agregó ahi ahora ahí

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/stopwords-iso/stopwords-et
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 verb_roots = """
 #هست
 آخت#آهنج

@@ -1,4 +1,3 @@
-
 # Stop words from HAZM package
 STOP_WORDS = set(
     """

@@ -1,4 +1,3 @@
-
 # Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt
 # Reformatted with some minor corrections
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 a à â abord absolument afin ah ai aie ailleurs ainsi ait allaient allo allons

@@ -1,4 +1,3 @@
-
 # fmt: off
 consonants = ["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "z"]
 broad_vowels = ["a", "á", "o", "ó", "u", "ú"]

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/Xangis/extra-stopwords
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 ಹಲವು

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/stopwords-iso/stopwords-lv
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json
 STOP_WORDS = set(
     """

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # These exceptions are used to add NORM values based on a token's ORTH value.
 # Individual languages can also add their own exceptions and overwrite them -
 # for example, British vs. American spelling in English.

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 _exc = {
     # Slang
     "прив": "привет",

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 අතර

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/Ardevop-sk/stopwords-sk
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/stopwords-iso/stopwords-sl
 # TODO: probably needs to be tidied up – the list seems to have month names in
 # it, which shouldn't be considered stop words.

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # Source: https://github.com/andrixh/index-albanian
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 _exc = {
     # Slang
     "ћале": "отац",

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 а

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 STOP_WORDS = set(
     """
 aderton adertonde adjö aldrig alla allas allt alltid alltså än andra andras

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 # Stop words
 
 STOP_WORDS = set(

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 _exc = {
     # Conjugation and Diversion invalid to Tonal form (ผันอักษรและเสียงไม่ตรงกับรูปวรรณยุกต์)
     "สนุ๊กเกอร์": "สนุกเกอร์",
@@ -34,7 +34,7 @@ URL_PATTERN = (
     r"|"
     # host & domain names
     # mods: match is case-sensitive, so include [A-Z]
-      "(?:"
+      "(?:"  # noqa: E131
         "(?:"
           "[A-Za-z0-9\u00a1-\uffff]"
           "[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
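E131 is pycodestyle's "continuation line unaligned for hanging indent". The regex fragments above are indented to mirror the pattern's nesting, so the warning is suppressed on that single line rather than reflowing the whole regex. A minimal sketch of the same line-scoped suppression (hypothetical pattern):

    PATTERN = (
        "(?:"
          "[A-Za-z0-9]+"  # noqa: E131  - indentation mirrors the regex nesting
        ")"
    )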
				
			
@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 >>> from spacy.lang.tr.examples import sentences

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ..norm_exceptions import BASE_NORMS
 from ...language import Language

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 

@@ -1,4 +1,3 @@
-
 """
 Example sentences to test spaCy and its language models.
 
@@ -4,7 +4,7 @@ import weakref
 import functools
 from contextlib import contextmanager
 from copy import copy, deepcopy
-from thinc.backends import get_current_ops
+from thinc.api import get_current_ops
 import srsly
 import multiprocessing as mp
 from itertools import chain, cycle
@@ -6,7 +6,7 @@ cimport numpy as np
 np.import_array()
 
 import numpy
-from thinc.util import get_array_module
+from thinc.api import get_array_module
 
 from .typedefs cimport attr_t, flags_t
 from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE
@@ -3,18 +3,20 @@ from thinc.api import Model
 
 def CharacterEmbed(nM, nC):
     # nM: Number of dimensions per character. nC: Number of characters.
-    nO = nM*nC if (nM is not None and nC is not None) else None
+    nO = nM * nC if (nM is not None and nC is not None) else None
     return Model(
         "charembed",
         forward,
         init=init,
         dims={"nM": nM, "nC": nC, "nO": nO, "nV": 256},
-        params={"E": None}
+        params={"E": None},
     ).initialize()
 
 
 def init(model, X=None, Y=None):
-    vectors_table = model.ops.alloc3f(model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM"))
+    vectors_table = model.ops.alloc3f(
+        model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM")
+    )
     model.set_param("E", vectors_table)
 
 
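As the comment in CharacterEmbed notes, the output is the concatenation of nC per-character embeddings of nM dimensions each, hence nO = nM * nC. A worked value (numbers are only an example):

    nM, nC = 16, 8    # 16 dims per character, 8 characters per token
    nO = nM * nC
    assert nO == 128  # each token yields one 128-dimensional vector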
				
			
@@ -1,5 +1,4 @@
-from thinc.model import Model
-from thinc.api import normal_init
+from thinc.api import Model, normal_init
 
 
 def PrecomputableAffine(nO, nI, nF, nP):
@@ -20,9 +19,7 @@ def forward(model, X, is_train):
     nP = model.get_dim("nP")
     nI = model.get_dim("nI")
     W = model.get_param("W")
-    Yf = model.ops.gemm(
-        X, W.reshape((nF * nO * nP, nI)), trans2=True
-    )
+    Yf = model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True)
     Yf = Yf.reshape((Yf.shape[0], nF, nO, nP))
     Yf = model.ops.xp.vstack((model.get_param("pad"), Yf))
 
@@ -44,7 +41,7 @@ def forward(model, X, is_train):
         assert dY.ndim == 3
         assert dY.shape[1] == nO, dY.shape
         assert dY.shape[2] == nP, dY.shape
-        nB = dY.shape[0]
+        # nB = dY.shape[0]
         model.inc_grad("pad", _backprop_precomputable_affine_padding(model, dY, ids))
         Xf = X[ids]
         Xf = Xf.reshape((Xf.shape[0], nF * nI))
@@ -118,7 +115,7 @@ def init(model, X=None, Y=None):
     pad = model.ops.alloc4f(1, nF, nO, nP)
 
     ops = model.ops
-    W = normal_init(ops, W.shape, fan_in=nF*nI)
+    W = normal_init(ops, W.shape, fan_in=nF * nI)
     model.set_param("W", W)
     model.set_param("b", b)
     model.set_param("pad", pad)
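normal_init draws weights from a zero-mean normal whose scale shrinks with fan_in, the number of inputs feeding each output unit, which is why the call above passes fan_in=nF * nI. A small sketch with the same call shape (assumes thinc v8; the printed std is only approximately 1 / sqrt(fan_in)):

    from thinc.api import NumpyOps, normal_init

    ops = NumpyOps()
    nF, nI, nO, nP = 4, 8, 16, 3
    W = normal_init(ops, (nF * nO * nP, nI), fan_in=nF * nI)
    print(W.shape, float(W.std()))  # std near 1 / sqrt(32), about 0.18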
				
			
@@ -9,7 +9,7 @@ from thinc.api import Model, Maxout, Linear, residual, reduce_mean, list2ragged
 from thinc.api import PyTorchLSTM, add, MultiSoftmax, HashEmbed, StaticVectors
 from thinc.api import expand_window, FeatureExtractor, SparseLinear, chain
 from thinc.api import clone, concatenate, with_array, Softmax, Logistic, uniqued
-from thinc.api import zero_init, glorot_uniform_init
+from thinc.api import zero_init
 
 
 def build_text_classifier(arch, config):
@@ -33,10 +33,7 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes, **cfg
             output_layer = Softmax(nO=nr_class, nI=tok2vec.get_dim("nO"))
         else:
             # TODO: experiment with init_w=zero_init
-            output_layer = (
-                Linear(nO=nr_class, nI=tok2vec.get_dim("nO"))
-                >> Logistic()
-            )
+            output_layer = Linear(nO=nr_class, nI=tok2vec.get_dim("nO")) >> Logistic()
         model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
     model.set_ref("tok2vec", tok2vec)
     model.set_dim("nO", nr_class)
@@ -149,13 +146,21 @@ def Tok2Vec(
     with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
         norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=0.0)
         if subword_features:
-            prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0)
-            suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0)
-            shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0)
+            prefix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0
+            )
+            suffix = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0
+            )
+            shape = HashEmbed(
+                nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0
+            )
         else:
             prefix, suffix, shape = (None, None, None)
         if pretrained_vectors is not None:
-            glove = StaticVectors(vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0)
+            glove = StaticVectors(
+                vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0
+            )
 
             if subword_features:
                 embed = uniqued(
@@ -1,5 +1,5 @@
 import numpy
-from thinc.model import Model
+from thinc.api import Model
 
 from ..attrs import LOWER
 
@@ -26,9 +26,7 @@ def forward(self, docs, is_train: bool):
     # The dtype here matches what thinc is expecting -- which differs per
     # platform (by int definition). This should be fixed once the problem
     # is fixed on Thinc's side.
-    lengths = self.ops.asarray(
-        [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
-    )
+    lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
     batch_keys = self.ops.xp.concatenate(batch_keys)
     batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
 
@@ -36,4 +34,3 @@ def forward(self, docs, is_train: bool):
         return dY
 
     return (batch_keys, batch_vals, lengths), backprop
-
@@ -1,11 +1,8 @@
-from thinc.layers import chain, clone, concatenate, with_array, uniqued
-from thinc.model import Model
-from thinc.layers import noop, with_padded
-from thinc.layers import Maxout, expand_window
-from thinc.layers import HashEmbed, StaticVectors
-from thinc.layers import residual, LayerNorm, FeatureExtractor
+from thinc.api import Model, chain, clone, concatenate, with_array, uniqued, noop
+from thinc.api import with_padded, Maxout, expand_window, HashEmbed, StaticVectors
+from thinc.api import residual, LayerNorm, FeatureExtractor
 
-from spacy.ml import _character_embed
+from ..ml import _character_embed
 from ..util import make_layer, registry
 
 
@@ -93,8 +90,10 @@ def MaxoutWindowEncoder(config):
     nW = config["window_size"]
     nP = config["pieces"]
     depth = config["depth"]
-
-    cnn = expand_window(window_size=nW), Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True)
+    cnn = (
+        expand_window(window_size=nW),
+        Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True),
+    )
     model = clone(residual(cnn), depth)
     model.set_dim("nO", nO)
     model.attrs["receptive_field"] = nW * depth
@@ -103,13 +102,16 @@ def MaxoutWindowEncoder(config):
 
 @registry.architectures.register("spacy.MishWindowEncoder.v1")
 def MishWindowEncoder(config):
-    from thinc.layers import Mish
+    from thinc.api import Mish
 
     nO = config["width"]
     nW = config["window_size"]
     depth = config["depth"]
 
-    cnn = chain(expand_window(window_size=nW), Mish(nO=nO, nI=nO * ((nW * 2) + 1)), LayerNorm(nO))
+    cnn = chain(
+        expand_window(window_size=nW),
+        Mish(nO=nO, nI=nO * ((nW * 2) + 1)),
+        LayerNorm(nO),
+    )
     model = clone(residual(cnn), depth)
     model.set_dim("nO", nO)
     return model
@@ -118,14 +120,20 @@ def MishWindowEncoder(config):
 @registry.architectures.register("spacy.PretrainedVectors.v1")
 def PretrainedVectors(config):
     # TODO: actual vectors instead of name
-    return StaticVectors(vectors=config["vectors_name"], nO=config["width"], column=config["column"], dropout=0.0)
+    return StaticVectors(
+        vectors=config["vectors_name"],
+        nO=config["width"],
+        column=config["column"],
+        dropout=0.0,
+    )
 
 
 @registry.architectures.register("spacy.TorchBiLSTMEncoder.v1")
 def TorchBiLSTMEncoder(config):
     import torch.nn
-    # TODO FIX
-    from thinc.layers import PyTorchRNNWrapper
+
+    # TODO: FIX
+    from thinc.api import PyTorchRNNWrapper
 
     width = config["width"]
     depth = config["depth"]
@@ -1,4 +1,4 @@
-from thinc.layers import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity
+from thinc.api import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity
 
 from .pipes import Pipe
 from ..language import component
@@ -63,8 +63,7 @@ class SimilarityHook(Pipe):
     @classmethod
     def Model(cls, length):
         return siamese(
-            concatenate(reduce_max(), reduce_mean()),
-            CauchySimilarity(length * 2)
+            concatenate(reduce_max(), reduce_mean()), CauchySimilarity(length * 2)
         )
 
     def __call__(self, doc):
@@ -3,8 +3,8 @@ from collections import defaultdict
 import numpy
 cimport numpy as np
 
-from thinc.layers import chain, list2array
-from thinc.util import to_categorical, copy_array, get_array_module
+from thinc.api import chain, list2array, to_categorical, get_array_module
+from thinc.util import copy_array
 
 from .. import util
 from .pipes import Pipe
@@ -3,11 +3,9 @@
 import numpy
 import srsly
 import random
-from thinc.layers import chain, Linear, Maxout, Softmax, LayerNorm, list2array
-from thinc.initializers import zero_init
-from thinc.loss import CosineDistance
-from thinc.util import to_categorical, get_array_module
-from thinc.model import set_dropout_rate
+from thinc.api import chain, Linear, Maxout, Softmax, LayerNorm, list2array
+from thinc.api import zero_init, CosineDistance, to_categorical, get_array_module
+from thinc.api import set_dropout_rate
 
 from ..tokens.doc cimport Doc
 from ..syntax.nn_parser cimport Parser
@@ -1,3 +1,5 @@
+from thinc.api import Model, set_dropout_rate
+
 from .pipes import Pipe
 from ..gold import Example
 from ..tokens import Doc
@@ -5,8 +7,6 @@ from ..vocab import Vocab
 from ..language import component
 from ..util import link_vectors_to_models, minibatch, registry, eg2doc
 
-from thinc.model import Model, set_dropout_rate
-
 
 @component("tok2vec", assigns=["doc.tensor"])
 class Tok2Vec(Pipe):
@@ -39,7 +39,9 @@ class Tok2Vec(Pipe):
         self.listeners = []
 
     def create_listener(self):
-        listener = Tok2VecListener(upstream_name="tok2vec", width=self.model.get_dim("nO"))
+        listener = Tok2VecListener(
+            upstream_name="tok2vec", width=self.model.get_dim("nO")
+        )
         self.listeners.append(listener)
 
     def add_listener(self, listener):
@@ -115,7 +117,7 @@ class Tok2Vec(Pipe):
 
         def capture_losses(d_tokvecs):
             """Accumulate tok2vec loss before doing backprop."""
-            l2_loss = sum((d_t2v**2).sum() for d_t2v in d_tokvecs)
+            l2_loss = sum((d_t2v ** 2).sum() for d_t2v in d_tokvecs)
             if self.name in losses:
                 losses[self.name] += l2_loss / len(d_tokvecs)
             else:
@@ -133,7 +135,9 @@ class Tok2Vec(Pipe):
     def get_loss(self, docs, golds, scores):
         pass
 
-    def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs):
+    def begin_training(
+        self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs
+    ):
         """Allocate models and pre-process training data
 
         get_examples (function): Function returning example training data.
@@ -151,6 +155,7 @@ class Tok2VecListener(Model):
     """A layer that gets fed its answers from an upstream connection,
     for instance from a component earlier in the pipeline.
     """
+
     name = "tok2vec-listener"
 
     def __init__(self, upstream_name, width):
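Tok2VecListener implements a producer/consumer split: the Tok2Vec component runs the shared embedding model once per batch and pushes its output into listener layers, which downstream components call in place of recomputing token vectors. A toy illustration of the idea (not spaCy's actual implementation):

    class ToyListener:
        def __init__(self):
            self._outputs = None

        def receive(self, outputs):
            self._outputs = outputs  # the upstream producer pushes results here

        def __call__(self, tokens):
            return self._outputs  # consumers read the cached upstream output

    listener = ToyListener()
    listener.receive([[0.1, 0.2], [0.3, 0.4]])  # e.g. one vector per token
    print(listener(["two", "tokens"]))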
				
			
@@ -11,9 +11,7 @@ from libc.string cimport memset, memcpy
 from libc.stdlib cimport calloc, free, realloc
 from cymem.cymem cimport Pool
 from thinc.extra.search cimport Beam
-from thinc.layers import Linear
-from thinc.model import Model
-from thinc.backends import CupyOps, NumpyOps, use_ops
+from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops
 from thinc.backends.linalg cimport Vec, VecVec
 cimport blis.cy
 
@@ -1,11 +1,8 @@
 # cython: infer_types=True
 # cython: cdivision=True
 # cython: boundscheck=False
-import numpy
 cimport cython.parallel
-import numpy.random
 cimport numpy as np
-from itertools import islice
 from cpython.ref cimport PyObject, Py_XDECREF
 from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
 from libc.math cimport exp
@@ -14,15 +11,16 @@ from libc.string cimport memset, memcpy
 from libc.stdlib cimport calloc, free
 from cymem.cymem cimport Pool
 from thinc.extra.search cimport Beam
-from thinc.layers import chain, clone, Linear, list2array
-from thinc.backends import NumpyOps, CupyOps, use_ops
-from thinc.util import get_array_module
 from thinc.backends.linalg cimport Vec, VecVec
-from thinc.initializers import zero_init
-from thinc.model import set_dropout_rate
-import srsly
 
-from spacy.gold import Example
+from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops
+from thinc.api import get_array_module, zero_init, set_dropout_rate
+from itertools import islice
+import srsly
+import numpy.random
+import numpy
+
+from ..gold import Example
 from ..typedefs cimport weight_t, class_t, hash_t
 from ._parser_model cimport alloc_activations, free_activations
 from ._parser_model cimport predict_states, arg_max_if_valid
@@ -6,7 +6,7 @@ scheme.
 """
 from copy import copy
 
-from spacy.gold import Example
+from ..gold import Example
 from ..tokens.doc cimport Doc, set_children_from_heads
 from ..errors import Errors
 
@@ -1,4 +1,3 @@
-
 import pytest
 import numpy
 from spacy.tokens import Doc, Span
@@ -274,7 +273,19 @@ def test_doc_is_nered(en_vocab):
 def test_doc_from_array_sent_starts(en_vocab):
     words = ["I", "live", "in", "New", "York", ".", "I", "like", "cats", "."]
     heads = [0, 0, 0, 0, 0, 0, 6, 6, 6, 6]
-    deps = ["ROOT", "dep", "dep", "dep", "dep", "dep", "ROOT", "dep", "dep", "dep", "dep"]
+    deps = [
+        "ROOT",
+        "dep",
+        "dep",
+        "dep",
+        "dep",
+        "dep",
+        "ROOT",
+        "dep",
+        "dep",
+        "dep",
+        "dep",
+    ]
     doc = Doc(en_vocab, words=words)
     for i, (dep, head) in enumerate(zip(deps, heads)):
         doc[i].dep_ = dep
@@ -29,7 +29,9 @@ def test_morph_props(i_has):
 
 def test_morph_iter(i_has):
     assert set(i_has[0].morph) == set(["PronType=prs"])
-    assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"])
+    assert set(i_has[1].morph) == set(
+        ["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"]
+    )
 
 
 def test_morph_get(i_has):
@@ -8,7 +8,12 @@ from ..util import get_doc
 
 def test_doc_retokenize_merge(en_tokenizer):
     text = "WKRO played songs by the beach boys all night"
-    attrs = {"tag": "NAMED", "lemma": "LEMMA", "ent_type": "TYPE", "morph": "Number=Plur"}
+    attrs = {
+        "tag": "NAMED",
+        "lemma": "LEMMA",
+        "ent_type": "TYPE",
+        "morph": "Number=Plur",
+    }
     doc = en_tokenizer(text)
     assert len(doc) == 9
     with doc.retokenize() as retokenizer:
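Wraps like the attrs dict above are what black produces once a line, including its indentation, exceeds the 88-character default: one element per line plus a trailing comma. A sketch using black's Python API (assumes the black package is installed; spaCy's contributing guide names black as its formatter):

    import black

    src = (
        "def test():\n"
        '    attrs = {"tag": "NAMED", "lemma": "LEMMA", '
        '"ent_type": "TYPE", "morph": "Number=Plur"}\n'
    )
    print(black.format_str(src, mode=black.Mode()))
    # def test():
    #     attrs = {
    #         "tag": "NAMED",
    #         ...
    #     }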
				
			
@@ -1,4 +1,3 @@
-
 def test_ar_tokenizer_handles_long_text(ar_tokenizer):
     text = """نجيب محفوظ مؤلف و كاتب روائي عربي، يعد من أهم الأدباء العرب خلال القرن العشرين.
     ولد نجيب محفوظ في مدينة القاهرة، حيث ترعرع و تلقى تعليمه الجامعي في جامعتها،
Some files were not shown because too many files have changed in this diff.