Tidy up and auto-format

Ines Montani 2020-02-18 15:38:18 +01:00
parent 1278161f47
commit e3f40a6a0f
127 changed files with 219 additions and 275 deletions

View File

@@ -5,7 +5,7 @@ warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")
# These are imported as part of the API
-from thinc.util import prefer_gpu, require_gpu
+from thinc.api import prefer_gpu, require_gpu
from . import pipeline
from .cli.info import info as cli_info
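Most of the import changes in this commit follow the same pattern: imports that reached into thinc's internal submodules (thinc.util, thinc.layers, thinc.loss, thinc.backends, thinc.model) now go through the public thinc.api entry point. A minimal sketch of the pattern, assuming thinc 8.x; the surrounding usage is illustrative, not part of the diff:

# Before: imports reach into thinc's internals
#   from thinc.util import prefer_gpu, require_gpu
#   from thinc.layers import Linear, Maxout
# After: everything is re-exported from the single public module
from thinc.api import prefer_gpu, require_gpu, Linear, Maxout

if prefer_gpu():  # switches thinc's ops to GPU if one is available
    print("Using GPU")
layer = Linear(nO=2, nI=4)  # illustrative layer construction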

View File

@@ -4,7 +4,7 @@ from .link import link # noqa: F401
from .package import package # noqa: F401
from .profile import profile # noqa: F401
from .train import train # noqa: F401
from .train_from_config import train_from_config_cli # noqa: F401
from .pretrain import pretrain # noqa: F401
from .debug_data import debug_data # noqa: F401
from .evaluate import evaluate # noqa: F401

View File

@@ -192,11 +192,7 @@ def debug_data(
has_ws_ents_error = True
if gold_train_data["punct_ents"]:
-msg.warn(
-    "{} entity span(s) with punctuation".format(
-        gold_train_data["punct_ents"]
-    )
-)
+msg.warn(f"{gold_train_data['punct_ents']} entity span(s) with punctuation")
has_punct_ents_warning = True
for label in new_labels:
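The other mechanical change that repeats throughout the commit is replacing str.format() with f-strings, which often lets a wrapped msg.warn(...) or msg.text(...) call collapse back onto one line. A hedged before/after sketch with an illustrative value:

n_spans = 3  # illustrative value
# Before
print("{} entity span(s) with punctuation".format(n_spans))
# After
print(f"{n_spans} entity span(s) with punctuation")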

View File

@@ -4,14 +4,12 @@ import time
import re
from collections import Counter
from pathlib import Path
-from thinc.layers import Linear, Maxout
+from thinc.api import Linear, Maxout, chain, list2array, prefer_gpu
-from thinc.util import prefer_gpu
+from thinc.api import CosineDistance, L2Distance
from wasabi import msg
import srsly
-from thinc.layers import chain, list2array
-from thinc.loss import CosineDistance, L2Distance
-from spacy.gold import Example
+from ..gold import Example
from ..errors import Errors
from ..tokens import Doc
from ..attrs import ID, HEAD
@@ -85,7 +83,7 @@ def pretrain(
)
if not output_dir.exists():
output_dir.mkdir()
-msg.good("Created output directory: {}".format(output_dir))
+msg.good(f"Created output directory: {output_dir}")
srsly.write_json(output_dir / "config.json", config)
msg.good("Saved settings to config.json")

View File

@@ -1,7 +1,7 @@
import os
import tqdm
from pathlib import Path
-from thinc.backends import use_ops
+from thinc.api import use_ops
from timeit import default_timer as timer
import shutil
import srsly
@@ -89,7 +89,7 @@ def train(
)
if not output_path.exists():
output_path.mkdir()
-msg.good("Created output directory: {}".format(output_path))
+msg.good(f"Created output directory: {output_path}")
tag_map = {}
if tag_map_path is not None:
@@ -125,17 +125,17 @@ def train(
msg.text(f"Training pipeline: {pipeline}")
disabled_pipes = None
pipes_added = False
-msg.text("Training pipeline: {}".format(pipeline))
+msg.text(f"Training pipeline: {pipeline}")
if use_gpu >= 0:
activated_gpu = None
try:
activated_gpu = set_gpu(use_gpu)
except Exception as e:
-msg.warn("Exception: {}".format(e))
+msg.warn(f"Exception: {e}")
if activated_gpu is not None:
-msg.text("Using GPU: {}".format(use_gpu))
+msg.text(f"Using GPU: {use_gpu}")
else:
-msg.warn("Unable to activate GPU: {}".format(use_gpu))
+msg.warn(f"Unable to activate GPU: {use_gpu}")
msg.text("Using CPU only")
use_gpu = -1
if base_model:
@@ -158,11 +158,11 @@ def train(
"positive_label": textcat_positive_label,
}
if pipe not in nlp.pipe_names:
-msg.text("Adding component to base model '{}'".format(pipe))
+msg.text(f"Adding component to base model '{pipe}'")
nlp.add_pipe(nlp.create_pipe(pipe, config=pipe_cfg))
pipes_added = True
elif replace_components:
-msg.text("Replacing component from base model '{}'".format(pipe))
+msg.text(f"Replacing component from base model '{pipe}'")
nlp.replace_pipe(pipe, nlp.create_pipe(pipe, config=pipe_cfg))
pipes_added = True
else:
@@ -180,7 +180,7 @@ def train(
f"Existing cfg: {base_cfg}, provided cfg: {pipe_cfg}",
exits=1,
)
-msg.text("Extending component from base model '{}'".format(pipe))
+msg.text(f"Extending component from base model '{pipe}'")
disabled_pipes = nlp.disable_pipes(
    [p for p in nlp.pipe_names if p not in pipeline]
)
@@ -377,7 +377,7 @@ def train(
msg.warn(
    "Did you provide the same parameters during 'train' as during 'pretrain'?"
)
-msg.fail("Original error message: {}".format(e), exits=1)
+msg.fail(f"Original error message: {e}", exits=1)
if raw_text:
# If raw text is available, perform 'rehearsal' updates,
# which use unlabelled data to reduce overfitting.
@@ -504,11 +504,7 @@ def train(
)
break
except Exception as e:
-msg.warn(
-    "Aborting and saving the final best model. Encountered exception: {}".format(
-        e
-    )
-)
+msg.warn(f"Aborting and saving final best model. Encountered exception: {e}")
finally:
best_pipes = nlp.pipe_names
if disabled_pipes:

View File

@@ -1,19 +1,20 @@
+from typing import Optional, Dict, List, Union, Sequence
import plac
-from thinc.util import require_gpu
from wasabi import msg
from pathlib import Path
import thinc
import thinc.schedules
-from thinc.model import Model
+from thinc.api import Model
-from spacy.gold import GoldCorpus
-import spacy
-from spacy.pipeline.tok2vec import Tok2VecListener
-from typing import Optional, Dict, List, Union, Sequence
from pydantic import BaseModel, FilePath, StrictInt
import tqdm
-from ..ml import component_models
+# TODO: relative imports?
-from .. import util
+import spacy
+from spacy.gold import GoldCorpus
+from spacy.pipeline.tok2vec import Tok2VecListener
+from spacy.ml import component_models
+from spacy import util
registry = util.registry
@@ -153,10 +154,9 @@ def create_tb_parser_model(
hidden_width: StrictInt = 64,
maxout_pieces: StrictInt = 3,
):
-from thinc.layers import Linear, chain, list2array
+from thinc.api import Linear, chain, list2array, use_ops, zero_init
from spacy.ml._layers import PrecomputableAffine
from spacy.syntax._parser_model import ParserModel
-from thinc.api import use_ops, zero_init
token_vector_width = tok2vec.get_dim("nO")
tok2vec = chain(tok2vec, list2array())
@@ -221,13 +221,9 @@ def train_from_config_cli(
def train_from_config(
-config_path,
-data_paths,
-raw_text=None,
-meta_path=None,
-output_path=None,
+config_path, data_paths, raw_text=None, meta_path=None, output_path=None,
):
-msg.info("Loading config from: {}".format(config_path))
+msg.info(f"Loading config from: {config_path}")
config = util.load_from_config(config_path, create_objects=True)
use_gpu = config["training"]["use_gpu"]
if use_gpu >= 0:
@@ -241,9 +237,7 @@ def train_from_config(
msg.info("Loading training corpus")
corpus = GoldCorpus(data_paths["train"], data_paths["dev"], limit=limit)
msg.info("Initializing the nlp pipeline")
-nlp.begin_training(
-    lambda: corpus.train_examples, device=use_gpu
-)
+nlp.begin_training(lambda: corpus.train_examples, device=use_gpu)
train_batches = create_train_batches(nlp, corpus, config["training"])
evaluate = create_evaluation_callback(nlp, optimizer, corpus, config["training"])
@@ -260,7 +254,7 @@ def train_from_config(
config["training"]["eval_frequency"],
)
-msg.info("Training. Initial learn rate: {}".format(optimizer.learn_rate))
+msg.info(f"Training. Initial learn rate: {optimizer.learn_rate}")
print_row = setup_printer(config)
try:
@@ -414,7 +408,7 @@ def subdivide_batch(batch):
def setup_printer(config):
score_cols = config["training"]["scores"]
score_widths = [max(len(col), 6) for col in score_cols]
-loss_cols = ["Loss {}".format(pipe) for pipe in config["nlp"]["pipeline"]]
+loss_cols = [f"Loss {pipe}" for pipe in config["nlp"]["pipeline"]]
loss_widths = [max(len(col), 8) for col in loss_cols]
table_header = ["#"] + loss_cols + score_cols + ["Score"]
table_header = [col.upper() for col in table_header]

View File

@@ -30,7 +30,7 @@ try:
except ImportError:
cupy = None
-from thinc.optimizers import Optimizer # noqa: F401
+from thinc.api import Optimizer # noqa: F401
pickle = pickle
copy_reg = copy_reg

View File

@@ -1,4 +1,3 @@
# Setting explicit height and max-width: none on the SVG is required for
# Jupyter to render it properly in a cell

View File

@@ -1,4 +1,3 @@
def explain(term):
"""Get a description for a given POS tag, dependency label or entity type.

View File

@@ -1,6 +1,6 @@
from cymem.cymem cimport Pool
-from spacy.tokens import Doc
+from .tokens import Doc
from .typedefs cimport attr_t
from .syntax.transition_system cimport Transition
@@ -65,5 +65,3 @@ cdef class Example:
cdef public TokenAnnotation token_annotation
cdef public DocAnnotation doc_annotation
cdef public object goldparse

View File

@@ -6,7 +6,7 @@ from libcpp.vector cimport vector
from libc.stdint cimport int32_t, int64_t
from libc.stdio cimport FILE
-from spacy.vocab cimport Vocab
+from .vocab cimport Vocab
from .typedefs cimport hash_t
from .structs cimport KBEntryC, AliasC
@@ -113,7 +113,7 @@ cdef class KnowledgeBase:
return new_index
cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
"""
Initializing the vectors and making sure the first element of each vector is a dummy,
because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
cf. https://github.com/explosion/preshed/issues/17
@@ -169,4 +169,3 @@ cdef class Reader:
cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
cdef int _read(self, void* value, size_t size) except -1

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/stopwords-iso/stopwords-af
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/Alir3z4/stop-words
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
অতএব অথচ অথব অন অন অন অন অনতত অবধি অবশ অর অন অন অরধভ

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/Alir3z4/stop-words
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
á a ab aber ach acht achte achten achter achtes ag alle allein allem allen
@@ -44,7 +43,7 @@ kleines kommen kommt können könnt konnte könnte konnten kurz
lang lange leicht leider lieber los
machen macht machte mag magst man manche manchem manchen mancher manches mehr
mein meine meinem meinen meiner meines mich mir mit mittel mochte möchte mochten
mögen möglich mögt morgen muss muß müssen musst müsst musste mussten
na nach nachdem nahm natürlich neben nein neue neuen neun neunte neunten neunter

View File

@@ -1,4 +1,3 @@
def get_pos_from_wiktionary():
import re
from gensim.corpora.wikicorpus import extract_pages

View File

@@ -1,4 +1,3 @@
# These exceptions are used to add NORM values based on a token's ORTH value.
# Norms are only set if no alternative is provided in the tokenizer exceptions.

View File

@@ -1,4 +1,3 @@
# Stop words
# Link to greek stop words: https://www.translatum.gr/forum/index.php?topic=3550.0?topic=3550.0
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
_exc = {
# Slang and abbreviations
"cos": "because",

View File

@@ -1,4 +1,3 @@
# Stop words
STOP_WORDS = set(
"""

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
actualmente acuerdo adelante ademas además adrede afirmó agregó ahi ahora ahí

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/stopwords-iso/stopwords-et
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
verb_roots = """
#هست
آخت#آهنج

View File

@@ -1,4 +1,3 @@
# Stop words from HAZM package
STOP_WORDS = set(
"""

View File

@@ -1,4 +1,3 @@
# Source https://github.com/stopwords-iso/stopwords-fi/blob/master/stopwords-fi.txt
# Reformatted with some minor corrections
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
a à â abord absolument afin ah ai aie ailleurs ainsi ait allaient allo allons

View File

@@ -1,4 +1,3 @@
# fmt: off
consonants = ["b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n", "p", "q", "r", "s", "t", "v", "w", "x", "z"]
broad_vowels = ["a", "á", "o", "ó", "u", "ú"]

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/taranjeet/hindi-tokenizer/blob/master/stopwords.txt, https://data.mendeley.com/datasets/bsr3frvvjc/1#file-a21d5092-99d7-45d8-b044-3ae9edd391c6
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
a abban ahhoz ahogy ahol aki akik akkor akár alatt amely amelyek amelyekben

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/Xangis/extra-stopwords
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
a abbastanza abbia abbiamo abbiano abbiate accidenti ad adesso affinche agl

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
ಹಲವ

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/stopwords-iso/stopwords-lv
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/stopwords-iso/stopwords-mr/blob/master/stopwords-mr.txt, https://github.com/6/stopwords-json/edit/master/dist/mr.json
STOP_WORDS = set(
"""

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# These exceptions are used to add NORM values based on a token's ORTH value.
# Individual languages can also add their own exceptions and overwrite them -
# for example, British vs. American spelling in English.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
à às área acerca ademais adeus agora ainda algo algumas alguns ali além ambas ambos antes

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
_exc = {
# Slang
"прив": "привет",

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
අතර

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/Ardevop-sk/stopwords-sk
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/stopwords-iso/stopwords-sl
# TODO: probably needs to be tidied up the list seems to have month names in
# it, which shouldn't be considered stop words.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# Source: https://github.com/andrixh/index-albanian
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
_exc = {
# Slang
"ћале": "отац",

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
а

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
STOP_WORDS = set(
"""
aderton adertonde adjö aldrig alla allas allt alltid alltså än andra andras

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
# Stop words
STOP_WORDS = set(

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
_exc = {
# Conjugation and Diversion invalid to Tonal form (ผันอักษรและเสียงไม่ตรงกับรูปวรรณยุกต์)
"สนุ๊กเกอร์": "สนุกเกอร์",

View File

@@ -34,7 +34,7 @@ URL_PATTERN = (
r"|"
# host & domain names
# mods: match is case-sensitive, so include [A-Z]
-"(?:"
+"(?:"  # noqa: E131
"(?:"
"[A-Za-z0-9\u00a1-\uffff]"
"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.tr.examples import sentences

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
from ..tokenizer_exceptions import BASE_EXCEPTIONS
from ..norm_exceptions import BASE_NORMS
from ...language import Language

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -1,4 +1,3 @@
"""
Example sentences to test spaCy and its language models.

View File

@@ -4,7 +4,7 @@ import weakref
import functools
from contextlib import contextmanager
from copy import copy, deepcopy
-from thinc.backends import get_current_ops
+from thinc.api import get_current_ops
import srsly
import multiprocessing as mp
from itertools import chain, cycle

View File

@@ -6,7 +6,7 @@ cimport numpy as np
np.import_array()
import numpy
-from thinc.util import get_array_module
+from thinc.api import get_array_module
from .typedefs cimport attr_t, flags_t
from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE

View File

@@ -3,18 +3,20 @@ from thinc.api import Model
def CharacterEmbed(nM, nC):
# nM: Number of dimensions per character. nC: Number of characters.
-nO = nM*nC if (nM is not None and nC is not None) else None
+nO = nM * nC if (nM is not None and nC is not None) else None
return Model(
"charembed",
forward,
init=init,
dims={"nM": nM, "nC": nC, "nO": nO, "nV": 256},
-params={"E": None}
+params={"E": None},
).initialize()
def init(model, X=None, Y=None):
-vectors_table = model.ops.alloc3f(model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM"))
+vectors_table = model.ops.alloc3f(
+    model.get_dim("nC"), model.get_dim("nV"), model.get_dim("nM")
+)
model.set_param("E", vectors_table)

View File

@@ -1,5 +1,4 @@
-from thinc.model import Model
-from thinc.api import normal_init
+from thinc.api import Model, normal_init
def PrecomputableAffine(nO, nI, nF, nP):
@@ -20,9 +19,7 @@ def forward(model, X, is_train):
nP = model.get_dim("nP")
nI = model.get_dim("nI")
W = model.get_param("W")
-Yf = model.ops.gemm(
-    X, W.reshape((nF * nO * nP, nI)), trans2=True
-)
+Yf = model.ops.gemm(X, W.reshape((nF * nO * nP, nI)), trans2=True)
Yf = Yf.reshape((Yf.shape[0], nF, nO, nP))
Yf = model.ops.xp.vstack((model.get_param("pad"), Yf))
@@ -37,14 +34,14 @@ def forward(model, X, is_train):
# for b in range(nB):
# for f in range(nF):
# dYf[b, ids[b, f]] += dY[b]
#
# However, we avoid building that array for efficiency -- and just pass
# in the indices.
dY, ids = dY_ids
assert dY.ndim == 3
assert dY.shape[1] == nO, dY.shape
assert dY.shape[2] == nP, dY.shape
-nB = dY.shape[0]
+# nB = dY.shape[0]
model.inc_grad("pad", _backprop_precomputable_affine_padding(model, dY, ids))
Xf = X[ids]
Xf = Xf.reshape((Xf.shape[0], nF * nI))
@@ -83,12 +80,12 @@ def _backprop_precomputable_affine_padding(model, dY, ids):
# for f in range(nF):
# if ids[b, f] < 0:
# d_padding[0, f] += dY[b]
#
# Which can be rewritten as:
#
# for b in range(nB):
# d_pad[0, ids[b] < 0] += dY[b]
#
# I don't know how to avoid the loop without building a whole array :(.
# Cursed numpy.
d_pad = model.ops.alloc((1, nF, nO, nP))
@@ -118,7 +115,7 @@ def init(model, X=None, Y=None):
pad = model.ops.alloc4f(1, nF, nO, nP)
ops = model.ops
-W = normal_init(ops, W.shape, fan_in=nF*nI)
+W = normal_init(ops, W.shape, fan_in=nF * nI)
model.set_param("W", W)
model.set_param("b", b)
model.set_param("pad", pad)

View File

@@ -9,7 +9,7 @@ from thinc.api import Model, Maxout, Linear, residual, reduce_mean, list2ragged
from thinc.api import PyTorchLSTM, add, MultiSoftmax, HashEmbed, StaticVectors
from thinc.api import expand_window, FeatureExtractor, SparseLinear, chain
from thinc.api import clone, concatenate, with_array, Softmax, Logistic, uniqued
-from thinc.api import zero_init, glorot_uniform_init
+from thinc.api import zero_init
def build_text_classifier(arch, config):
@@ -33,10 +33,7 @@ def build_simple_cnn_text_classifier(tok2vec, nr_class, exclusive_classes, **cfg
output_layer = Softmax(nO=nr_class, nI=tok2vec.get_dim("nO"))
else:
# TODO: experiment with init_w=zero_init
-output_layer = (
-    Linear(nO=nr_class, nI=tok2vec.get_dim("nO"))
-    >> Logistic()
-)
+output_layer = Linear(nO=nr_class, nI=tok2vec.get_dim("nO")) >> Logistic()
model = tok2vec >> list2ragged() >> reduce_mean() >> output_layer
model.set_ref("tok2vec", tok2vec)
model.set_dim("nO", nr_class)
@@ -149,13 +146,21 @@ def Tok2Vec(
with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
norm = HashEmbed(nO=width, nV=embed_size, column=cols.index(NORM), dropout=0.0)
if subword_features:
-prefix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0)
-suffix = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0)
-shape = HashEmbed(nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0)
+prefix = HashEmbed(
+    nO=width, nV=embed_size // 2, column=cols.index(PREFIX), dropout=0.0
+)
+suffix = HashEmbed(
+    nO=width, nV=embed_size // 2, column=cols.index(SUFFIX), dropout=0.0
+)
+shape = HashEmbed(
+    nO=width, nV=embed_size // 2, column=cols.index(SHAPE), dropout=0.0
+)
else:
prefix, suffix, shape = (None, None, None)
if pretrained_vectors is not None:
-glove = StaticVectors(vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0)
+glove = StaticVectors(
+    vectors=pretrained_vectors, nO=width, column=cols.index(ID), dropout=0.0
+)
if subword_features:
embed = uniqued(
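Several of the expressions reformatted in this file rely on thinc's operator overloading: inside a Model.define_operators block, >> is chain, | is concatenate and ** is clone, so Linear(...) >> Logistic() above is just a two-layer chain. A minimal sketch, assuming thinc 8.x (layer sizes are illustrative):

from thinc.api import Model, Linear, Logistic, chain, concatenate, clone

with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
    # equivalent to chain(Linear(nO=3, nI=8), Logistic())
    output_layer = Linear(nO=3, nI=8) >> Logistic()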

View File

@@ -1,5 +1,5 @@
import numpy
-from thinc.model import Model
+from thinc.api import Model
from ..attrs import LOWER
@@ -26,9 +26,7 @@ def forward(self, docs, is_train: bool):
# The dtype here matches what thinc is expecting -- which differs per
# platform (by int definition). This should be fixed once the problem
# is fixed on Thinc's side.
-lengths = self.ops.asarray(
-    [arr.shape[0] for arr in batch_keys], dtype=numpy.int_
-)
+lengths = self.ops.asarray([arr.shape[0] for arr in batch_keys], dtype=numpy.int_)
batch_keys = self.ops.xp.concatenate(batch_keys)
batch_vals = self.ops.asarray(self.ops.xp.concatenate(batch_vals), dtype="f")
@@ -36,4 +34,3 @@ def forward(self, docs, is_train: bool):
return dY
return (batch_keys, batch_vals, lengths), backprop

View File

@@ -1,11 +1,8 @@
-from thinc.layers import chain, clone, concatenate, with_array, uniqued
-from thinc.model import Model
-from thinc.layers import noop, with_padded
-from thinc.layers import Maxout, expand_window
-from thinc.layers import HashEmbed, StaticVectors
-from thinc.layers import residual, LayerNorm, FeatureExtractor
+from thinc.api import Model, chain, clone, concatenate, with_array, uniqued, noop
+from thinc.api import with_padded, Maxout, expand_window, HashEmbed, StaticVectors
+from thinc.api import residual, LayerNorm, FeatureExtractor
-from spacy.ml import _character_embed
+from ..ml import _character_embed
from ..util import make_layer, registry
@@ -93,8 +90,10 @@ def MaxoutWindowEncoder(config):
nW = config["window_size"]
nP = config["pieces"]
depth = config["depth"]
-cnn = expand_window(window_size=nW), Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True)
+cnn = (
+    expand_window(window_size=nW),
+    Maxout(nO=nO, nI=nO * ((nW * 2) + 1), nP=nP, dropout=0.0, normalize=True),
+)
model = clone(residual(cnn), depth)
model.set_dim("nO", nO)
model.attrs["receptive_field"] = nW * depth
@@ -103,13 +102,16 @@ def MaxoutWindowEncoder(config):
@registry.architectures.register("spacy.MishWindowEncoder.v1")
def MishWindowEncoder(config):
-from thinc.layers import Mish
+from thinc.api import Mish
nO = config["width"]
nW = config["window_size"]
depth = config["depth"]
-cnn = chain(expand_window(window_size=nW), Mish(nO=nO, nI=nO * ((nW * 2) + 1)), LayerNorm(nO))
+cnn = chain(
+    expand_window(window_size=nW),
+    Mish(nO=nO, nI=nO * ((nW * 2) + 1)),
+    LayerNorm(nO),
+)
model = clone(residual(cnn), depth)
model.set_dim("nO", nO)
return model
@@ -118,14 +120,20 @@ def MishWindowEncoder(config):
@registry.architectures.register("spacy.PretrainedVectors.v1")
def PretrainedVectors(config):
# TODO: actual vectors instead of name
-return StaticVectors(vectors=config["vectors_name"], nO=config["width"], column=config["column"], dropout=0.0)
+return StaticVectors(
+    vectors=config["vectors_name"],
+    nO=config["width"],
+    column=config["column"],
+    dropout=0.0,
+)
@registry.architectures.register("spacy.TorchBiLSTMEncoder.v1")
def TorchBiLSTMEncoder(config):
import torch.nn
-from thinc.layers import PyTorchRNNWrapper  # TODO: FIX
+# TODO FIX
+from thinc.api import PyTorchRNNWrapper
width = config["width"]
depth = config["depth"]
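The @registry.architectures.register(...) decorators in this file add model factories to spaCy's registry so that a training config can refer to them by name. A rough sketch of the pattern, assuming the registry API on the develop branch at this point (the architecture name below is made up):

from spacy import util
from thinc.api import Linear

@util.registry.architectures.register("illustrative.ToyEncoder.v1")
def ToyEncoder(config):
    # build and return any thinc Model from the config dict
    return Linear(nO=config["width"], nI=config["width"])

factory = util.registry.architectures.get("illustrative.ToyEncoder.v1")
model = factory({"width": 8})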

View File

@@ -1,4 +1,4 @@
-from thinc.layers import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity
+from thinc.api import concatenate, reduce_max, reduce_mean, siamese, CauchySimilarity
from .pipes import Pipe
from ..language import component
@@ -63,8 +63,7 @@ class SimilarityHook(Pipe):
@classmethod
def Model(cls, length):
return siamese(
-    concatenate(reduce_max(), reduce_mean()),
-    CauchySimilarity(length * 2)
+    concatenate(reduce_max(), reduce_mean()), CauchySimilarity(length * 2)
)
def __call__(self, doc):

View File

@@ -3,8 +3,8 @@ from collections import defaultdict
import numpy
cimport numpy as np
-from thinc.layers import chain, list2array
-from thinc.util import to_categorical, copy_array, get_array_module
+from thinc.api import chain, list2array, to_categorical, get_array_module
+from thinc.util import copy_array
from .. import util
from .pipes import Pipe

View File

@@ -3,11 +3,9 @@
import numpy
import srsly
import random
-from thinc.layers import chain, Linear, Maxout, Softmax, LayerNorm, list2array
-from thinc.initializers import zero_init
-from thinc.loss import CosineDistance
-from thinc.util import to_categorical, get_array_module
-from thinc.model import set_dropout_rate
+from thinc.api import chain, Linear, Maxout, Softmax, LayerNorm, list2array
+from thinc.api import zero_init, CosineDistance, to_categorical, get_array_module
+from thinc.api import set_dropout_rate
from ..tokens.doc cimport Doc
from ..syntax.nn_parser cimport Parser

View File

@@ -1,3 +1,5 @@
+from thinc.api import Model, set_dropout_rate
from .pipes import Pipe
from ..gold import Example
from ..tokens import Doc
@@ -5,8 +7,6 @@ from ..vocab import Vocab
from ..language import component
from ..util import link_vectors_to_models, minibatch, registry, eg2doc
-from thinc.model import Model, set_dropout_rate
@component("tok2vec", assigns=["doc.tensor"])
class Tok2Vec(Pipe):
@@ -39,7 +39,9 @@ class Tok2Vec(Pipe):
self.listeners = []
def create_listener(self):
-listener = Tok2VecListener(upstream_name="tok2vec", width=self.model.get_dim("nO"))
+listener = Tok2VecListener(
+    upstream_name="tok2vec", width=self.model.get_dim("nO")
+)
self.listeners.append(listener)
def add_listener(self, listener):
@@ -112,10 +114,10 @@ class Tok2Vec(Pipe):
docs = [docs]
set_dropout_rate(self.model, drop)
tokvecs, bp_tokvecs = self.model.begin_update(docs)
def capture_losses(d_tokvecs):
"""Accumulate tok2vec loss before doing backprop."""
-l2_loss = sum((d_t2v**2).sum() for d_t2v in d_tokvecs)
+l2_loss = sum((d_t2v ** 2).sum() for d_t2v in d_tokvecs)
if self.name in losses:
losses[self.name] += l2_loss / len(d_tokvecs)
else:
@@ -133,7 +135,9 @@ class Tok2Vec(Pipe):
def get_loss(self, docs, golds, scores):
pass
-def begin_training(self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs):
+def begin_training(
+    self, get_examples=lambda: [], pipeline=None, sgd=None, **kwargs
+):
"""Allocate models and pre-process training data
get_examples (function): Function returning example training data.
@@ -151,6 +155,7 @@ class Tok2VecListener(Model):
"""A layer that gets fed its answers from an upstream connection,
for instance from a component earlier in the pipeline.
"""
name = "tok2vec-listener"
def __init__(self, upstream_name, width):

View File

@@ -11,9 +11,7 @@ from libc.string cimport memset, memcpy
from libc.stdlib cimport calloc, free, realloc
from cymem.cymem cimport Pool
from thinc.extra.search cimport Beam
-from thinc.layers import Linear
-from thinc.model import Model
-from thinc.backends import CupyOps, NumpyOps, use_ops
+from thinc.api import Linear, Model, CupyOps, NumpyOps, use_ops
from thinc.backends.linalg cimport Vec, VecVec
cimport blis.cy

View File

@@ -1,11 +1,8 @@
# cython: infer_types=True
# cython: cdivision=True
# cython: boundscheck=False
-import numpy
cimport cython.parallel
-import numpy.random
cimport numpy as np
-from itertools import islice
from cpython.ref cimport PyObject, Py_XDECREF
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
from libc.math cimport exp
@@ -14,15 +11,16 @@ from libc.string cimport memset, memcpy
from libc.stdlib cimport calloc, free
from cymem.cymem cimport Pool
from thinc.extra.search cimport Beam
-from thinc.layers import chain, clone, Linear, list2array
-from thinc.backends import NumpyOps, CupyOps, use_ops
-from thinc.util import get_array_module
from thinc.backends.linalg cimport Vec, VecVec
-from thinc.initializers import zero_init
-from thinc.model import set_dropout_rate
-import srsly
-from spacy.gold import Example
+from thinc.api import chain, clone, Linear, list2array, NumpyOps, CupyOps, use_ops
+from thinc.api import get_array_module, zero_init, set_dropout_rate
+from itertools import islice
+import srsly
+import numpy.random
+import numpy
+from ..gold import Example
from ..typedefs cimport weight_t, class_t, hash_t
from ._parser_model cimport alloc_activations, free_activations
from ._parser_model cimport predict_states, arg_max_if_valid

View File

@@ -6,7 +6,7 @@ scheme.
"""
from copy import copy
-from spacy.gold import Example
+from ..gold import Example
from ..tokens.doc cimport Doc, set_children_from_heads
from ..errors import Errors

View File

@@ -1,4 +1,3 @@
import pytest
import numpy
from spacy.tokens import Doc, Span
@@ -274,7 +273,19 @@ def test_doc_is_nered(en_vocab):
def test_doc_from_array_sent_starts(en_vocab):
words = ["I", "live", "in", "New", "York", ".", "I", "like", "cats", "."]
heads = [0, 0, 0, 0, 0, 0, 6, 6, 6, 6]
-deps = ["ROOT", "dep", "dep", "dep", "dep", "dep", "ROOT", "dep", "dep", "dep", "dep"]
+deps = [
+    "ROOT",
+    "dep",
+    "dep",
+    "dep",
+    "dep",
+    "dep",
+    "ROOT",
+    "dep",
+    "dep",
+    "dep",
+    "dep",
+]
doc = Doc(en_vocab, words=words)
for i, (dep, head) in enumerate(zip(deps, heads)):
doc[i].dep_ = dep

View File

@@ -29,7 +29,9 @@ def test_morph_props(i_has):
def test_morph_iter(i_has):
assert set(i_has[0].morph) == set(["PronType=prs"])
-assert set(i_has[1].morph) == set(["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"])
+assert set(i_has[1].morph) == set(
+    ["Number=sing", "Person=three", "Tense=pres", "VerbForm=fin"]
+)
def test_morph_get(i_has):

View File

@@ -8,7 +8,12 @@ from ..util import get_doc
def test_doc_retokenize_merge(en_tokenizer):
text = "WKRO played songs by the beach boys all night"
-attrs = {"tag": "NAMED", "lemma": "LEMMA", "ent_type": "TYPE", "morph": "Number=Plur"}
+attrs = {
+    "tag": "NAMED",
+    "lemma": "LEMMA",
+    "ent_type": "TYPE",
+    "morph": "Number=Plur",
+}
doc = en_tokenizer(text)
assert len(doc) == 9
with doc.retokenize() as retokenizer:

View File

@@ -1,4 +1,3 @@
def test_ar_tokenizer_handles_long_text(ar_tokenizer):
text = """نجيب محفوظ مؤلف و كاتب روائي عربي، يعد من أهم الأدباء العرب خلال القرن العشرين.
ولد نجيب محفوظ في مدينة القاهرة، حيث ترعرع و تلقى تعليمه الجامعي في جامعتها،

Some files were not shown because too many files have changed in this diff.