Mirror of https://github.com/explosion/spaCy.git (synced 2025-04-21 17:41:59 +03:00)

Merge branch 'upstream_master' into test-cli-app-init-config

This commit is contained in:
commit 0278eecabf
.github/workflows/tests.yml (vendored) — 64 changed lines
@@ -37,10 +37,20 @@ jobs:
         run: |
           python -m pip install black -c requirements.txt
           python -m black spacy --check
+      - name: isort
+        run: |
+          python -m pip install isort -c requirements.txt
+          python -m isort spacy --check
       - name: flake8
         run: |
           python -m pip install flake8==5.0.4
           python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
+      - name: cython-lint
+        run: |
+          python -m pip install cython-lint -c requirements.txt
+          # E501: line too long, W291: trailing whitespace, E266: too many leading '#' for block comment
+          cython-lint spacy --ignore E501,W291,E266

   tests:
     name: Test
     needs: Validate
@@ -107,22 +117,22 @@ jobs:
       - name: Test import
        run: python -W error -c "import spacy"

-      # - name: "Test download CLI"
-      #   run: |
-      #     python -m spacy download ca_core_news_sm
-      #     python -m spacy download ca_core_news_md
-      #     python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
-      #   if: matrix.python_version == '3.9'
-      #
-      # - name: "Test download_url in info CLI"
-      #   run: |
-      #     python -W error -m spacy info ca_core_news_sm | grep -q download_url
-      #   if: matrix.python_version == '3.9'
-      #
-      # - name: "Test no warnings on load (#11713)"
-      #   run: |
-      #     python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
-      #   if: matrix.python_version == '3.9'
+      - name: "Test download CLI"
+        run: |
+          python -m spacy download ca_core_news_sm
+          python -m spacy download ca_core_news_md
+          python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
+        if: matrix.python_version == '3.9'
+
+      - name: "Test download_url in info CLI"
+        run: |
+          python -W error -m spacy info ca_core_news_sm | grep -q download_url
+        if: matrix.python_version == '3.9'
+
+      - name: "Test no warnings on load (#11713)"
+        run: |
+          python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
+        if: matrix.python_version == '3.9'

       - name: "Test convert CLI"
         run: |
@@ -146,17 +156,17 @@ jobs:
           python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
         if: matrix.python_version == '3.9'

-      # - name: "Test assemble CLI"
-      #   run: |
-      #     python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
-      #     PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
-      #   if: matrix.python_version == '3.9'
-      #
-      # - name: "Test assemble CLI vectors warning"
-      #   run: |
-      #     python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
-      #     python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
-      #   if: matrix.python_version == '3.9'
+      - name: "Test assemble CLI"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
+          PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
+        if: matrix.python_version == '3.9'
+
+      - name: "Test assemble CLI vectors warning"
+        run: |
+          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
+          python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
+        if: matrix.python_version == '3.9'

       - name: "Install test requirements"
         run: |
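The Validate job now gates on four tools: black, isort, flake8 and cython-lint. For contributors, here is a minimal local sketch of the same gate — assuming the tools are installed in the current environment; the flags and `--select`/`--ignore` lists are copied from the workflow above:

```python
import subprocess
import sys

# One entry per lint step in the Validate job.
CHECKS = [
    ["python", "-m", "black", "spacy", "--check"],
    ["python", "-m", "isort", "spacy", "--check"],
    [
        "python", "-m", "flake8", "spacy", "--count",
        "--select=E901,E999,F821,F822,F823,W605",
        "--show-source", "--statistics",
    ],
    ["cython-lint", "spacy", "--ignore", "E501,W291,E266"],
]

failed = False
for cmd in CHECKS:
    print("$", " ".join(cmd))
    failed |= subprocess.run(cmd).returncode != 0

sys.exit(1 if failed else 0)
```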
Makefile — 4 changed lines
@@ -1,11 +1,11 @@
 SHELL := /bin/bash

 ifndef SPACY_EXTRAS
-override SPACY_EXTRAS = spacy-lookups-data==1.0.2 jieba spacy-pkuseg==0.0.28 sudachipy sudachidict_core pymorphy2
+override SPACY_EXTRAS = spacy-lookups-data==1.0.3
 endif

 ifndef PYVER
-override PYVER = 3.6
+override PYVER = 3.8
 endif

 VENV := ./env$(PYVER)
README.md — 72 changed lines
@@ -6,23 +6,20 @@ spaCy is a library for **advanced Natural Language Processing** in Python and
 Cython. It's built on the very latest research, and was designed from day one to
 be used in real products.

-spaCy comes with
-[pretrained pipelines](https://spacy.io/models) and
-currently supports tokenization and training for **70+ languages**. It features
-state-of-the-art speed and **neural network models** for tagging,
-parsing, **named entity recognition**, **text classification** and more,
-multi-task learning with pretrained **transformers** like BERT, as well as a
+spaCy comes with [pretrained pipelines](https://spacy.io/models) and currently
+supports tokenization and training for **70+ languages**. It features
+state-of-the-art speed and **neural network models** for tagging, parsing,
+**named entity recognition**, **text classification** and more, multi-task
+learning with pretrained **transformers** like BERT, as well as a
 production-ready [**training system**](https://spacy.io/usage/training) and easy
 model packaging, deployment and workflow management. spaCy is commercial
-open-source software, released under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
+open-source software, released under the
+[MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).

 💥 **We'd love to hear more about your experience with spaCy!**
 [Fill out our survey here.](https://form.typeform.com/to/aMel9q9f)

-💫 **Version 3.5 out now!**
+💫 **Version 3.6 out now!**
 [Check out the release notes here.](https://github.com/explosion/spaCy/releases)

-[](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
+[](https://github.com/explosion/spaCy/actions/workflows/tests.yml)
 [](https://github.com/explosion/spaCy/releases)
 [](https://pypi.org/project/spacy/)
 [](https://anaconda.org/conda-forge/spacy)
@@ -35,22 +32,22 @@ open-source software, released under the [MIT license](https://github.com/explos

 ## 📖 Documentation

 | Documentation | |
-| ----------------------------- | ---------------------------------------------------------------------- |
+| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | ⭐️ **[spaCy 101]** | New to spaCy? Here's everything you need to know! |
 | 📚 **[Usage Guides]** | How to use spaCy and its features. |
 | 🚀 **[New in v3.0]** | New features, backwards incompatibilities and migration guide. |
 | 🪐 **[Project Templates]** | End-to-end workflows you can clone, modify and run. |
 | 🎛 **[API Reference]** | The detailed reference for spaCy's API. |
 | 📦 **[Models]** | Download trained pipelines for spaCy. |
 | 🌌 **[Universe]** | Plugins, extensions, demos and books from the spaCy ecosystem. |
 | ⚙️ **[spaCy VS Code Extension]** | Additional tooling and features for working with spaCy's config files. |
 | 👩‍🏫 **[Online Course]** | Learn spaCy in this free and interactive online course. |
 | 📺 **[Videos]** | Our YouTube channel with video tutorials, talks and more. |
 | 🛠 **[Changelog]** | Changes and version history. |
 | 💝 **[Contribute]** | How to contribute to the spaCy project and code base. |
 | <a href="https://explosion.ai/spacy-tailored-pipelines"><img src="https://user-images.githubusercontent.com/13643239/152853098-1c761611-ccb0-4ec6-9066-b234552831fe.png" width="125" alt="spaCy Tailored Pipelines"/></a> | Get a custom spaCy pipeline, tailor-made for your NLP problem by spaCy's core developers. Streamlined, production-ready, predictable and maintainable. Start by completing our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more →](https://explosion.ai/spacy-tailored-pipelines)** |
 | <a href="https://explosion.ai/spacy-tailored-analysis"><img src="https://user-images.githubusercontent.com/1019791/206151300-b00cd189-e503-4797-aa1e-1bb6344062c5.png" width="125" alt="spaCy Tailored Pipelines"/></a> | Bespoke advice for problem solving, strategy and analysis for applied NLP projects. Services include data strategy, code reviews, pipeline design and annotation coaching. Curious? Fill in our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more →](https://explosion.ai/spacy-tailored-analysis)** |

 [spacy 101]: https://spacy.io/usage/spacy-101
 [new in v3.0]: https://spacy.io/usage/v3
@@ -58,7 +55,7 @@ open-source software, released under the [MIT license](https://github.com/explos
 [api reference]: https://spacy.io/api/
 [models]: https://spacy.io/models
 [universe]: https://spacy.io/universe
-[spaCy VS Code Extension]: https://github.com/explosion/spacy-vscode
+[spacy vs code extension]: https://github.com/explosion/spacy-vscode
 [videos]: https://www.youtube.com/c/ExplosionAI
 [online course]: https://course.spacy.io
 [project templates]: https://github.com/explosion/projects
@@ -92,7 +89,9 @@ more people can benefit from it.
 - State-of-the-art speed
 - Production-ready **training system**
 - Linguistically-motivated **tokenization**
-- Components for named **entity recognition**, part-of-speech-tagging, dependency parsing, sentence segmentation, **text classification**, lemmatization, morphological analysis, entity linking and more
+- Components for named **entity recognition**, part-of-speech-tagging,
+  dependency parsing, sentence segmentation, **text classification**,
+  lemmatization, morphological analysis, entity linking and more
 - Easily extensible with **custom components** and attributes
 - Support for custom models in **PyTorch**, **TensorFlow** and other frameworks
 - Built in **visualizers** for syntax and NER
@@ -118,8 +117,8 @@ For detailed installation instructions, see the
 ### pip

 Using pip, spaCy releases are available as source packages and binary wheels.
-Before you install spaCy and its dependencies, make sure that
-your `pip`, `setuptools` and `wheel` are up to date.
+Before you install spaCy and its dependencies, make sure that your `pip`,
+`setuptools` and `wheel` are up to date.

 ```bash
 pip install -U pip setuptools wheel
@@ -174,9 +173,9 @@ with the new version.

 ## 📦 Download model packages

-Trained pipelines for spaCy can be installed as **Python packages**. This
-means that they're a component of your application, just like any other module.
-Models can be installed using spaCy's [`download`](https://spacy.io/api/cli#download)
+Trained pipelines for spaCy can be installed as **Python packages**. This means
+that they're a component of your application, just like any other module. Models
+can be installed using spaCy's [`download`](https://spacy.io/api/cli#download)
 command, or manually by pointing pip to a path or URL.

 | Documentation | |
@@ -242,8 +241,7 @@ do that depends on your system.
 | **Mac** | Install a recent version of [XCode](https://developer.apple.com/xcode/), including the so-called "Command Line Tools". macOS and OS X ship with Python and git preinstalled. |
 | **Windows** | Install a version of the [Visual C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/) or [Visual Studio Express](https://visualstudio.microsoft.com/vs/express/) that matches the version that was used to compile your Python interpreter. |

-For more details
-and instructions, see the documentation on
+For more details and instructions, see the documentation on
 [compiling spaCy from source](https://spacy.io/usage#source) and the
 [quickstart widget](https://spacy.io/usage#section-quickstart) to get the right
 commands for your platform and Python version.
pyproject.toml

@@ -9,3 +9,6 @@ requires = [
     "numpy>=1.15.0",
 ]
 build-backend = "setuptools.build_meta"
+
+[tool.isort]
+profile = "black"
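This `[tool.isort]` entry is the switch behind most of the import churn in the Python files below: isort's `black` profile orders imports as stdlib, third-party, then local, alphabetizes within each group, and wraps long `from` imports in parentheses with one name per line. A minimal stdlib-only illustration (the module shown is made up for this example, not from the diff):

```python
# Before sorting, a module might read:
#
#     from typing import Optional, Dict
#     import sys
#     from pathlib import Path
#
# After `isort --profile black`, plain imports precede from-imports,
# and names inside each import are alphabetized:
import sys
from pathlib import Path
from typing import Dict, Optional
```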
requirements.txt

@@ -38,3 +38,5 @@ types-setuptools>=57.0.0
 types-requests
 types-setuptools>=57.0.0
 black==22.3.0
+cython-lint>=0.15.0; python_version >= "3.7"
+isort>=5.0,<6.0
setup.py — 31 changed lines
@@ -1,10 +1,9 @@
 #!/usr/bin/env python
 from setuptools import Extension, setup, find_packages
 import sys
-import platform
 import numpy
-from distutils.command.build_ext import build_ext
-from distutils.sysconfig import get_python_inc
+from setuptools.command.build_ext import build_ext
+from sysconfig import get_path
 from pathlib import Path
 import shutil
 from Cython.Build import cythonize
@@ -88,30 +87,6 @@ COPY_FILES = {
 }


-def is_new_osx():
-    """Check whether we're on OSX >= 10.7"""
-    if sys.platform != "darwin":
-        return False
-    mac_ver = platform.mac_ver()[0]
-    if mac_ver.startswith("10"):
-        minor_version = int(mac_ver.split(".")[1])
-        if minor_version >= 7:
-            return True
-        else:
-            return False
-    return False
-
-
-if is_new_osx():
-    # On Mac, use libc++ because Apple deprecated use of
-    # libstdc
-    COMPILE_OPTIONS["other"].append("-stdlib=libc++")
-    LINK_OPTIONS["other"].append("-lc++")
-    # g++ (used by unix compiler on mac) links to libstdc++ as a default lib.
-    # See: https://stackoverflow.com/questions/1653047/avoid-linking-to-libstdc
-    LINK_OPTIONS["other"].append("-nodefaultlibs")
-
-
 # By subclassing build_extensions we have the actual compiler that will be used which is really known only after finalize_options
 # http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
 class build_ext_options:
@@ -204,7 +179,7 @@ def setup_package():

     include_dirs = [
         numpy.get_include(),
-        get_python_inc(plat_specific=True),
+        get_path("include"),
     ]
     ext_modules = []
     ext_modules.append(
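The distutils helpers used here are deprecated (PEP 632); `setuptools.command.build_ext` and `sysconfig` are the drop-in replacements the diff switches to. For the include path specifically, the old and new calls return the same thing — a quick check (output is machine-dependent):

```python
from sysconfig import get_path

# Replacement for distutils.sysconfig.get_python_inc(plat_specific=True):
# the directory containing Python.h for the running interpreter.
print(get_path("include"))  # e.g. /usr/include/python3.11
```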
spacy/__init__.py

@@ -1,6 +1,6 @@
-from typing import Union, Iterable, Dict, Any
-from pathlib import Path
 import sys
+from pathlib import Path
+from typing import Any, Dict, Iterable, Union

 # set library-specific custom warning handling before doing anything else
 from .errors import setup_default_warnings
@@ -8,20 +8,17 @@ from .errors import setup_default_warnings
 setup_default_warnings()  # noqa: E402

 # These are imported as part of the API
-from thinc.api import prefer_gpu, require_gpu, require_cpu  # noqa: F401
-from thinc.api import Config
+from thinc.api import Config, prefer_gpu, require_cpu, require_gpu  # noqa: F401

 from . import pipeline  # noqa: F401
-from .cli.info import info  # noqa: F401
-from .glossary import explain  # noqa: F401
-from .about import __version__  # noqa: F401
-from .util import registry, logger  # noqa: F401
-
-from .errors import Errors
-from .language import Language
-from .vocab import Vocab
 from . import util
+from .about import __version__  # noqa: F401
+from .cli.info import info  # noqa: F401
+from .errors import Errors
+from .glossary import explain  # noqa: F401
+from .language import Language
+from .util import logger, registry  # noqa: F401
+from .vocab import Vocab

 if sys.maxunicode == 65535:
     raise SystemError(Errors.E130)
spacy/about.py

@@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.6.0.dev0"
+__version__ = "3.6.0"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 __projects__ = "https://github.com/explosion/projects"
spacy/attrs.pxd

@@ -1,6 +1,7 @@
 # Reserve 64 values for flag features
+from . cimport symbols


 cdef enum attr_id_t:
     NULL_ATTR
     IS_ALPHA
@@ -95,4 +96,4 @@ cdef enum attr_id_t:
     ENT_ID = symbols.ENT_ID

     IDX
-    SENT_END
+    SENT_END
spacy/attrs.pyx

@@ -117,7 +117,7 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
         if "pos" in stringy_attrs:
             stringy_attrs["TAG"] = stringy_attrs.pop("pos")
         if "morph" in stringy_attrs:
-            morphs = stringy_attrs.pop("morph")
+            morphs = stringy_attrs.pop("morph")  # no-cython-lint
         if "number" in stringy_attrs:
             stringy_attrs.pop("number")
         if "tenspect" in stringy_attrs:
spacy/cli/__init__.py

@@ -1,35 +1,35 @@
 from wasabi import msg

 from ._util import app, setup_cli  # noqa: F401
+from .apply import apply  # noqa: F401
+from .assemble import assemble_cli  # noqa: F401

 # These are the actual functions, NOT the wrapped CLI commands. The CLI commands
 # are registered automatically and won't have to be imported here.
 from .benchmark_speed import benchmark_speed_cli  # noqa: F401
-from .download import download  # noqa: F401
-from .info import info  # noqa: F401
-from .package import package  # noqa: F401
-from .profile import profile  # noqa: F401
-from .train import train_cli  # noqa: F401
-from .assemble import assemble_cli  # noqa: F401
-from .pretrain import pretrain  # noqa: F401
-from .debug_data import debug_data  # noqa: F401
-from .debug_config import debug_config  # noqa: F401
-from .debug_model import debug_model  # noqa: F401
-from .debug_diff import debug_diff  # noqa: F401
-from .evaluate import evaluate  # noqa: F401
-from .apply import apply  # noqa: F401
 from .convert import convert  # noqa: F401
-from .init_pipeline import init_pipeline_cli  # noqa: F401
-from .init_config import init_config, fill_config  # noqa: F401
-from .validate import validate  # noqa: F401
-from .project.clone import project_clone  # noqa: F401
-from .project.assets import project_assets  # noqa: F401
-from .project.run import project_run  # noqa: F401
-from .project.dvc import project_update_dvc  # noqa: F401
-from .project.push import project_push  # noqa: F401
-from .project.pull import project_pull  # noqa: F401
-from .project.document import project_document  # noqa: F401
+from .debug_config import debug_config  # noqa: F401
+from .debug_data import debug_data  # noqa: F401
+from .debug_diff import debug_diff  # noqa: F401
+from .debug_model import debug_model  # noqa: F401
+from .download import download  # noqa: F401
+from .evaluate import evaluate  # noqa: F401
+from .find_threshold import find_threshold  # noqa: F401
+from .info import info  # noqa: F401
+from .init_config import fill_config, init_config  # noqa: F401
+from .init_pipeline import init_pipeline_cli  # noqa: F401
+from .package import package  # noqa: F401
+from .pretrain import pretrain  # noqa: F401
+from .profile import profile  # noqa: F401
+from .project.assets import project_assets  # noqa: F401
+from .project.clone import project_clone  # noqa: F401
+from .project.document import project_document  # noqa: F401
+from .project.dvc import project_update_dvc  # noqa: F401
+from .project.pull import project_pull  # noqa: F401
+from .project.push import project_push  # noqa: F401
+from .project.run import project_run  # noqa: F401
+from .train import train_cli  # noqa: F401
+from .validate import validate  # noqa: F401


 @app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
spacy/cli/_util.py

@@ -1,26 +1,44 @@
-from typing import Dict, Any, Union, List, Optional, Tuple, Iterable
-from typing import TYPE_CHECKING, overload
-import sys
-import shutil
-from pathlib import Path
-from wasabi import msg, Printer
-import srsly
 import hashlib
+import os
+import shutil
+import sys
+from configparser import InterpolationError
+from contextlib import contextmanager
+from pathlib import Path
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Union,
+    overload,
+)
+
+import srsly
 import typer
 from click import NoSuchOption
 from click.parser import split_arg_string
-from typer.main import get_command
-from contextlib import contextmanager
 from thinc.api import Config, ConfigValidationError, require_gpu
 from thinc.util import gpu_is_available
-from configparser import InterpolationError
-import os
+from typer.main import get_command
+from wasabi import Printer, msg

+from .. import about
 from ..compat import Literal
 from ..schemas import ProjectConfigSchema, validate
-from ..util import import_file, run_command, make_tempdir, registry, logger
-from ..util import is_compatible_version, SimpleFrozenDict, ENV_VARS
-from .. import about
+from ..util import (
+    ENV_VARS,
+    SimpleFrozenDict,
+    import_file,
+    is_compatible_version,
+    logger,
+    make_tempdir,
+    registry,
+    run_command,
+)

 if TYPE_CHECKING:
     from pathy import FluidPath  # noqa: F401
spacy/cli/apply.py

@@ -1,18 +1,15 @@
-import tqdm
-import srsly
-
 from itertools import chain
 from pathlib import Path
-from typing import Optional, List, Iterable, cast, Union
+from typing import Iterable, List, Optional, Union, cast
+
+import srsly
+import tqdm
 from wasabi import msg

-from ._util import app, Arg, Opt, setup_gpu, import_code, walk_directory
-
 from ..tokens import Doc, DocBin
-from ..vocab import Vocab
 from ..util import ensure_path, load_model
-
+from ..vocab import Vocab
+from ._util import Arg, Opt, app, import_code, setup_gpu, walk_directory

 path_help = """Location of the documents to predict on.
 Can be a single file in .spacy format or a .jsonl file.
spacy/cli/assemble.py

@@ -1,13 +1,20 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import typer
 import logging
+from pathlib import Path
+from typing import Optional
+
+import typer
+from wasabi import msg

-from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code
 from .. import util
 from ..util import get_sourced_components, load_model_from_config
+from ._util import (
+    Arg,
+    Opt,
+    app,
+    import_code,
+    parse_config_overrides,
+    show_validation_error,
+)


 @app.command(
spacy/cli/benchmark_speed.py

@@ -1,11 +1,12 @@
-from typing import Iterable, List, Optional
 import random
-from itertools import islice
-import numpy
-from pathlib import Path
 import time
-from tqdm import tqdm
+from itertools import islice
+from pathlib import Path
+from typing import Iterable, List, Optional
+
+import numpy
+import typer
+from tqdm import tqdm
 from wasabi import msg

 from .. import util
spacy/cli/convert.py

@@ -1,18 +1,22 @@
-from typing import Callable, Iterable, Mapping, Optional, Any, Union
-from enum import Enum
-from pathlib import Path
-from wasabi import Printer
-import srsly
 import itertools
 import re
 import sys
+from enum import Enum
+from pathlib import Path
+from typing import Any, Callable, Iterable, Mapping, Optional, Union
+
+import srsly
+from wasabi import Printer

-from ._util import app, Arg, Opt, walk_directory
-from ..training import docs_to_json
 from ..tokens import Doc, DocBin
-from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
-from ..training.converters import conllu_to_docs
+from ..training import docs_to_json
+from ..training.converters import (
+    conll_ner_to_docs,
+    conllu_to_docs,
+    iob_to_docs,
+    json_to_docs,
+)
+from ._util import Arg, Opt, app, walk_directory

 # Converters are matched by file extension except for ner/iob, which are
 # matched by file extension and content. To add a converter, add a new
spacy/cli/debug_config.py

@@ -1,15 +1,22 @@
-from typing import Optional, Dict, Any, Union, List
 from pathlib import Path
-from wasabi import msg, table
+from typing import Any, Dict, List, Optional, Union
+
+import typer
 from thinc.api import Config
 from thinc.config import VARIABLE_RE
-import typer
+from wasabi import msg, table

-from ._util import Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli
 from .. import util
 from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
 from ..util import registry
+from ._util import (
+    Arg,
+    Opt,
+    debug_cli,
+    import_code,
+    parse_config_overrides,
+    show_validation_error,
+)


 @debug_cli.command(
spacy/cli/debug_data.py

@@ -1,31 +1,49 @@
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
-from typing import cast, overload
-from pathlib import Path
-from collections import Counter
-import sys
-import srsly
-from wasabi import Printer, MESSAGES, msg
-import typer
 import math
-import numpy
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import (
+    Any,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Union,
+    cast,
+    overload,
+)

-from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
-from ._util import import_code, debug_cli, _format_number
-from ..training import Example, remove_bilu_prefix
-from ..training.initialize import get_sourced_components
-from ..schemas import ConfigSchemaTraining
-from ..pipeline import TrainablePipe
+import numpy
+import srsly
+import typer
+from wasabi import MESSAGES, Printer, msg
+
+from .. import util
+from ..compat import Literal
+from ..language import Language
+from ..morphology import Morphology
+from ..pipeline import Morphologizer, SpanCategorizer, TrainablePipe
+from ..pipeline._edit_tree_internals.edit_trees import EditTrees
 from ..pipeline._parser_internals import nonproj
 from ..pipeline._parser_internals.nonproj import DELIMITER
-from ..pipeline import Morphologizer, SpanCategorizer
-from ..pipeline._edit_tree_internals.edit_trees import EditTrees
-from ..morphology import Morphology
-from ..language import Language
+from ..schemas import ConfigSchemaTraining
+from ..training import Example, remove_bilu_prefix
+from ..training.initialize import get_sourced_components
 from ..util import registry, resolve_dot_names
-from ..compat import Literal
 from ..vectors import Mode as VectorsMode
-from .. import util
+from ._util import (
+    Arg,
+    Opt,
+    _format_number,
+    app,
+    debug_cli,
+    import_code,
+    parse_config_overrides,
+    show_validation_error,
+)

 # Minimum number of expected occurrences of NER label in data to train new label
 NEW_LABEL_THRESHOLD = 50
@@ -212,7 +230,7 @@ def debug_data(
     else:
         msg.info("No word vectors present in the package")

-    if "spancat" in factory_names:
+    if "spancat" in factory_names or "spancat_singlelabel" in factory_names:
         model_labels_spancat = _get_labels_from_spancat(nlp)
         has_low_data_warning = False
         has_no_neg_warning = False
@@ -830,7 +848,7 @@ def _compile_gold(
                     data["boundary_cross_ents"] += 1
                 elif label == "-":
                     data["ner"]["-"] += 1
-    if "spancat" in factory_names:
+    if "spancat" in factory_names or "spancat_singlelabel" in factory_names:
         for spans_key in list(eg.reference.spans.keys()):
             # Obtain the span frequency
             if spans_key not in data["spancat"]:
@@ -1028,7 +1046,7 @@ def _get_labels_from_spancat(nlp: Language) -> Dict[str, Set[str]]:
     pipe_names = [
         pipe_name
         for pipe_name in nlp.pipe_names
-        if nlp.get_pipe_meta(pipe_name).factory == "spancat"
+        if nlp.get_pipe_meta(pipe_name).factory in ("spancat", "spancat_singlelabel")
     ]
     labels: Dict[str, Set[str]] = {}
     for pipe_name in pipe_names:
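These checks key off a component's registered factory name rather than its pipe name, so a span categorizer added under any custom name is still picked up. A minimal sketch of the pattern (hypothetical blank pipeline; the pipe name is illustrative):

```python
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("spancat", name="my_spans")  # multi-label span categorizer

# Factory-based lookup, as in _get_labels_from_spancat above: the component
# is found via its factory ("spancat"), not its pipe name ("my_spans").
spancat_pipes = [
    name
    for name in nlp.pipe_names
    if nlp.get_pipe_meta(name).factory in ("spancat", "spancat_singlelabel")
]
print(spancat_pipes)  # ['my_spans']
```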
spacy/cli/debug_diff.py

@@ -1,13 +1,13 @@
+from pathlib import Path
 from typing import Optional

 import typer
-from wasabi import Printer, diff_strings, MarkdownRenderer
-from pathlib import Path
 from thinc.api import Config
+from wasabi import MarkdownRenderer, Printer, diff_strings

-from ._util import debug_cli, Arg, Opt, show_validation_error, parse_config_overrides
 from ..util import load_config
-from .init_config import init_config, Optimizations
+from ._util import Arg, Opt, debug_cli, parse_config_overrides, show_validation_error
+from .init_config import Optimizations, init_config


 @debug_cli.command(
spacy/cli/debug_model.py

@@ -1,19 +1,32 @@
-from typing import Dict, Any, Optional
-from pathlib import Path
 import itertools
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+import typer
+from thinc.api import (
+    Model,
+    data_validation,
+    fix_random_seed,
+    set_dropout_rate,
+    set_gpu_allocator,
+)
+from wasabi import msg

 from spacy.training import Example
 from spacy.util import resolve_dot_names
-from wasabi import msg
-from thinc.api import fix_random_seed, set_dropout_rate
-from thinc.api import Model, data_validation, set_gpu_allocator
-import typer

-from ._util import Arg, Opt, debug_cli, show_validation_error
-from ._util import parse_config_overrides, string_to_list, setup_gpu
+from .. import util
 from ..schemas import ConfigSchemaTraining
 from ..util import registry
-from .. import util
+from ._util import (
+    Arg,
+    Opt,
+    debug_cli,
+    parse_config_overrides,
+    setup_gpu,
+    show_validation_error,
+    string_to_list,
+)


 @debug_cli.command(
spacy/cli/download.py

@@ -1,14 +1,14 @@
-from typing import Optional, Sequence
-import requests
 import sys
-from wasabi import msg
-import typer
+from typing import Optional, Sequence
+
+import requests
+import typer
+from wasabi import msg

-from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX
 from .. import about
-from ..util import is_package, get_minor_version, run_command
-from ..util import is_prerelease_version
 from ..errors import OLD_MODEL_SHORTCUTS
+from ..util import get_minor_version, is_package, is_prerelease_version, run_command
+from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app


 @app.command(
spacy/cli/evaluate.py

@@ -1,16 +1,16 @@
-from typing import Optional, List, Dict, Any, Union
-from wasabi import Printer
-from pathlib import Path
 import re
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
 import srsly
 from thinc.api import fix_random_seed
+from wasabi import Printer

-from ..training import Corpus
-from ..tokens import Doc
-from ._util import app, Arg, Opt, setup_gpu, import_code, benchmark_cli
+from .. import displacy, util
 from ..scorer import Scorer
-from .. import util
-from .. import displacy
+from ..tokens import Doc
+from ..training import Corpus
+from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu


 @benchmark_cli.command(
spacy/cli/find_threshold.py

@@ -1,17 +1,17 @@
 import functools
+import logging
 import operator
 from pathlib import Path
-import logging
-from typing import Optional, Tuple, Any, Dict, List
+from typing import Any, Dict, List, Optional, Tuple

 import numpy
 import wasabi.tables

-from ..pipeline import TextCategorizer, MultiLabel_TextCategorizer
-from ..errors import Errors
-from ..training import Corpus
-from ._util import app, Arg, Opt, import_code, setup_gpu
 from .. import util
+from ..errors import Errors
+from ..pipeline import MultiLabel_TextCategorizer, TextCategorizer
+from ..training import Corpus
+from ._util import Arg, Opt, app, import_code, setup_gpu

 _DEFAULTS = {
     "n_trials": 11,
spacy/cli/info.py

@@ -1,15 +1,15 @@
-from typing import Optional, Dict, Any, Union, List
-import platform
 import json
+import platform
 from pathlib import Path
-from wasabi import Printer, MarkdownRenderer
-import srsly
+from typing import Any, Dict, List, Optional, Union

-from ._util import app, Arg, Opt, string_to_list
-from .download import get_model_filename, get_latest_version
-from .. import util
-from .. import about
+import srsly
+from wasabi import MarkdownRenderer, Printer
+
+from .. import about, util
 from ..compat import importlib_metadata
+from ._util import Arg, Opt, app, string_to_list
+from .download import get_latest_version, get_model_filename


 @app.command("info")
spacy/cli/init_config.py

@@ -1,19 +1,26 @@
-from typing import Optional, List, Tuple
+import re
 from enum import Enum
 from pathlib import Path
-from wasabi import Printer, diff_strings
-from thinc.api import Config
+from typing import List, Optional, Tuple

 import srsly
-import re
 from jinja2 import Template
+from thinc.api import Config
+from wasabi import Printer, diff_strings

 from .. import util
 from ..language import DEFAULT_CONFIG_PRETRAIN_PATH
 from ..schemas import RecommendationSchema
 from ..util import SimpleFrozenList
-from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
-from ._util import string_to_list, import_code
-
+from ._util import (
+    COMMAND,
+    Arg,
+    Opt,
+    import_code,
+    init_cli,
+    show_validation_error,
+    string_to_list,
+)

 ROOT = Path(__file__).parent / "templates"
 TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
spacy/cli/init_pipeline.py

@@ -1,15 +1,23 @@
-from typing import Optional
 import logging
 from pathlib import Path
-from wasabi import msg
-import typer
+from typing import Optional

 import srsly
+import typer
+from wasabi import msg

 from .. import util
-from ..training.initialize import init_nlp, convert_vectors
 from ..language import Language
-from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code, setup_gpu
+from ..training.initialize import convert_vectors, init_nlp
+from ._util import (
+    Arg,
+    Opt,
+    import_code,
+    init_cli,
+    parse_config_overrides,
+    setup_gpu,
+    show_validation_error,
+)


 @init_cli.command("vectors")
@@ -24,6 +32,7 @@ def init_vectors_cli(
     name: Optional[str] = Opt(None, "--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
     verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
     jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
+    attr: str = Opt("ORTH", "--attr", "-a", help="Optional token attribute to use for vectors, e.g. LOWER or NORM"),
     # fmt: on
 ):
     """Convert word vectors for use with spaCy. Will export an nlp object that
@@ -42,6 +51,7 @@ def init_vectors_cli(
         prune=prune,
         name=name,
         mode=mode,
+        attr=attr,
     )
     msg.good(f"Successfully converted {len(nlp.vocab.vectors)} vectors")
     nlp.to_disk(output_dir)
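`init vectors` grows an `--attr` option, threaded through to the conversion call as `attr=attr`, so vectors can be keyed by a token attribute other than the default `ORTH`. A small hedged sketch of what that means at the `Vectors` level (relying on the `Vectors(attr=...)` parameter that the new warning W125 further down also points to):

```python
import numpy
from spacy.vectors import Vectors

# Keyed by LOWER instead of ORTH: "Apple" and "apple" map to the same row.
vectors = Vectors(shape=(1, 4), attr="LOWER")
vectors.add("apple", vector=numpy.ones((4,), dtype="f"))
```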
spacy/cli/package.py

@@ -1,18 +1,18 @@
-from typing import Optional, Union, Any, Dict, List, Tuple, cast
-import shutil
-from pathlib import Path
-from wasabi import Printer, MarkdownRenderer, get_raw_input
-from thinc.api import Config
-from collections import defaultdict
-from catalogue import RegistryError
-import srsly
-import sys
 import re
+import shutil
+import sys
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union, cast

-from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
-from ..schemas import validate, ModelMetaSchema
-from .. import util
-from .. import about
+import srsly
+from catalogue import RegistryError
+from thinc.api import Config
+from wasabi import MarkdownRenderer, Printer, get_raw_input
+
+from .. import about, util
+from ..schemas import ModelMetaSchema, validate
+from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app, string_to_list


 @app.command("package")
spacy/cli/pretrain.py

@@ -1,13 +1,21 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import typer
 import re
+from pathlib import Path
+from typing import Optional
+
+import typer
+from wasabi import msg

-from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code, setup_gpu
 from ..training.pretrain import pretrain
 from ..util import load_config
+from ._util import (
+    Arg,
+    Opt,
+    app,
+    import_code,
+    parse_config_overrides,
+    setup_gpu,
+    show_validation_error,
+)


 @app.command(
spacy/cli/profile.py

@@ -1,17 +1,18 @@
-from typing import Optional, Sequence, Union, Iterator
-import tqdm
-from pathlib import Path
-import srsly
 import cProfile
+import itertools
 import pstats
 import sys
-import itertools
-from wasabi import msg, Printer
-import typer
+from pathlib import Path
+from typing import Iterator, Optional, Sequence, Union

+import srsly
+import tqdm
+import typer
+from wasabi import Printer, msg

-from ._util import app, debug_cli, Arg, Opt, NAME
 from ..language import Language
 from ..util import load_model
+from ._util import NAME, Arg, Opt, app, debug_cli


 @debug_cli.command("profile")
spacy/cli/project/assets.py

@@ -1,16 +1,27 @@
-from typing import Any, Dict, Optional
-from pathlib import Path
-from wasabi import msg
 import os
 import re
 import shutil
+from pathlib import Path
+from typing import Any, Dict, Optional

 import requests
 import typer
+from wasabi import msg

 from ...util import ensure_path, working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
-from .._util import get_checksum, download_file, git_checkout, get_git_version
-from .._util import SimpleFrozenDict, parse_config_overrides
+from .._util import (
+    PROJECT_FILE,
+    Arg,
+    Opt,
+    SimpleFrozenDict,
+    download_file,
+    get_checksum,
+    get_git_version,
+    git_checkout,
+    load_project_config,
+    parse_config_overrides,
+    project_cli,
+)

 # Whether assets are extra if `extra` is not set.
 EXTRA_DEFAULT = False
spacy/cli/project/clone.py

@@ -1,13 +1,22 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import subprocess
 import re
+import subprocess
+from pathlib import Path
+from typing import Optional
+
+from wasabi import msg

 from ... import about
 from ...util import ensure_path
-from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
-from .._util import git_checkout, get_git_version, git_repo_branch_exists
+from .._util import (
+    COMMAND,
+    PROJECT_FILE,
+    Arg,
+    Opt,
+    get_git_version,
+    git_checkout,
+    git_repo_branch_exists,
+    project_cli,
+)

 DEFAULT_REPO = about.__projects__
 DEFAULT_PROJECTS_BRANCH = about.__projects_branch__
spacy/cli/project/document.py

@@ -1,9 +1,9 @@
 from pathlib import Path
-from wasabi import msg, MarkdownRenderer

+from wasabi import MarkdownRenderer, msg

 from ...util import working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
+from .._util import PROJECT_FILE, Arg, Opt, load_project_config, project_cli

 DOCS_URL = "https://spacy.io"
 INTRO_PROJECT = f"""The [`{PROJECT_FILE}`]({PROJECT_FILE}) defines the data assets required by the
spacy/cli/project/dvc.py

@@ -1,15 +1,28 @@
 """This module contains helpers and subcommands for integrating spaCy projects
 with Data Version Control (DVC). https://dvc.org"""
-from typing import Dict, Any, List, Optional, Iterable
 import subprocess
 from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional

 from wasabi import msg

-from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
-from .._util import Arg, Opt, NAME, COMMAND
-from ...util import working_dir, split_command, join_command, run_command
-from ...util import SimpleFrozenList
-
+from ...util import (
+    SimpleFrozenList,
+    join_command,
+    run_command,
+    split_command,
+    working_dir,
+)
+from .._util import (
+    COMMAND,
+    NAME,
+    PROJECT_FILE,
+    Arg,
+    Opt,
+    get_hash,
+    load_project_config,
+    project_cli,
+)

 DVC_CONFIG = "dvc.yaml"
 DVC_DIR = ".dvc"
spacy/cli/project/pull.py

@@ -1,9 +1,9 @@
 from pathlib import Path

 from wasabi import msg
-from .remote_storage import RemoteStorage
-from .remote_storage import get_command_hash
-from .._util import project_cli, Arg, logger
-from .._util import load_project_config
+
+from .._util import Arg, load_project_config, logger, project_cli
+from .remote_storage import RemoteStorage, get_command_hash
 from .run import update_lockfile
spacy/cli/project/push.py

@@ -1,9 +1,9 @@
 from pathlib import Path

 from wasabi import msg
-from .remote_storage import RemoteStorage
-from .remote_storage import get_content_hash, get_command_hash
-from .._util import load_project_config
-from .._util import project_cli, Arg, logger
+
+from .._util import Arg, load_project_config, logger, project_cli
+from .remote_storage import RemoteStorage, get_command_hash, get_content_hash


 @project_cli.command("push")
spacy/cli/project/remote_storage.py

@@ -1,18 +1,25 @@
-from typing import Optional, List, Dict, TYPE_CHECKING
+import hashlib
 import os
 import site
-import hashlib
-import urllib.parse
 import tarfile
+import urllib.parse
 from pathlib import Path
+from typing import TYPE_CHECKING, Dict, List, Optional

 from wasabi import msg

-from .._util import get_hash, get_checksum, upload_file, download_file
-from .._util import ensure_pathy, make_tempdir
-from ...util import get_minor_version, ENV_VARS, check_bool_env_var
-from ...git_info import GIT_VERSION
 from ... import about
 from ...errors import Errors
+from ...git_info import GIT_VERSION
+from ...util import ENV_VARS, check_bool_env_var, get_minor_version
+from .._util import (
+    download_file,
+    ensure_pathy,
+    get_checksum,
+    get_hash,
+    make_tempdir,
+    upload_file,
+)

 if TYPE_CHECKING:
     from pathy import FluidPath  # noqa: F401
spacy/cli/project/run.py

@@ -1,20 +1,39 @@
-from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
 import os.path
-from pathlib import Path
-
-from wasabi import msg
-from wasabi.util import locale_escape
 import sys
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

+import srsly
+import typer
+from wasabi import msg
+from wasabi.util import locale_escape

 from ... import about
 from ...git_info import GIT_VERSION
-from ...util import working_dir, run_command, split_command, is_cwd, join_command
-from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
-from ...util import check_bool_env_var, SimpleFrozenDict
-from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
-from .._util import get_checksum, project_cli, Arg, Opt, COMMAND, parse_config_overrides
+from ...util import (
+    ENV_VARS,
+    SimpleFrozenDict,
+    SimpleFrozenList,
+    check_bool_env_var,
+    is_cwd,
+    is_minor_version_match,
+    join_command,
+    run_command,
+    split_command,
+    working_dir,
+)
+from .._util import (
+    COMMAND,
+    PROJECT_FILE,
+    PROJECT_LOCK,
+    Arg,
+    Opt,
+    get_checksum,
+    get_hash,
+    load_project_config,
+    parse_config_overrides,
+    project_cli,
+)


 @project_cli.command(
spacy/cli/templates/quickstart_training.jinja

@@ -3,7 +3,7 @@ the docs and the init config command. It encodes various best practices and
 can help generate the best possible configuration, given a user's requirements. #}
 {%- set use_transformer = hardware != "cpu" and transformer_data -%}
 {%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
-{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
+{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "span_finder", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
 [paths]
 train = null
 dev = null
@@ -28,7 +28,7 @@ lang = "{{ lang }}"
 tok2vec/transformer. #}
 {%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
 {%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
-{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
+{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "span_finder" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
 {%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
 {%- else -%}
 {%- set full_pipeline = components -%}
@@ -127,6 +127,30 @@ grad_factor = 1.0
 @layers = "reduce_mean.v1"
 {% endif -%}

+{% if "span_finder" in components -%}
+[components.span_finder]
+factory = "span_finder"
+max_length = 25
+min_length = null
+scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
+spans_key = "sc"
+threshold = 0.5
+
+[components.span_finder.model]
+@architectures = "spacy.SpanFinder.v1"
+
+[components.span_finder.model.scorer]
+@layers = "spacy.LinearLogistic.v1"
+nO = 2
+
+[components.span_finder.model.tok2vec]
+@architectures = "spacy-transformers.TransformerListener.v1"
+grad_factor = 1.0
+
+[components.span_finder.model.tok2vec.pooling]
+@layers = "reduce_mean.v1"
+{% endif -%}
+
 {% if "spancat" in components -%}
 [components.spancat]
 factory = "spancat"
@@ -392,6 +416,27 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 {% endif %}

+{% if "span_finder" in components %}
+[components.span_finder]
+factory = "span_finder"
+max_length = 25
+min_length = null
+scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
+spans_key = "sc"
+threshold = 0.5
+
+[components.span_finder.model]
+@architectures = "spacy.SpanFinder.v1"
+
+[components.span_finder.model.scorer]
+@layers = "spacy.LinearLogistic.v1"
+nO = 2
+
+[components.span_finder.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+{% endif %}
+
 {% if "spancat" in components %}
 [components.spancat]
 factory = "spancat"
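Both branches of the template (transformer and CPU tok2vec) now emit the same `span_finder` block, differing only in the tok2vec listener. As a rough runtime illustration of those settings — a sketch assuming spaCy 3.6+, where the `span_finder` factory is registered:

```python
import spacy

nlp = spacy.blank("en")
# Mirrors the template's settings: the component proposes candidate spans
# (bounded to 25 tokens) and writes them to doc.spans["sc"].
nlp.add_pipe(
    "span_finder",
    config={
        "max_length": 25,
        "min_length": None,
        "spans_key": "sc",
        "threshold": 0.5,
    },
)
print(nlp.pipe_names)  # ['span_finder']
```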
spacy/cli/train.py

@@ -1,15 +1,23 @@
-from typing import Optional, Dict, Any, Union
-from pathlib import Path
-from wasabi import msg
-import typer
 import logging
 import sys
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
+
+import typer
+from wasabi import msg

-from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
-from ._util import import_code, setup_gpu
-from ..training.loop import train as train_nlp
-from ..training.initialize import init_nlp
 from .. import util
+from ..training.initialize import init_nlp
+from ..training.loop import train as train_nlp
+from ._util import (
+    Arg,
+    Opt,
+    app,
+    import_code,
+    parse_config_overrides,
+    setup_gpu,
+    show_validation_error,
+)


 @app.command(
spacy/cli/validate.py

@@ -1,14 +1,21 @@
-from typing import Tuple
-from pathlib import Path
 import sys
-import requests
-from wasabi import msg, Printer
 import warnings
+from pathlib import Path
+from typing import Tuple

+import requests
+from wasabi import Printer, msg

-from ._util import app
 from .. import about
-from ..util import get_package_version, get_installed_models, get_minor_version
-from ..util import get_package_path, get_model_meta, is_compatible_version
+from ..util import (
+    get_installed_models,
+    get_minor_version,
+    get_model_meta,
+    get_package_path,
+    get_package_version,
+    is_compatible_version,
+)
+from ._util import app


 @app.command("validate")
spacy/compat.py

@@ -1,5 +1,6 @@
 """Helpers for Python and platform compatibility."""
 import sys
+
 from thinc.util import copy_array

 try:
spacy/displacy/__init__.py

@@ -4,15 +4,13 @@ spaCy's built in visualization suite for dependencies and named entities.
 DOCS: https://spacy.io/api/top-level#displacy
 USAGE: https://spacy.io/usage/visualizers
 """
-from typing import Union, Iterable, Optional, Dict, Any, Callable
 import warnings
+from typing import Any, Callable, Dict, Iterable, Optional, Union

-from .render import DependencyRenderer, EntityRenderer, SpanRenderer
-from ..tokens import Doc, Span
 from ..errors import Errors, Warnings
-from ..util import is_in_jupyter
-from ..util import find_available_port
-
+from ..tokens import Doc, Span
+from ..util import find_available_port, is_in_jupyter
+from .render import DependencyRenderer, EntityRenderer, SpanRenderer

 _html = {}
 RENDER_WRAPPER = None
@@ -68,7 +66,7 @@ def render(
     if jupyter or (jupyter is None and is_in_jupyter()):
         # return HTML rendered by IPython display()
         # See #4840 for details on span wrapper to disable mathjax
-        from IPython.core.display import display, HTML
+        from IPython.core.display import HTML, display

         return display(HTML('<span class="tex2jax_ignore">{}</span>'.format(html)))
     return html
@ -1,15 +1,28 @@
|
|||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
import uuid
|
||||
import itertools
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from ..errors import Errors
|
||||
from ..util import escape_html, minify_html, registry
|
||||
from .templates import TPL_DEP_ARCS, TPL_DEP_SVG, TPL_DEP_WORDS
|
||||
from .templates import TPL_DEP_WORDS_LEMMA, TPL_ENT, TPL_ENT_RTL, TPL_ENTS
|
||||
from .templates import TPL_FIGURE, TPL_KB_LINK, TPL_PAGE, TPL_SPAN
|
||||
from .templates import TPL_SPAN_RTL, TPL_SPAN_SLICE, TPL_SPAN_SLICE_RTL
|
||||
from .templates import TPL_SPAN_START, TPL_SPAN_START_RTL, TPL_SPANS
|
||||
from .templates import TPL_TITLE
|
||||
from .templates import (
|
||||
TPL_DEP_ARCS,
|
||||
TPL_DEP_SVG,
|
||||
TPL_DEP_WORDS,
|
||||
TPL_DEP_WORDS_LEMMA,
|
||||
TPL_ENT,
|
||||
TPL_ENT_RTL,
|
||||
TPL_ENTS,
|
||||
TPL_FIGURE,
|
||||
TPL_KB_LINK,
|
||||
TPL_PAGE,
|
||||
TPL_SPAN,
|
||||
TPL_SPAN_RTL,
|
||||
TPL_SPAN_SLICE,
|
||||
TPL_SPAN_SLICE_RTL,
|
||||
TPL_SPAN_START,
|
||||
TPL_SPAN_START_RTL,
|
||||
TPL_SPANS,
|
||||
TPL_TITLE,
|
||||
)
|
||||
|
||||
DEFAULT_LANG = "en"
|
||||
DEFAULT_DIR = "ltr"
|
||||
|
@ -204,7 +217,7 @@ class SpanRenderer:
|
|||
+ (self.offset_step * (len(entities) - 1))
|
||||
)
|
||||
markup += self.span_template.format(
|
||||
text=token["text"],
|
||||
text=escape_html(token["text"]),
|
||||
span_slices=slices,
|
||||
span_starts=starts,
|
||||
total_height=total_height,
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import warnings
|
||||
|
||||
from .compat import Literal
|
||||
|
||||
|
||||
|
@ -215,6 +216,9 @@ class Warnings(metaclass=ErrorsWithCodes):
|
|||
W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
|
||||
"`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
|
||||
W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")
|
||||
W125 = ("The StaticVectors key_attr is no longer used. To set a custom "
|
||||
"key attribute for vectors, configure it through Vectors(attr=) or "
|
||||
"'spacy init vectors --attr'")
|
||||
|
||||
|
||||
class Errors(metaclass=ErrorsWithCodes):
|
||||
|
@ -738,8 +742,8 @@ class Errors(metaclass=ErrorsWithCodes):
|
|||
"model from a shortcut, which is obsolete as of spaCy v3.0. To "
|
||||
"load the model, use its full name instead:\n\n"
|
||||
"nlp = spacy.load(\"{full}\")\n\nFor more details on the available "
|
||||
"models, see the models directory: https://spacy.io/models. If you "
|
||||
"want to create a blank model, use spacy.blank: "
|
||||
"models, see the models directory: https://spacy.io/models and if "
|
||||
"you want to create a blank model, use spacy.blank: "
|
||||
"nlp = spacy.blank(\"{name}\")")
|
||||
E942 = ("Executing `after_{name}` callback failed. Expected the function to "
|
||||
"return an initialized nlp object but got: {value}. Maybe "
|
||||
|
@ -970,6 +974,13 @@ class Errors(metaclass=ErrorsWithCodes):
|
|||
E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` "
|
||||
"or use `auto_select_port=True` to pick an available port automatically.")
|
||||
E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.")
|
||||
E1052 = ("Unable to copy spans: the character offsets for the span at "
|
||||
"index {i} in the span group do not align with the tokenization "
|
||||
"in the target doc.")
|
||||
E1053 = ("Both 'min_length' and 'max_length' should be larger than 0, but found"
|
||||
" 'min_length': {min_length}, 'max_length': {max_length}")
|
||||
E1054 = ("The text, including whitespace, must match between reference and "
|
||||
"predicted docs when training {component}.")
|
||||
|
||||
|
||||
# Deprecated model shortcuts, only used in errors and warnings
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import warnings
|
||||
|
||||
from .errors import Warnings
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
from .candidate import Candidate, get_candidates, get_candidates_batch
|
||||
from .kb import KnowledgeBase
|
||||
from .kb_in_memory import InMemoryLookupKB
|
||||
from .candidate import Candidate, get_candidates, get_candidates_batch
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
from .kb cimport KnowledgeBase
|
||||
from libcpp.vector cimport vector
|
||||
from ..typedefs cimport hash_t
|
||||
|
||||
# Object used by the Entity Linker that summarizes one entity-alias candidate combination.
|
||||
from ..typedefs cimport hash_t
|
||||
from .kb cimport KnowledgeBase
|
||||
|
||||
|
||||
# Object used by the Entity Linker that summarizes one entity-alias candidate
|
||||
# combination.
|
||||
cdef class Candidate:
|
||||
cdef readonly KnowledgeBase kb
|
||||
cdef hash_t entity_hash
|
||||
|
|
|
@ -1,19 +1,31 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
|
||||
from typing import Iterable
|
||||
|
||||
from .kb cimport KnowledgeBase
|
||||
|
||||
from ..tokens import Span
|
||||
|
||||
|
||||
cdef class Candidate:
|
||||
"""A `Candidate` object refers to a textual mention (`alias`) that may or may not be resolved
|
||||
to a specific `entity` from a Knowledge Base. This will be used as input for the entity linking
|
||||
algorithm which will disambiguate the various candidates to the correct one.
|
||||
"""A `Candidate` object refers to a textual mention (`alias`) that may or
|
||||
may not be resolved to a specific `entity` from a Knowledge Base. This
|
||||
will be used as input for the entity linking algorithm which will
|
||||
disambiguate the various candidates to the correct one.
|
||||
Each candidate (alias, entity) pair is assigned a certain prior probability.
|
||||
|
||||
DOCS: https://spacy.io/api/kb/#candidate-init
|
||||
"""
|
||||
|
||||
def __init__(self, KnowledgeBase kb, entity_hash, entity_freq, entity_vector, alias_hash, prior_prob):
|
||||
def __init__(
|
||||
self,
|
||||
KnowledgeBase kb,
|
||||
entity_hash,
|
||||
entity_freq,
|
||||
entity_vector,
|
||||
alias_hash,
|
||||
prior_prob
|
||||
):
|
||||
self.kb = kb
|
||||
self.entity_hash = entity_hash
|
||||
self.entity_freq = entity_freq
|
||||
|
@ -56,7 +68,8 @@ cdef class Candidate:
|
|||
|
||||
def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]:
|
||||
"""
|
||||
Return candidate entities for a given mention and fetching appropriate entries from the index.
|
||||
Return candidate entities for a given mention and fetching appropriate
|
||||
entries from the index.
|
||||
kb (KnowledgeBase): Knowledge base to query.
|
||||
mention (Span): Entity mention for which to identify candidates.
|
||||
RETURNS (Iterable[Candidate]): Identified candidates.
|
||||
|
@ -64,9 +77,12 @@ def get_candidates(kb: KnowledgeBase, mention: Span) -> Iterable[Candidate]:
|
|||
return kb.get_candidates(mention)
|
||||
|
||||
|
||||
def get_candidates_batch(kb: KnowledgeBase, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]:
|
||||
def get_candidates_batch(
|
||||
kb: KnowledgeBase, mentions: Iterable[Span]
|
||||
) -> Iterable[Iterable[Candidate]]:
|
||||
"""
|
||||
Return candidate entities for the given mentions and fetching appropriate entries from the index.
|
||||
Return candidate entities for the given mentions and fetching appropriate entries
|
||||
from the index.
|
||||
kb (KnowledgeBase): Knowledge base to query.
|
||||
mention (Iterable[Span]): Entity mentions for which to identify candidates.
|
||||
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
|
||||
|
|
|
@ -2,8 +2,10 @@
|
|||
|
||||
from cymem.cymem cimport Pool
|
||||
from libc.stdint cimport int64_t
|
||||
|
||||
from ..vocab cimport Vocab
|
||||
|
||||
|
||||
cdef class KnowledgeBase:
|
||||
cdef Pool mem
|
||||
cdef readonly Vocab vocab
|
||||
|
|
|
@ -2,17 +2,19 @@
|
|||
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Tuple, Union
|
||||
|
||||
from cymem.cymem cimport Pool
|
||||
|
||||
from .candidate import Candidate
|
||||
from ..errors import Errors
|
||||
from ..tokens import Span
|
||||
from ..util import SimpleFrozenList
|
||||
from ..errors import Errors
|
||||
from .candidate import Candidate
|
||||
|
||||
|
||||
cdef class KnowledgeBase:
|
||||
"""A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
|
||||
to support entity linking of named entities to real-world concepts.
|
||||
"""A `KnowledgeBase` instance stores unique identifiers for entities and
|
||||
their textual aliases, to support entity linking of named entities to
|
||||
real-world concepts.
|
||||
This is an abstract class and requires its operations to be implemented.
|
||||
|
||||
DOCS: https://spacy.io/api/kb
|
||||
|
@ -30,10 +32,13 @@ cdef class KnowledgeBase:
|
|||
self.entity_vector_length = entity_vector_length
|
||||
self.mem = Pool()
|
||||
|
||||
def get_candidates_batch(self, mentions: Iterable[Span]) -> Iterable[Iterable[Candidate]]:
|
||||
def get_candidates_batch(
|
||||
self, mentions: Iterable[Span]
|
||||
) -> Iterable[Iterable[Candidate]]:
|
||||
"""
|
||||
Return candidate entities for specified texts. Each candidate defines the entity, the original alias,
|
||||
and the prior probability of that alias resolving to that entity.
|
||||
Return candidate entities for specified texts. Each candidate defines
|
||||
the entity, the original alias, and the prior probability of that
|
||||
alias resolving to that entity.
|
||||
If no candidate is found for a given text, an empty list is returned.
|
||||
mentions (Iterable[Span]): Mentions for which to get candidates.
|
||||
RETURNS (Iterable[Iterable[Candidate]]): Identified candidates.
|
||||
|
@ -42,14 +47,17 @@ cdef class KnowledgeBase:
|
|||
|
||||
def get_candidates(self, mention: Span) -> Iterable[Candidate]:
|
||||
"""
|
||||
Return candidate entities for specified text. Each candidate defines the entity, the original alias,
|
||||
Return candidate entities for specified text. Each candidate defines
|
||||
the entity, the original alias,
|
||||
and the prior probability of that alias resolving to that entity.
|
||||
If the no candidate is found for a given text, an empty list is returned.
|
||||
mention (Span): Mention for which to get candidates.
|
||||
RETURNS (Iterable[Candidate]): Identified candidates.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)
|
||||
Errors.E1045.format(
|
||||
parent="KnowledgeBase", method="get_candidates", name=self.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def get_vectors(self, entities: Iterable[str]) -> Iterable[Iterable[float]]:
|
||||
|
@ -67,7 +75,9 @@ cdef class KnowledgeBase:
|
|||
RETURNS (Iterable[float]): Vector for specified entity.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="get_vector", name=self.__name__)
|
||||
Errors.E1045.format(
|
||||
parent="KnowledgeBase", method="get_vector", name=self.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def to_bytes(self, **kwargs) -> bytes:
|
||||
|
@ -75,7 +85,9 @@ cdef class KnowledgeBase:
|
|||
RETURNS (bytes): Current state as binary string.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="to_bytes", name=self.__name__)
|
||||
Errors.E1045.format(
|
||||
parent="KnowledgeBase", method="to_bytes", name=self.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def from_bytes(self, bytes_data: bytes, *, exclude: Tuple[str] = tuple()):
|
||||
|
@ -84,25 +96,35 @@ cdef class KnowledgeBase:
|
|||
exclude (Tuple[str]): Properties to exclude when restoring KB.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="from_bytes", name=self.__name__)
|
||||
Errors.E1045.format(
|
||||
parent="KnowledgeBase", method="from_bytes", name=self.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
|
||||
def to_disk(
|
||||
self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
"""
|
||||
Write KnowledgeBase content to disk.
|
||||
path (Union[str, Path]): Target file path.
|
||||
exclude (Iterable[str]): List of components to exclude.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="to_disk", name=self.__name__)
|
||||
Errors.E1045.format(
|
||||
parent="KnowledgeBase", method="to_disk", name=self.__name__
|
||||
)
|
||||
)
|
||||
|
||||
def from_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
|
||||
def from_disk(
|
||||
self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
|
||||
) -> None:
|
||||
"""
|
||||
Load KnowledgeBase content from disk.
|
||||
path (Union[str, Path]): Target file path.
|
||||
exclude (Iterable[str]): List of components to exclude.
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__)
|
||||
Errors.E1045.format(
|
||||
parent="KnowledgeBase", method="from_disk", name=self.__name__
|
||||
)
|
||||
)
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
"""Knowledge-base for entity or concept linking."""
|
||||
from preshed.maps cimport PreshMap
|
||||
from libcpp.vector cimport vector
|
||||
from libc.stdint cimport int32_t, int64_t
|
||||
from libc.stdio cimport FILE
|
||||
from libcpp.vector cimport vector
|
||||
from preshed.maps cimport PreshMap
|
||||
|
||||
from ..structs cimport AliasC, KBEntryC
|
||||
from ..typedefs cimport hash_t
|
||||
from ..structs cimport KBEntryC, AliasC
|
||||
from .kb cimport KnowledgeBase
|
||||
|
||||
ctypedef vector[KBEntryC] entry_vec
|
||||
|
@ -55,23 +55,28 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
# optional data, we can let users configure a DB as the backend for this.
|
||||
cdef object _features_table
|
||||
|
||||
|
||||
cdef inline int64_t c_add_vector(self, vector[float] entity_vector) nogil:
|
||||
"""Add an entity vector to the vectors table."""
|
||||
cdef int64_t new_index = self._vectors_table.size()
|
||||
self._vectors_table.push_back(entity_vector)
|
||||
return new_index
|
||||
|
||||
|
||||
cdef inline int64_t c_add_entity(self, hash_t entity_hash, float freq,
|
||||
int32_t vector_index, int feats_row) nogil:
|
||||
cdef inline int64_t c_add_entity(
|
||||
self,
|
||||
hash_t entity_hash,
|
||||
float freq,
|
||||
int32_t vector_index,
|
||||
int feats_row
|
||||
) nogil:
|
||||
"""Add an entry to the vector of entries.
|
||||
After calling this method, make sure to update also the _entry_index using the return value"""
|
||||
After calling this method, make sure to update also the _entry_index
|
||||
using the return value"""
|
||||
# This is what we'll map the entity hash key to. It's where the entry will sit
|
||||
# in the vector of entries, so we can get it later.
|
||||
cdef int64_t new_index = self._entries.size()
|
||||
|
||||
# Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
|
||||
# Avoid struct initializer to enable nogil, cf.
|
||||
# https://github.com/cython/cython/issues/1642
|
||||
cdef KBEntryC entry
|
||||
entry.entity_hash = entity_hash
|
||||
entry.vector_index = vector_index
|
||||
|
@ -81,11 +86,17 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
self._entries.push_back(entry)
|
||||
return new_index
|
||||
|
||||
cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
|
||||
"""Connect a mention to a list of potential entities with their prior probabilities .
|
||||
After calling this method, make sure to update also the _alias_index using the return value"""
|
||||
# This is what we'll map the alias hash key to. It's where the alias will be defined
|
||||
# in the vector of aliases.
|
||||
cdef inline int64_t c_add_aliases(
|
||||
self,
|
||||
hash_t alias_hash,
|
||||
vector[int64_t] entry_indices,
|
||||
vector[float] probs
|
||||
) nogil:
|
||||
"""Connect a mention to a list of potential entities with their prior
|
||||
probabilities. After calling this method, make sure to update also the
|
||||
_alias_index using the return value"""
|
||||
# This is what we'll map the alias hash key to. It's where the alias will be
|
||||
# defined in the vector of aliases.
|
||||
cdef int64_t new_index = self._aliases_table.size()
|
||||
|
||||
# Avoid struct initializer to enable nogil
|
||||
|
@ -98,8 +109,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
|
||||
cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
|
||||
"""
|
||||
Initializing the vectors and making sure the first element of each vector is a dummy,
|
||||
because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
|
||||
Initializing the vectors and making sure the first element of each vector is a
|
||||
dummy, because the PreshMap maps pointing to indices in these vectors can not
|
||||
contain 0 as value.
|
||||
cf. https://github.com/explosion/preshed/issues/17
|
||||
"""
|
||||
cdef int32_t dummy_value = 0
|
||||
|
@ -130,12 +142,18 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
cdef class Writer:
|
||||
cdef FILE* _fp
|
||||
|
||||
cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1
|
||||
cdef int write_header(
|
||||
self, int64_t nr_entries, int64_t entity_vector_length
|
||||
) except -1
|
||||
cdef int write_vector_element(self, float element) except -1
|
||||
cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1
|
||||
cdef int write_entry(
|
||||
self, hash_t entry_hash, float entry_freq, int32_t vector_index
|
||||
) except -1
|
||||
|
||||
cdef int write_alias_length(self, int64_t alias_length) except -1
|
||||
cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1
|
||||
cdef int write_alias_header(
|
||||
self, hash_t alias_hash, int64_t candidate_length
|
||||
) except -1
|
||||
cdef int write_alias(self, int64_t entry_index, float prob) except -1
|
||||
|
||||
cdef int _write(self, void* value, size_t size) except -1
|
||||
|
@ -143,12 +161,18 @@ cdef class Writer:
|
|||
cdef class Reader:
|
||||
cdef FILE* _fp
|
||||
|
||||
cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1
|
||||
cdef int read_header(
|
||||
self, int64_t* nr_entries, int64_t* entity_vector_length
|
||||
) except -1
|
||||
cdef int read_vector_element(self, float* element) except -1
|
||||
cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1
|
||||
cdef int read_entry(
|
||||
self, hash_t* entity_hash, float* freq, int32_t* vector_index
|
||||
) except -1
|
||||
|
||||
cdef int read_alias_length(self, int64_t* alias_length) except -1
|
||||
cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1
|
||||
cdef int read_alias_header(
|
||||
self, hash_t* alias_hash, int64_t* candidate_length
|
||||
) except -1
|
||||
cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
|
||||
|
||||
cdef int _read(self, void* value, size_t size) except -1
|
||||
|
|
|
@ -1,29 +1,35 @@
|
|||
# cython: infer_types=True, profile=True
|
||||
from typing import Iterable, Callable, Dict, Any, Union
|
||||
from typing import Any, Callable, Dict, Iterable
|
||||
|
||||
import srsly
|
||||
from preshed.maps cimport PreshMap
|
||||
from cpython.exc cimport PyErr_SetFromErrno
|
||||
from libc.stdio cimport fopen, fclose, fread, fwrite, feof, fseek
|
||||
from libc.stdint cimport int32_t, int64_t
|
||||
from libcpp.vector cimport vector
|
||||
|
||||
from pathlib import Path
|
||||
from cpython.exc cimport PyErr_SetFromErrno
|
||||
from libc.stdint cimport int32_t, int64_t
|
||||
from libc.stdio cimport fclose, feof, fopen, fread, fseek, fwrite
|
||||
from libcpp.vector cimport vector
|
||||
from preshed.maps cimport PreshMap
|
||||
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
|
||||
from ..tokens import Span
|
||||
|
||||
from ..typedefs cimport hash_t
|
||||
from ..errors import Errors, Warnings
|
||||
|
||||
from .. import util
|
||||
from ..errors import Errors, Warnings
|
||||
from ..util import SimpleFrozenList, ensure_path
|
||||
|
||||
from ..vocab cimport Vocab
|
||||
from .kb cimport KnowledgeBase
|
||||
|
||||
from .candidate import Candidate as Candidate
|
||||
|
||||
|
||||
cdef class InMemoryLookupKB(KnowledgeBase):
|
||||
"""An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
|
||||
to support entity linking of named entities to real-world concepts.
|
||||
"""An `InMemoryLookupKB` instance stores unique identifiers for entities
|
||||
and their textual aliases, to support entity linking of named entities to
|
||||
real-world concepts.
|
||||
|
||||
DOCS: https://spacy.io/api/inmemorylookupkb
|
||||
"""
|
||||
|
@ -66,7 +72,8 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
|
||||
def add_entity(self, str entity, float freq, vector[float] entity_vector):
|
||||
"""
|
||||
Add an entity to the KB, optionally specifying its log probability based on corpus frequency
|
||||
Add an entity to the KB, optionally specifying its log probability
|
||||
based on corpus frequency.
|
||||
Return the hash of the entity ID/name at the end.
|
||||
"""
|
||||
cdef hash_t entity_hash = self.vocab.strings.add(entity)
|
||||
|
@ -78,14 +85,20 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
|
||||
# Raise an error if the provided entity vector is not of the correct length
|
||||
if len(entity_vector) != self.entity_vector_length:
|
||||
raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
|
||||
raise ValueError(
|
||||
Errors.E141.format(
|
||||
found=len(entity_vector), required=self.entity_vector_length
|
||||
)
|
||||
)
|
||||
|
||||
vector_index = self.c_add_vector(entity_vector=entity_vector)
|
||||
|
||||
new_index = self.c_add_entity(entity_hash=entity_hash,
|
||||
freq=freq,
|
||||
vector_index=vector_index,
|
||||
feats_row=-1) # Features table currently not implemented
|
||||
new_index = self.c_add_entity(
|
||||
entity_hash=entity_hash,
|
||||
freq=freq,
|
||||
vector_index=vector_index,
|
||||
feats_row=-1
|
||||
) # Features table currently not implemented
|
||||
self._entry_index[entity_hash] = new_index
|
||||
|
||||
return entity_hash
|
||||
|
@ -110,7 +123,12 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
else:
|
||||
entity_vector = vector_list[i]
|
||||
if len(entity_vector) != self.entity_vector_length:
|
||||
raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
|
||||
raise ValueError(
|
||||
Errors.E141.format(
|
||||
found=len(entity_vector),
|
||||
required=self.entity_vector_length
|
||||
)
|
||||
)
|
||||
|
||||
entry.entity_hash = entity_hash
|
||||
entry.freq = freq_list[i]
|
||||
|
@ -144,11 +162,15 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
previous_alias_nr = self.get_size_aliases()
|
||||
# Throw an error if the length of entities and probabilities are not the same
|
||||
if not len(entities) == len(probabilities):
|
||||
raise ValueError(Errors.E132.format(alias=alias,
|
||||
entities_length=len(entities),
|
||||
probabilities_length=len(probabilities)))
|
||||
raise ValueError(
|
||||
Errors.E132.format(
|
||||
alias=alias,
|
||||
entities_length=len(entities),
|
||||
probabilities_length=len(probabilities))
|
||||
)
|
||||
|
||||
# Throw an error if the probabilities sum up to more than 1 (allow for some rounding errors)
|
||||
# Throw an error if the probabilities sum up to more than 1 (allow for
|
||||
# some rounding errors)
|
||||
prob_sum = sum(probabilities)
|
||||
if prob_sum > 1.00001:
|
||||
raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
|
||||
|
@ -165,40 +187,47 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
|
||||
for entity, prob in zip(entities, probabilities):
|
||||
entity_hash = self.vocab.strings[entity]
|
||||
if not entity_hash in self._entry_index:
|
||||
if entity_hash not in self._entry_index:
|
||||
raise ValueError(Errors.E134.format(entity=entity))
|
||||
|
||||
entry_index = <int64_t>self._entry_index.get(entity_hash)
|
||||
entry_indices.push_back(int(entry_index))
|
||||
probs.push_back(float(prob))
|
||||
|
||||
new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
|
||||
new_index = self.c_add_aliases(
|
||||
alias_hash=alias_hash, entry_indices=entry_indices, probs=probs
|
||||
)
|
||||
self._alias_index[alias_hash] = new_index
|
||||
|
||||
if previous_alias_nr + 1 != self.get_size_aliases():
|
||||
raise RuntimeError(Errors.E891.format(alias=alias))
|
||||
return alias_hash
|
||||
|
||||
def append_alias(self, str alias, str entity, float prior_prob, ignore_warnings=False):
|
||||
def append_alias(
|
||||
self, str alias, str entity, float prior_prob, ignore_warnings=False
|
||||
):
|
||||
"""
|
||||
For an alias already existing in the KB, extend its potential entities with one more.
|
||||
For an alias already existing in the KB, extend its potential entities
|
||||
with one more.
|
||||
Throw a warning if either the alias or the entity is unknown,
|
||||
or when the combination is already previously recorded.
|
||||
Throw an error if this entity+prior prob would exceed the sum of 1.
|
||||
For efficiency, it's best to use the method `add_alias` as much as possible instead of this one.
|
||||
For efficiency, it's best to use the method `add_alias` as much as
|
||||
possible instead of this one.
|
||||
"""
|
||||
# Check if the alias exists in the KB
|
||||
cdef hash_t alias_hash = self.vocab.strings[alias]
|
||||
if not alias_hash in self._alias_index:
|
||||
if alias_hash not in self._alias_index:
|
||||
raise ValueError(Errors.E176.format(alias=alias))
|
||||
|
||||
# Check if the entity exists in the KB
|
||||
cdef hash_t entity_hash = self.vocab.strings[entity]
|
||||
if not entity_hash in self._entry_index:
|
||||
if entity_hash not in self._entry_index:
|
||||
raise ValueError(Errors.E134.format(entity=entity))
|
||||
entry_index = <int64_t>self._entry_index.get(entity_hash)
|
||||
|
||||
# Throw an error if the prior probabilities (including the new one) sum up to more than 1
|
||||
# Throw an error if the prior probabilities (including the new one)
|
||||
# sum up to more than 1
|
||||
alias_index = <int64_t>self._alias_index.get(alias_hash)
|
||||
alias_entry = self._aliases_table[alias_index]
|
||||
current_sum = sum([p for p in alias_entry.probs])
|
||||
|
@ -231,12 +260,13 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
|
||||
def get_alias_candidates(self, str alias) -> Iterable[Candidate]:
|
||||
"""
|
||||
Return candidate entities for an alias. Each candidate defines the entity, the original alias,
|
||||
and the prior probability of that alias resolving to that entity.
|
||||
Return candidate entities for an alias. Each candidate defines the
|
||||
entity, the original alias, and the prior probability of that alias
|
||||
resolving to that entity.
|
||||
If the alias is not known in the KB, and empty list is returned.
|
||||
"""
|
||||
cdef hash_t alias_hash = self.vocab.strings[alias]
|
||||
if not alias_hash in self._alias_index:
|
||||
if alias_hash not in self._alias_index:
|
||||
return []
|
||||
alias_index = <int64_t>self._alias_index.get(alias_hash)
|
||||
alias_entry = self._aliases_table[alias_index]
|
||||
|
@ -244,10 +274,14 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
return [Candidate(kb=self,
|
||||
entity_hash=self._entries[entry_index].entity_hash,
|
||||
entity_freq=self._entries[entry_index].freq,
|
||||
entity_vector=self._vectors_table[self._entries[entry_index].vector_index],
|
||||
entity_vector=self._vectors_table[
|
||||
self._entries[entry_index].vector_index
|
||||
],
|
||||
alias_hash=alias_hash,
|
||||
prior_prob=prior_prob)
|
||||
for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs)
|
||||
for (entry_index, prior_prob) in zip(
|
||||
alias_entry.entry_indices, alias_entry.probs
|
||||
)
|
||||
if entry_index != 0]
|
||||
|
||||
def get_vector(self, str entity):
|
||||
|
@ -261,8 +295,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
return self._vectors_table[self._entries[entry_index].vector_index]
|
||||
|
||||
def get_prior_prob(self, str entity, str alias):
|
||||
""" Return the prior probability of a given alias being linked to a given entity,
|
||||
or return 0.0 when this combination is not known in the knowledge base"""
|
||||
""" Return the prior probability of a given alias being linked to a
|
||||
given entity, or return 0.0 when this combination is not known in the
|
||||
knowledge base."""
|
||||
cdef hash_t alias_hash = self.vocab.strings[alias]
|
||||
cdef hash_t entity_hash = self.vocab.strings[entity]
|
||||
|
||||
|
@ -273,7 +308,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
entry_index = self._entry_index[entity_hash]
|
||||
|
||||
alias_entry = self._aliases_table[alias_index]
|
||||
for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs):
|
||||
for (entry_index, prior_prob) in zip(
|
||||
alias_entry.entry_indices, alias_entry.probs
|
||||
):
|
||||
if self._entries[entry_index].entity_hash == entity_hash:
|
||||
return prior_prob
|
||||
|
||||
|
@ -283,13 +320,19 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
"""Serialize the current state to a binary string.
|
||||
"""
|
||||
def serialize_header():
|
||||
header = (self.get_size_entities(), self.get_size_aliases(), self.entity_vector_length)
|
||||
header = (
|
||||
self.get_size_entities(),
|
||||
self.get_size_aliases(),
|
||||
self.entity_vector_length
|
||||
)
|
||||
return srsly.json_dumps(header)
|
||||
|
||||
def serialize_entries():
|
||||
i = 1
|
||||
tuples = []
|
||||
for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
|
||||
for entry_hash, entry_index in sorted(
|
||||
self._entry_index.items(), key=lambda x: x[1]
|
||||
):
|
||||
entry = self._entries[entry_index]
|
||||
assert entry.entity_hash == entry_hash
|
||||
assert entry_index == i
|
||||
|
@ -302,7 +345,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
headers = []
|
||||
indices_lists = []
|
||||
probs_lists = []
|
||||
for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
|
||||
for alias_hash, alias_index in sorted(
|
||||
self._alias_index.items(), key=lambda x: x[1]
|
||||
):
|
||||
alias = self._aliases_table[alias_index]
|
||||
assert alias_index == i
|
||||
candidate_length = len(alias.entry_indices)
|
||||
|
@ -360,7 +405,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
indices = srsly.json_loads(all_data[1])
|
||||
probs = srsly.json_loads(all_data[2])
|
||||
for header, indices, probs in zip(headers, indices, probs):
|
||||
alias_hash, candidate_length = header
|
||||
alias_hash, _candidate_length = header
|
||||
alias.entry_indices = indices
|
||||
alias.probs = probs
|
||||
self._aliases_table[i] = alias
|
||||
|
@ -409,10 +454,14 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
writer.write_vector_element(element)
|
||||
i = i+1
|
||||
|
||||
# dumping the entry records in the order in which they are in the _entries vector.
|
||||
# index 0 is a dummy object not stored in the _entry_index and can be ignored.
|
||||
# dumping the entry records in the order in which they are in the
|
||||
# _entries vector.
|
||||
# index 0 is a dummy object not stored in the _entry_index and can
|
||||
# be ignored.
|
||||
i = 1
|
||||
for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
|
||||
for entry_hash, entry_index in sorted(
|
||||
self._entry_index.items(), key=lambda x: x[1]
|
||||
):
|
||||
entry = self._entries[entry_index]
|
||||
assert entry.entity_hash == entry_hash
|
||||
assert entry_index == i
|
||||
|
@ -424,7 +473,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
|
|||
# dumping the aliases in the order in which they are in the _alias_index vector.
|
||||
# index 0 is a dummy object not stored in the _aliases_table and can be ignored.
|
||||
i = 1
|
||||
for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
|
||||
for alias_hash, alias_index in sorted(
|
||||
self._alias_index.items(), key=lambda x: x[1]
|
||||
):
|
||||
alias = self._aliases_table[alias_index]
|
||||
assert alias_index == i
|
||||
|
||||
|
@ -530,7 +581,8 @@ cdef class Writer:
|
|||
def __init__(self, path):
|
||||
assert isinstance(path, Path)
|
||||
content = bytes(path)
|
||||
cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
|
||||
cdef bytes bytes_loc = content.encode('utf8') \
|
||||
if type(content) == str else content
|
||||
self._fp = fopen(<char*>bytes_loc, 'wb')
|
||||
if not self._fp:
|
||||
raise IOError(Errors.E146.format(path=path))
|
||||
|
@ -540,14 +592,18 @@ cdef class Writer:
|
|||
cdef size_t status = fclose(self._fp)
|
||||
assert status == 0
|
||||
|
||||
cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1:
|
||||
cdef int write_header(
|
||||
self, int64_t nr_entries, int64_t entity_vector_length
|
||||
) except -1:
|
||||
self._write(&nr_entries, sizeof(nr_entries))
|
||||
self._write(&entity_vector_length, sizeof(entity_vector_length))
|
||||
|
||||
cdef int write_vector_element(self, float element) except -1:
|
||||
self._write(&element, sizeof(element))
|
||||
|
||||
cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1:
|
||||
cdef int write_entry(
|
||||
self, hash_t entry_hash, float entry_freq, int32_t vector_index
|
||||
) except -1:
|
||||
self._write(&entry_hash, sizeof(entry_hash))
|
||||
self._write(&entry_freq, sizeof(entry_freq))
|
||||
self._write(&vector_index, sizeof(vector_index))
|
||||
|
@ -556,7 +612,9 @@ cdef class Writer:
|
|||
cdef int write_alias_length(self, int64_t alias_length) except -1:
|
||||
self._write(&alias_length, sizeof(alias_length))
|
||||
|
||||
cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1:
|
||||
cdef int write_alias_header(
|
||||
self, hash_t alias_hash, int64_t candidate_length
|
||||
) except -1:
|
||||
self._write(&alias_hash, sizeof(alias_hash))
|
||||
self._write(&candidate_length, sizeof(candidate_length))
|
||||
|
||||
|
@ -572,16 +630,19 @@ cdef class Writer:
|
|||
cdef class Reader:
|
||||
def __init__(self, path):
|
||||
content = bytes(path)
|
||||
cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
|
||||
cdef bytes bytes_loc = content.encode('utf8') \
|
||||
if type(content) == str else content
|
||||
self._fp = fopen(<char*>bytes_loc, 'rb')
|
||||
if not self._fp:
|
||||
PyErr_SetFromErrno(IOError)
|
||||
status = fseek(self._fp, 0, 0) # this can be 0 if there is no header
|
||||
fseek(self._fp, 0, 0) # this can be 0 if there is no header
|
||||
|
||||
def __dealloc__(self):
|
||||
fclose(self._fp)
|
||||
|
||||
cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1:
|
||||
cdef int read_header(
|
||||
self, int64_t* nr_entries, int64_t* entity_vector_length
|
||||
) except -1:
|
||||
status = self._read(nr_entries, sizeof(int64_t))
|
||||
if status < 1:
|
||||
if feof(self._fp):
|
||||
|
@ -601,7 +662,9 @@ cdef class Reader:
|
|||
return 0 # end of file
|
||||
raise IOError(Errors.E145.format(param="vector element"))
|
||||
|
||||
cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1:
|
||||
cdef int read_entry(
|
||||
self, hash_t* entity_hash, float* freq, int32_t* vector_index
|
||||
) except -1:
|
||||
status = self._read(entity_hash, sizeof(hash_t))
|
||||
if status < 1:
|
||||
if feof(self._fp):
|
||||
|
@ -632,7 +695,9 @@ cdef class Reader:
|
|||
return 0 # end of file
|
||||
raise IOError(Errors.E145.format(param="alias length"))
|
||||
|
||||
cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1:
|
||||
cdef int read_alias_header(
|
||||
self, hash_t* alias_hash, int64_t* candidate_length
|
||||
) except -1:
|
||||
status = self._read(alias_hash, sizeof(hash_t))
|
||||
if status < 1:
|
||||
if feof(self._fp):
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from ...language import BaseDefaults, Language
|
||||
from .stop_words import STOP_WORDS
|
||||
from ...language import Language, BaseDefaults
|
||||
|
||||
|
||||
class AfrikaansDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
from .stop_words import STOP_WORDS
|
||||
from ...attrs import LANG
|
||||
from ...language import BaseDefaults, Language
|
||||
from ...util import update_exc
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_SUFFIXES
|
||||
|
||||
from .stop_words import STOP_WORDS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...language import Language, BaseDefaults
|
||||
from ...attrs import LANG
|
||||
from ...util import update_exc
|
||||
|
||||
|
||||
class AmharicDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
|
||||
from ..char_classes import UNITS, ALPHA_UPPER
|
||||
from ..char_classes import (
|
||||
ALPHA_UPPER,
|
||||
CURRENCY,
|
||||
LIST_ELLIPSES,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
UNITS,
|
||||
)
|
||||
|
||||
_list_punct = LIST_PUNCT + "፡ ። ፣ ፤ ፥ ፦ ፧ ፠ ፨".strip().split()
|
||||
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
from ...symbols import ORTH, NORM
|
||||
|
||||
from ...symbols import NORM, ORTH
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from .stop_words import STOP_WORDS
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from ...language import Language, BaseDefaults
|
||||
|
||||
|
||||
class ArabicDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
|
||||
from ..char_classes import UNITS, ALPHA_UPPER
|
||||
from ..char_classes import (
|
||||
ALPHA_UPPER,
|
||||
CURRENCY,
|
||||
LIST_ELLIPSES,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
UNITS,
|
||||
)
|
||||
|
||||
_suffixes = (
|
||||
LIST_PUNCT
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .stop_words import STOP_WORDS
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from ...language import Language, BaseDefaults
|
||||
from .stop_words import STOP_WORDS
|
||||
|
||||
|
||||
class AzerbaijaniDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from ...attrs import LIKE_NUM
|
||||
|
||||
|
||||
# Eleven, twelve etc. are written separate: on bir, on iki
|
||||
|
||||
_num_words = [
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
from ...attrs import LANG
|
||||
from ...language import BaseDefaults, Language
|
||||
from ...util import update_exc
|
||||
from ..punctuation import (
|
||||
COMBINING_DIACRITICS_TOKENIZER_INFIXES,
|
||||
COMBINING_DIACRITICS_TOKENIZER_SUFFIXES,
|
||||
)
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_INFIXES
|
||||
from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_SUFFIXES
|
||||
from ...language import Language, BaseDefaults
|
||||
from ...attrs import LANG
|
||||
from ...util import update_exc
|
||||
|
||||
|
||||
class BulgarianDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from ...attrs import LIKE_NUM
|
||||
|
||||
|
||||
_num_words = [
|
||||
"нула",
|
||||
"едно",
|
||||
|
|
|
@ -4,8 +4,7 @@ References:
|
|||
(countries, occupations, fields of studies and more).
|
||||
"""
|
||||
|
||||
from ...symbols import ORTH, NORM
|
||||
|
||||
from ...symbols import NORM, ORTH
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,10 +1,12 @@
|
|||
from typing import Optional, Callable
|
||||
from typing import Callable, Optional
|
||||
|
||||
from thinc.api import Model
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from ...language import Language, BaseDefaults
|
||||
|
||||
from ...language import BaseDefaults, Language
|
||||
from ...pipeline import Lemmatizer
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class BengaliDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,6 +1,14 @@
|
|||
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
|
||||
from ..char_classes import ALPHA_LOWER, ALPHA, HYPHENS, CONCAT_QUOTES, UNITS
|
||||
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
CONCAT_QUOTES,
|
||||
HYPHENS,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
UNITS,
|
||||
)
|
||||
|
||||
_currency = r"\$¢£€¥฿৳"
|
||||
_quotes = CONCAT_QUOTES.replace("'", "")
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
from typing import Optional, Callable
|
||||
from typing import Callable, Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from ...language import Language, BaseDefaults
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lemmatizer import CatalanLemmatizer
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class CatalanDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from ...attrs import LIKE_NUM
|
||||
|
||||
|
||||
_num_words = [
|
||||
"zero",
|
||||
"un",
|
||||
|
|
|
@ -1,9 +1,18 @@
|
|||
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
|
||||
from ..char_classes import LIST_CURRENCY
|
||||
from ..char_classes import CURRENCY
|
||||
from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT
|
||||
from ..char_classes import merge_chars, _units
|
||||
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
ALPHA_UPPER,
|
||||
CONCAT_QUOTES,
|
||||
CURRENCY,
|
||||
LIST_CURRENCY,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
PUNCT,
|
||||
_units,
|
||||
merge_chars,
|
||||
)
|
||||
|
||||
ELISION = " ' ’ ".strip().replace(" ", "").replace("\n", "")
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
from typing import Union, Iterator, Tuple
|
||||
from ...tokens import Doc, Span
|
||||
from ...symbols import NOUN, PROPN
|
||||
from typing import Iterator, Tuple, Union
|
||||
|
||||
from ...errors import Errors
|
||||
from ...symbols import NOUN, PROPN
|
||||
from ...tokens import Doc, Span
|
||||
|
||||
|
||||
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from .stop_words import STOP_WORDS
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from ...language import Language, BaseDefaults
|
||||
from .stop_words import STOP_WORDS
|
||||
|
||||
|
||||
class CzechDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from ...language import Language, BaseDefaults
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class DanishDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from ...attrs import LIKE_NUM
|
||||
|
||||
|
||||
# Source http://fjern-uv.dk/tal.php
|
||||
_num_words = """nul
|
||||
en et to tre fire fem seks syv otte ni ti
|
||||
|
|
|
@ -1,8 +1,13 @@
|
|||
from ..char_classes import LIST_ELLIPSES, LIST_ICONS
|
||||
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
ALPHA_UPPER,
|
||||
CONCAT_QUOTES,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
)
|
||||
from ..punctuation import TOKENIZER_SUFFIXES
|
||||
|
||||
|
||||
_quotes = CONCAT_QUOTES.replace("'", "")
|
||||
|
||||
_infixes = (
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
from typing import Union, Iterator, Tuple
|
||||
from ...tokens import Doc, Span
|
||||
from ...symbols import NOUN, PROPN, PRON, VERB, AUX
|
||||
from typing import Iterator, Tuple, Union
|
||||
|
||||
from ...errors import Errors
|
||||
from ...symbols import AUX, NOUN, PRON, PROPN, VERB
|
||||
from ...tokens import Doc, Span
|
||||
|
||||
|
||||
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
|
||||
|
|
|
@ -2,10 +2,9 @@
|
|||
Tokenizer Exceptions.
|
||||
Source: https://forkortelse.dk/ and various others.
|
||||
"""
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
|
||||
from ...language import BaseDefaults, Language
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from ...language import Language, BaseDefaults
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class GermanDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,9 +1,18 @@
|
|||
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
|
||||
from ..char_classes import CURRENCY, UNITS, PUNCT
|
||||
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
ALPHA_UPPER,
|
||||
CONCAT_QUOTES,
|
||||
CURRENCY,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
PUNCT,
|
||||
UNITS,
|
||||
)
|
||||
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
|
||||
|
||||
|
||||
_prefixes = ["``"] + BASE_TOKENIZER_PREFIXES
|
||||
|
||||
_suffixes = (
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Union, Iterator, Tuple
|
||||
from typing import Iterator, Tuple, Union
|
||||
|
||||
from ...symbols import NOUN, PROPN, PRON
|
||||
from ...errors import Errors
|
||||
from ...symbols import NOUN, PRON, PROPN
|
||||
from ...tokens import Doc, Span
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {
|
||||
"auf'm": [{ORTH: "auf"}, {ORTH: "'m", NORM: "dem"}],
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from ...language import BaseDefaults, Language
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .stop_words import STOP_WORDS
|
||||
from ...language import Language, BaseDefaults
|
||||
|
||||
|
||||
class LowerSorbianDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
from typing import Optional, Callable
|
||||
from typing import Callable, Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lemmatizer import GreekLemmatizer
|
||||
from ...language import Language, BaseDefaults
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class GreekDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
def get_pos_from_wiktionary():
|
||||
import re
|
||||
|
||||
from gensim.corpora.wikicorpus import extract_pages
|
||||
|
||||
regex = re.compile(r"==={{(\w+)\|el}}===")
|
||||
|
|
|
@ -1,6 +1,16 @@
|
|||
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
|
||||
from ..char_classes import LIST_ICONS, ALPHA_LOWER, ALPHA_UPPER, ALPHA, HYPHENS
|
||||
from ..char_classes import CONCAT_QUOTES, CURRENCY
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
ALPHA_UPPER,
|
||||
CONCAT_QUOTES,
|
||||
CURRENCY,
|
||||
HYPHENS,
|
||||
LIST_CURRENCY,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
)
|
||||
|
||||
_units = (
|
||||
"km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm nm yd in ft "
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Union, Iterator, Tuple
|
||||
from typing import Iterator, Tuple, Union
|
||||
|
||||
from ...symbols import NOUN, PROPN, PRON
|
||||
from ...errors import Errors
|
||||
from ...symbols import NOUN, PRON, PROPN
|
||||
from ...tokens import Doc, Span
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {}
|
||||
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
from typing import Optional, Callable
|
||||
from typing import Callable, Optional
|
||||
|
||||
from thinc.api import Model
|
||||
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .punctuation import TOKENIZER_INFIXES
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lemmatizer import EnglishLemmatizer
|
||||
from ...language import Language, BaseDefaults
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_INFIXES
|
||||
from .stop_words import STOP_WORDS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class EnglishDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,5 +1,12 @@
|
|||
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
|
||||
from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
ALPHA_UPPER,
|
||||
CONCAT_QUOTES,
|
||||
HYPHENS,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
)
|
||||
|
||||
_infixes = (
|
||||
LIST_ELLIPSES
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Union, Iterator, Tuple
|
||||
from typing import Iterator, Tuple, Union
|
||||
|
||||
from ...symbols import NOUN, PROPN, PRON
|
||||
from ...errors import Errors
|
||||
from ...symbols import NOUN, PRON, PROPN
|
||||
from ...tokens import Doc, Span
|
||||
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
from typing import Dict, List
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...util import update_exc
|
||||
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc: Dict[str, List[Dict]] = {}
|
||||
_exclude = [
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
from typing import Optional, Callable
|
||||
from typing import Callable, Optional
|
||||
|
||||
from thinc.api import Model
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
from .stop_words import STOP_WORDS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lemmatizer import SpanishLemmatizer
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
|
||||
from ...language import Language, BaseDefaults
|
||||
from .stop_words import STOP_WORDS
|
||||
from .syntax_iterators import SYNTAX_ITERATORS
|
||||
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
|
||||
|
||||
|
||||
class SpanishDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from typing import List, Optional, Tuple
|
||||
import re
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from ...pipeline import Lemmatizer
|
||||
from ...tokens import Token
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from ...attrs import LIKE_NUM
|
||||
|
||||
|
||||
_num_words = [
|
||||
"cero",
|
||||
"uno",
|
||||
|
|
|
@ -1,8 +1,17 @@
|
|||
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES
|
||||
from ..char_classes import LIST_ICONS, CURRENCY, LIST_UNITS, PUNCT
|
||||
from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
|
||||
from ..char_classes import merge_chars
|
||||
|
||||
from ..char_classes import (
|
||||
ALPHA,
|
||||
ALPHA_LOWER,
|
||||
ALPHA_UPPER,
|
||||
CONCAT_QUOTES,
|
||||
CURRENCY,
|
||||
LIST_ELLIPSES,
|
||||
LIST_ICONS,
|
||||
LIST_PUNCT,
|
||||
LIST_QUOTES,
|
||||
LIST_UNITS,
|
||||
PUNCT,
|
||||
merge_chars,
|
||||
)
|
||||
|
||||
_list_units = [u for u in LIST_UNITS if u != "%"]
|
||||
_units = merge_chars(" ".join(_list_units))
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from typing import Union, Iterator, Tuple
|
||||
from typing import Iterator, Tuple, Union
|
||||
|
||||
from ...symbols import NOUN, PROPN, PRON
|
||||
from ...errors import Errors
|
||||
from ...symbols import NOUN, PRON, PROPN
|
||||
from ...tokens import Doc, Span
|
||||
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
from ...symbols import ORTH, NORM
|
||||
from ...symbols import NORM, ORTH
|
||||
from ...util import update_exc
|
||||
|
||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||
|
||||
_exc = {
|
||||
"pal": [{ORTH: "pa"}, {ORTH: "l", NORM: "el"}],
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
from ...language import BaseDefaults, Language
|
||||
from .stop_words import STOP_WORDS
|
||||
from ...language import Language, BaseDefaults
|
||||
|
||||
|
||||
class EstonianDefaults(BaseDefaults):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
from .stop_words import STOP_WORDS
|
||||
from ...language import BaseDefaults, Language
|
||||
from .lex_attrs import LEX_ATTRS
|
||||
from .punctuation import TOKENIZER_SUFFIXES
|
||||
from ...language import Language, BaseDefaults
|
||||
from .stop_words import STOP_WORDS
|
||||
|
||||
|
||||
class BasqueDefaults(BaseDefaults):
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user