Merge branch 'v4' into feature/docwise-generator-batching

# Conflicts: # spacy/kb/kb.pyx # spacy/kb/kb_in_memory.pyx # spacy/ml/models/entity_linker.py # spacy/pipeline/entity_linker.py # spacy/tests/pipeline/test_entity_linker.py # website/docs/api/entitylinker.mdx
2025-09-13 23:52:38 +03:00 · 2023-07-27 14:28:06 +02:00 · 2023-07-27 14:28:06 +02:00 · 8aa59c4f65
commit 8aa59c4f65
parent c655b36555 eaaac5a08c
606 changed files with 11468 additions and 3480 deletions
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@ -1,129 +0,0 @@
 parameters:
  python_version: ''
  architecture: 'x64'
  num_build_jobs: 2
 steps:
  - task: UsePythonVersion@0
    inputs:
      versionSpec: ${{ parameters.python_version }}
      architecture: ${{ parameters.architecture }}
      allowUnstable: true
  - bash: |
      echo "##vso[task.setvariable variable=python_version]${{ parameters.python_version }}"
    displayName: 'Set variables'
  - script: |
      python -m pip install -U build pip setuptools
      python -m pip install -U -r requirements.txt
    displayName: "Install dependencies"
  - script: |
      python -m build --sdist
    displayName: "Build sdist"
  - script: |
      python -m mypy spacy
    displayName: 'Run mypy'
    condition: ne(variables['python_version'], '3.6')
  - task: DeleteFiles@1
    inputs:
      contents: "spacy"
    displayName: "Delete source directory"
  - task: DeleteFiles@1
    inputs:
      contents: "*.egg-info"
    displayName: "Delete egg-info directory"
  - script: |
      python -m pip freeze > installed.txt
      python -m pip uninstall -y -r installed.txt
    displayName: "Uninstall all packages"
  - bash: |
      SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
      SPACY_NUM_BUILD_JOBS=${{ parameters.num_build_jobs }} python -m pip install dist/$SDIST
    displayName: "Install from sdist"
  - script: |
      python -W error -c "import spacy"
    displayName: "Test import"
 #  - script: |
 #      python -m spacy download ca_core_news_sm
 #      python -m spacy download ca_core_news_md
 #      python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
 #    displayName: 'Test download CLI'
 #    condition: eq(variables['python_version'], '3.8')
 #
 #  - script: |
 #      python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
 #    displayName: 'Test no warnings on load (#11713)'
 #    condition: eq(variables['python_version'], '3.8')
 #
 #  - script: |
 #      python -m spacy download ca_core_news_sm 2>&1 | grep -q skipping
 #    displayName: 'Test skip re-download (#12188)'
 #    condition: eq(variables['python_version'], '3.8')
 #  - script: |
 #      python -W error -m spacy info ca_core_news_sm | grep -q download_url
 #    displayName: 'Test download_url in info CLI'
 #    condition: eq(variables['python_version'], '3.8')
  - script: |
      python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
    displayName: 'Test convert CLI'
    condition: eq(variables['python_version'], '3.8')
  - script: |
      python -m spacy init config -p ner -l ca ner.cfg
      python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
    displayName: 'Test debug config CLI'
    condition: eq(variables['python_version'], '3.8')
  - script: |
      # will have errors due to sparse data, check for summary in output
      python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
    displayName: 'Test debug data CLI'
    condition: eq(variables['python_version'], '3.8')
  - script: |
      python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
    displayName: 'Test train CLI'
    condition: eq(variables['python_version'], '3.8')
 #  - script: |
 #      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
 #      PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
 #    displayName: 'Test assemble CLI'
 #    condition: eq(variables['python_version'], '3.8')
 #
 #  - script: |
 #      python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
 #      python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
 #    displayName: 'Test assemble CLI vectors warning'
 #    condition: eq(variables['python_version'], '3.8')
  - script: |
      python -m pip install -U -r requirements.txt
    displayName: "Install test requirements"
  - script: |
      python -m pytest --pyargs spacy -W error
    displayName: "Run CPU tests"
  - script: |
      python -m pip install 'spacy[apple]'
      python -m pytest --pyargs spacy
    displayName: "Run CPU tests with thinc-apple-ops"
    condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
  - script: |
      python .github/validate_universe_json.py website/meta/universe.json
    displayName: 'Test website/meta/universe.json'
    condition: eq(variables['python_version'], '3.8')
--- a/.github/workflows/autoblack.yml
+++ b/.github/workflows/autoblack.yml
@ -1,45 +0,0 @@
 # GitHub Action that uses Black to reformat all Python code and submits a PR
 # in regular intervals. Inspired by: https://github.com/cclauss/autoblack
 name: autoblack
 on:
  workflow_dispatch:  # allow manual trigger
  schedule:
    - cron: '0 8 * * 5'  # every Friday at 8am UTC
 jobs:
  autoblack:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
            ref: ${{ github.head_ref }}
      - uses: actions/setup-python@v4
      - run: pip install black -c requirements.txt
      - name: Auto-format code if needed
        run: black spacy
       # We can't run black --check here because that returns a non-zero exit
      # code and makes GitHub think the action failed
      - name: Check for modified files
        id: git-check
        run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT
      - name: Create Pull Request
        if: steps.git-check.outputs.modified == 'true'
        uses: peter-evans/create-pull-request@v4
        with:
            title: Auto-format code with black
            labels: meta
            commit-message: Auto-format code with black
            committer: GitHub <noreply@github.com>
            author: explosion-bot <explosion-bot@users.noreply.github.com>
            body: _This PR is auto-generated._
            branch: autoblack
            delete-branch: true
            draft: false
      - name: Check outputs
        if: steps.git-check.outputs.modified == 'true'
        run: |
          echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}"
          echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}"
--- a/.github/workflows/explosionbot.yml
+++ b/.github/workflows/explosionbot.yml
@ -8,6 +8,7 @@ on:
 jobs:
  explosion-bot:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - name: Dump GitHub context
--- a/.github/workflows/issue-manager.yml
+++ b/.github/workflows/issue-manager.yml
@ -13,6 +13,7 @@ on:
 jobs:
  issue-manager:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - uses: tiangolo/issue-manager@0.4.0
--- a/.github/workflows/lock.yml
+++ b/.github/workflows/lock.yml
@ -13,6 +13,7 @@ concurrency:
 jobs:
  action:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - uses: dessant/lock-threads@v4
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@ -7,6 +7,7 @@ on:
 jobs:
  build:
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@ -0,0 +1,179 @@
 name: tests
 on:
  push:
    branches-ignore:
      - "spacy.io"
      - "nightly.spacy.io"
      - "v2.spacy.io"
    paths-ignore:
      - "*.md"
      - "*.mdx"
      - "website/**"
      - ".github/workflows/**"
  pull_request:
    types: [opened, synchronize, reopened, edited]
    paths-ignore:
      - "*.md"
      - "*.mdx"
      - "website/**"
 jobs:
  validate:
    name: Validate
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
          architecture: x64
      - name: black
        run: |
          python -m pip install black -c requirements.txt
          python -m black spacy --check
      - name: isort
        run: |
          python -m pip install isort -c requirements.txt
          python -m isort spacy --check
      - name: flake8
        run: |
          python -m pip install flake8==5.0.4
          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
      - name: cython-lint
        run: |
          python -m pip install cython-lint -c requirements.txt
           # E501: line too long, W291: trailing whitespace, E266: too many leading '#' for block comment
          cython-lint spacy --ignore E501,W291,E266
  tests:
    name: Test
    needs: Validate
    strategy:
      fail-fast: true
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python_version: ["3.11"]
        include:
          - os: macos-latest
            python_version: "3.8"
          - os: ubuntu-20.04
            python_version: "3.9"
          - os: windows-latest
            python_version: "3.10"
    runs-on: ${{ matrix.os }}
    steps:
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python_version }}
          architecture: x64
      - name: Install dependencies
        run: |
          python -m pip install -U build pip setuptools
          python -m pip install -U -r requirements.txt
      - name: Build sdist
        run: |
          python -m build --sdist
      - name: Run mypy
        run: |
          python -m mypy spacy
      - name: Delete source directory and .egg-info
        run: |
          rm -rf spacy *.egg-info
        shell: bash
      - name: Uninstall all packages
        run: |
          python -m pip freeze
          python -m pip freeze --exclude pywin32 > installed.txt
          python -m pip uninstall -y -r installed.txt
      - name: Install from sdist
        run: |
          SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
          SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
        shell: bash
      - name: Test import
        run: python -W error -c "import spacy"
      #      - name: "Test download CLI"
      #        run: |
      #          python -m spacy download ca_core_news_sm
      #          python -m spacy download ca_core_news_md
      #          python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
      #        if: matrix.python_version == '3.9'
      #
      #      - name: "Test download_url in info CLI"
      #        run: |
      #          python -W error -m spacy info ca_core_news_sm | grep -q download_url
      #        if: matrix.python_version == '3.9'
      #
      #      - name: "Test no warnings on load (#11713)"
      #        run: |
      #          python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
      #        if: matrix.python_version == '3.9'
      - name: "Test convert CLI"
        run: |
          python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
        if: matrix.python_version == '3.9'
      - name: "Test debug config CLI"
        run: |
          python -m spacy init config -p ner -l ca ner.cfg
          python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
        if: matrix.python_version == '3.9'
      - name: "Test debug data CLI"
        run: |
          # will have errors due to sparse data, check for summary in output
          python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
        if: matrix.python_version == '3.9'
      - name: "Test train CLI"
        run: |
          python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
        if: matrix.python_version == '3.9'
      #      - name: "Test assemble CLI"
      #        run: |
      #          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
      #          PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
      #        if: matrix.python_version == '3.9'
      #
      #      - name: "Test assemble CLI vectors warning"
      #        run: |
      #          python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
      #          python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
      #        if: matrix.python_version == '3.9'
      - name: "Install test requirements"
        run: |
          python -m pip install -U -r requirements.txt
      - name: "Run CPU tests"
        run: |
          python -m pytest --pyargs spacy -W error
        if: "!(startsWith(matrix.os, 'macos') && matrix.python_version == '3.11')"
      - name: "Run CPU tests with thinc-apple-ops"
        run: |
          python -m pip install 'spacy[apple]'
          python -m pytest --pyargs spacy
        if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
--- a/.github/workflows/universe_validation.yml
+++ b/.github/workflows/universe_validation.yml
@ -0,0 +1,33 @@
 name: universe validation
 on:
  push:
    branches-ignore:
      - "spacy.io"
      - "nightly.spacy.io"
      - "v2.spacy.io"
    paths:
      - "website/meta/universe.json"
  pull_request:
    types: [opened, synchronize, reopened, edited]
    paths:
      - "website/meta/universe.json"
 jobs:
  validate:
    name: Validate
    if: github.repository_owner == 'explosion'
    runs-on: ubuntu-latest
    steps:
      - name: Check out repo
        uses: actions/checkout@v3
      - name: Configure Python version
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
          architecture: x64
      - name: Validate website/meta/universe.json
        run: |
          python .github/validate_universe_json.py website/meta/universe.json
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 SHELL := /bin/bash
 ifndef SPACY_EXTRAS
-override SPACY_EXTRAS = spacy-lookups-data==1.0.2 jieba spacy-pkuseg==0.0.28 sudachipy sudachidict_core pymorphy2
+override SPACY_EXTRAS = spacy-lookups-data==1.0.3
 endif
 ifndef PYVER
--- a/README.md
+++ b/README.md
@ -16,6 +16,9 @@ production-ready [**training system**](https://spacy.io/usage/training) and easy
 model packaging, deployment and workflow management. spaCy is commercial
 open-source software, released under the [MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
 💥 **We'd love to hear more about your experience with spaCy!**
 [Fill out our survey here.](https://form.typeform.com/to/aMel9q9f)
 💫 **Version 3.5 out now!**
 [Check out the release notes here.](https://github.com/explosion/spaCy/releases)
@ -32,19 +35,20 @@ open-source software, released under the [MIT license](https://github.com/explos
 ## 📖 Documentation
-| Documentation                                                                                                                                                                                                             |                                                                                                                                                                                                                                                                                                                              |
+| Documentation                 |                                                                        |
-| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| ----------------------------- | ---------------------------------------------------------------------- |
-| ⭐️ **[spaCy 101]**                                                                                                                                                                                                       | New to spaCy? Here's everything you need to know!                                                                                                                                                                                                                                                                            |
+| ⭐️ **[spaCy 101]**           | New to spaCy? Here's everything you need to know!                      |
-| 📚 **[Usage Guides]**                                                                                                                                                                                                     | How to use spaCy and its features.                                                                                                                                                                                                                                                                                           |
+| 📚 **[Usage Guides]**         | How to use spaCy and its features.                                     |
-| 🚀 **[New in v3.0]**                                                                                                                                                                                                      | New features, backwards incompatibilities and migration guide.                                                                                                                                                                                                                                                               |
+| 🚀 **[New in v3.0]**          | New features, backwards incompatibilities and migration guide.         |
-| 🪐 **[Project Templates]**                                                                                                                                                                                                | End-to-end workflows you can clone, modify and run.                                                                                                                                                                                                                                                                          |
+| 🪐 **[Project Templates]**    | End-to-end workflows you can clone, modify and run.                    |
-| 🎛 **[API Reference]**                                                                                                                                                                                                     | The detailed reference for spaCy's API.                                                                                                                                                                                                                                                                                      |
+| 🎛 **[API Reference]**         | The detailed reference for spaCy's API.                                |
-| 📦 **[Models]**                                                                                                                                                                                                           | Download trained pipelines for spaCy.                                                                                                                                                                                                                                                                                        |
+| 📦 **[Models]**               | Download trained pipelines for spaCy.                                  |
-| 🌌 **[Universe]**                                                                                                                                                                                                         | Plugins, extensions, demos and books from the spaCy ecosystem.                                                                                                                                                                                                                                                               |
+| 🌌 **[Universe]**             | Plugins, extensions, demos and books from the spaCy ecosystem.         |
-| 👩‍🏫 **[Online Course]**                                                                                                                                                                                                    | Learn spaCy in this free and interactive online course.                                                                                                                                                                                                                                                                      |
+| ⚙️ **[spaCy VS Code Extension]** | Additional tooling and features for working with spaCy's config files. |
-| 📺 **[Videos]**                                                                                                                                                                                                           | Our YouTube channel with video tutorials, talks and more.                                                                                                                                                                                                                                                                    |
+| 👩‍🏫 **[Online Course]** | Learn spaCy in this free and interactive online course. |
-| 🛠 **[Changelog]**                                                                                                                                                                                                         | Changes and version history.                                                                                                                                                                                                                                                                                                 |
+| 📺 **[Videos]** | Our YouTube channel with video tutorials, talks and more. |
-| 💝 **[Contribute]**                                                                                                                                                                                                       | How to contribute to the spaCy project and code base.                                                                                                                                                                                                                                                                        |
+| 🛠 **[Changelog]** | Changes and version history. |
 | 💝 **[Contribute]** | How to contribute to the spaCy project and code base. |
 | <a href="https://explosion.ai/spacy-tailored-pipelines"><img src="https://user-images.githubusercontent.com/13643239/152853098-1c761611-ccb0-4ec6-9066-b234552831fe.png" width="125" alt="spaCy Tailored Pipelines"/></a> | Get a custom spaCy pipeline, tailor-made for your NLP problem by spaCy's core developers. Streamlined, production-ready, predictable and maintainable. Start by completing our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more &rarr;](https://explosion.ai/spacy-tailored-pipelines)** |
 | <a href="https://explosion.ai/spacy-tailored-analysis"><img src="https://user-images.githubusercontent.com/1019791/206151300-b00cd189-e503-4797-aa1e-1bb6344062c5.png" width="125" alt="spaCy Tailored Analysis"/></a> | Bespoke advice for problem solving, strategy and analysis for applied NLP projects. Services include data strategy, code reviews, pipeline design and annotation coaching. Curious? Fill in our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more &rarr;](https://explosion.ai/spacy-tailored-analysis)** |
@ -54,13 +58,13 @@ open-source software, released under the [MIT license](https://github.com/explos
 [api reference]: https://spacy.io/api/
 [models]: https://spacy.io/models
 [universe]: https://spacy.io/universe
 [spaCy VS Code Extension]: https://github.com/explosion/spacy-vscode
 [videos]: https://www.youtube.com/c/ExplosionAI
 [online course]: https://course.spacy.io
 [project templates]: https://github.com/explosion/projects
 [changelog]: https://spacy.io/usage#changelog
 [contribute]: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md
 ## 💬 Where to ask questions
 The spaCy project is maintained by the [spaCy team](https://explosion.ai/about).
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@ -1,99 +0,0 @@
 trigger:
  batch: true
  branches:
    include:
      - "*"
    exclude:
      - "spacy.io"
      - "nightly.spacy.io"
      - "v2.spacy.io"
  paths:
    exclude:
      - "website/*"
      - "*.md"
      - "*.mdx"
      - ".github/workflows/*"
 pr:
  paths:
    exclude:
      - "*.md"
      - "*.mdx"
      - "website/docs/*"
      - "website/src/*"
      - "website/meta/*.tsx"
      - "website/meta/*.mjs"
      - "website/meta/languages.json"
      - "website/meta/site.json"
      - "website/meta/sidebars.json"
      - "website/meta/type-annotations.json"
      - "website/pages/*"
      - ".github/workflows/*"
 jobs:
  # Check formatting and linting. Perform basic checks for most important errors
  # (syntax etc.) Uses the config defined in setup.cfg and overwrites the
  # selected codes.
  - job: "Validate"
    pool:
      vmImage: "ubuntu-latest"
    steps:
      - task: UsePythonVersion@0
        inputs:
          versionSpec: "3.8"
      - script: |
          pip install black -c requirements.txt
          python -m black spacy --check
        displayName: "black"
      - script: |
          pip install flake8==5.0.4
          python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
        displayName: "flake8"
  - job: "Test"
    dependsOn: "Validate"
    strategy:
      matrix:
        # We're only running one platform per Python version to speed up builds
        #        Python38Linux:
        #          imageName: "ubuntu-latest"
        #          python.version: "3.8"
        #        Python38Windows:
        #          imageName: "windows-latest"
        #          python.version: "3.8"
        Python38Mac:
          imageName: "macos-latest"
          python.version: "3.8"
        Python39Linux:
          imageName: "ubuntu-latest"
          python.version: "3.9"
        #        Python39Windows:
        #          imageName: "windows-latest"
        #          python.version: "3.9"
        #        Python39Mac:
        #          imageName: "macos-latest"
        #          python.version: "3.9"
        #        Python310Linux:
        #          imageName: "ubuntu-latest"
        #          python.version: "3.10"
        Python310Windows:
          imageName: "windows-latest"
          python.version: "3.10"
        #        Python310Mac:
        #          imageName: "macos-latest"
        #          python.version: "3.10"
        Python311Linux:
          imageName: 'ubuntu-latest'
          python.version: '3.11'
        Python311Windows:
          imageName: 'windows-latest'
          python.version: '3.11'
        Python311Mac:
          imageName: 'macos-latest'
          python.version: '3.11'
      maxParallel: 4
    pool:
      vmImage: $(imageName)
    steps:
      - template: .github/azure-steps.yml
        parameters:
          python_version: '$(python.version)'
--- a/build-constraints.txt
+++ b/build-constraints.txt
@ -1,6 +1,4 @@
 # build version constraints for use with wheelwright + multibuild
 numpy==1.15.0; python_version<='3.7' and platform_machine!='aarch64'
 numpy==1.19.2; python_version<='3.7' and platform_machine=='aarch64'
 numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
 numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
 numpy==1.19.3; python_version=='3.9'
--- a/pyproject.toml
+++ b/pyproject.toml
@ -9,3 +9,6 @@ requires = [
    "numpy>=1.15.0",
 ]
 build-backend = "setuptools.build_meta"
 [tool.isort]
 profile = "black"
--- a/requirements.txt
+++ b/requirements.txt
@ -9,7 +9,7 @@ murmurhash>=0.28.0,<1.1.0
 wasabi>=0.9.1,<1.2.0
 srsly>=2.4.3,<3.0.0
 catalogue>=2.0.6,<2.1.0
-typer>=0.3.0,<0.8.0
+typer>=0.3.0,<0.10.0
 pathy>=0.10.0
 smart-open>=5.2.1,<7.0.0
 # Third party dependencies
@ -30,10 +30,11 @@ pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
 flake8>=3.8.0,<6.0.0
 hypothesis>=3.27.0,<7.0.0
-mypy>=0.990,<1.1.0; platform_machine != "aarch64" and python_version >= "3.7"
+mypy>=0.990,<1.1.0; platform_machine != "aarch64"
 types-dataclasses>=0.1.3; python_version < "3.7"
 types-mock>=0.1.1
 types-setuptools>=57.0.0
 types-requests
 types-setuptools>=57.0.0
 black==22.3.0
 cython-lint>=0.15.0; python_version >= "3.7"
 isort>=5.0,<6.0
--- a/setup.cfg
+++ b/setup.cfg
@ -30,6 +30,14 @@ project_urls =
 zip_safe = false
 include_package_data = true
 python_requires = >=3.8
 setup_requires =
    cython>=0.25,<3.0
    numpy>=1.15.0
    # We also need our Cython packages here to compile against
    cymem>=2.0.2,<2.1.0
    preshed>=3.0.2,<3.1.0
    murmurhash>=0.28.0,<1.1.0
    thinc>=9.0.0.dev2,<9.1.0
 install_requires =
    # Our libraries
    spacy-legacy>=4.0.0.dev0,<4.1.0
@ -42,7 +50,7 @@ install_requires =
    srsly>=2.4.3,<3.0.0
    catalogue>=2.0.6,<2.1.0
    # Third-party dependencies
-    typer>=0.3.0,<0.8.0
+    typer>=0.3.0,<0.10.0
    pathy>=0.10.0
    smart-open>=5.2.1,<7.0.0
    tqdm>=4.38.0,<5.0.0
@ -67,41 +75,41 @@ transformers =
 ray =
    spacy_ray>=0.1.0,<1.0.0
 cuda =
-    cupy>=5.0.0b4,<12.0.0
+    cupy>=5.0.0b4,<13.0.0
 cuda80 =
-    cupy-cuda80>=5.0.0b4,<12.0.0
+    cupy-cuda80>=5.0.0b4,<13.0.0
 cuda90 =
-    cupy-cuda90>=5.0.0b4,<12.0.0
+    cupy-cuda90>=5.0.0b4,<13.0.0
 cuda91 =
-    cupy-cuda91>=5.0.0b4,<12.0.0
+    cupy-cuda91>=5.0.0b4,<13.0.0
 cuda92 =
-    cupy-cuda92>=5.0.0b4,<12.0.0
+    cupy-cuda92>=5.0.0b4,<13.0.0
 cuda100 =
-    cupy-cuda100>=5.0.0b4,<12.0.0
+    cupy-cuda100>=5.0.0b4,<13.0.0
 cuda101 =
-    cupy-cuda101>=5.0.0b4,<12.0.0
+    cupy-cuda101>=5.0.0b4,<13.0.0
 cuda102 =
-    cupy-cuda102>=5.0.0b4,<12.0.0
+    cupy-cuda102>=5.0.0b4,<13.0.0
 cuda110 =
-    cupy-cuda110>=5.0.0b4,<12.0.0
+    cupy-cuda110>=5.0.0b4,<13.0.0
 cuda111 =
-    cupy-cuda111>=5.0.0b4,<12.0.0
+    cupy-cuda111>=5.0.0b4,<13.0.0
 cuda112 =
-    cupy-cuda112>=5.0.0b4,<12.0.0
+    cupy-cuda112>=5.0.0b4,<13.0.0
 cuda113 =
-    cupy-cuda113>=5.0.0b4,<12.0.0
+    cupy-cuda113>=5.0.0b4,<13.0.0
 cuda114 =
-    cupy-cuda114>=5.0.0b4,<12.0.0
+    cupy-cuda114>=5.0.0b4,<13.0.0
 cuda115 =
-    cupy-cuda115>=5.0.0b4,<12.0.0
+    cupy-cuda115>=5.0.0b4,<13.0.0
 cuda116 =
-    cupy-cuda116>=5.0.0b4,<12.0.0
+    cupy-cuda116>=5.0.0b4,<13.0.0
 cuda117 =
-    cupy-cuda117>=5.0.0b4,<12.0.0
+    cupy-cuda117>=5.0.0b4,<13.0.0
 cuda11x =
-    cupy-cuda11x>=11.0.0,<12.0.0
+    cupy-cuda11x>=11.0.0,<13.0.0
 cuda-autodetect =
-    cupy-wheel>=11.0.0,<12.0.0
+    cupy-wheel>=11.0.0,<13.0.0
 apple =
    thinc-apple-ops>=0.1.0.dev0,<1.0.0
 # Language tokenizers with external dependencies
--- a/spacy/init.py
+++ b/spacy/init.py
@ -1,6 +1,6 @@
 from typing import Union, Iterable, Dict, Any
 from pathlib import Path
 import sys
 from pathlib import Path
 from typing import Any, Dict, Iterable, Union
 # set library-specific custom warning handling before doing anything else
 from .errors import setup_default_warnings
@ -8,20 +8,17 @@ from .errors import setup_default_warnings
 setup_default_warnings()  # noqa: E402
 # These are imported as part of the API
-from thinc.api import prefer_gpu, require_gpu, require_cpu  # noqa: F401
+from thinc.api import Config, prefer_gpu, require_cpu, require_gpu  # noqa: F401
 from thinc.api import Config
 from . import pipeline  # noqa: F401
 from .cli.info import info  # noqa: F401
 from .glossary import explain  # noqa: F401
 from .about import __version__  # noqa: F401
 from .util import registry, logger  # noqa: F401
 from .errors import Errors
 from .language import Language
 from .vocab import Vocab
 from . import util
-
+from .about import __version__  # noqa: F401
 from .cli.info import info  # noqa: F401
 from .errors import Errors
 from .glossary import explain  # noqa: F401
 from .language import Language
 from .util import logger, registry  # noqa: F401
 from .vocab import Vocab
 if sys.maxunicode == 65535:
    raise SystemError(Errors.E130)
--- a/spacy/about.py
+++ b/spacy/about.py
@ -1,6 +1,6 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "4.0.0.dev0"
+__version__ = "4.0.0.dev1"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 __projects__ = "https://github.com/explosion/projects"
--- a/spacy/attrs.pxd
+++ b/spacy/attrs.pxd
@ -1,5 +1,6 @@
 from . cimport symbols
 cdef enum attr_id_t:
    NULL_ATTR = 0
    IS_ALPHA = symbols.IS_ALPHA
--- a/spacy/cli/init.py
+++ b/spacy/cli/init.py
@ -1,35 +1,35 @@
 from wasabi import msg
 from ._util import app, setup_cli  # noqa: F401
 from .apply import apply  # noqa: F401
 from .assemble import assemble_cli  # noqa: F401
 # These are the actual functions, NOT the wrapped CLI commands. The CLI commands
 # are registered automatically and won't have to be imported here.
 from .benchmark_speed import benchmark_speed_cli  # noqa: F401
 from .download import download  # noqa: F401
 from .info import info  # noqa: F401
 from .package import package  # noqa: F401
 from .profile import profile  # noqa: F401
 from .train import train_cli  # noqa: F401
 from .assemble import assemble_cli  # noqa: F401
 from .pretrain import pretrain  # noqa: F401
 from .debug_data import debug_data  # noqa: F401
 from .debug_config import debug_config  # noqa: F401
 from .debug_model import debug_model  # noqa: F401
 from .debug_diff import debug_diff  # noqa: F401
 from .evaluate import evaluate  # noqa: F401
 from .apply import apply  # noqa: F401
 from .convert import convert  # noqa: F401
-from .init_pipeline import init_pipeline_cli  # noqa: F401
+from .debug_config import debug_config  # noqa: F401
-from .init_config import init_config, fill_config  # noqa: F401
+from .debug_data import debug_data  # noqa: F401
-from .validate import validate  # noqa: F401
+from .debug_diff import debug_diff  # noqa: F401
-from .project.clone import project_clone  # noqa: F401
+from .debug_model import debug_model  # noqa: F401
-from .project.assets import project_assets  # noqa: F401
+from .download import download  # noqa: F401
-from .project.run import project_run  # noqa: F401
+from .evaluate import evaluate  # noqa: F401
 from .project.dvc import project_update_dvc  # noqa: F401
 from .project.push import project_push  # noqa: F401
 from .project.pull import project_pull  # noqa: F401
 from .project.document import project_document  # noqa: F401
 from .find_threshold import find_threshold  # noqa: F401
 from .info import info  # noqa: F401
 from .init_config import fill_config, init_config  # noqa: F401
 from .init_pipeline import init_pipeline_cli  # noqa: F401
 from .package import package  # noqa: F401
 from .pretrain import pretrain  # noqa: F401
 from .profile import profile  # noqa: F401
 from .project.assets import project_assets  # noqa: F401
 from .project.clone import project_clone  # noqa: F401
 from .project.document import project_document  # noqa: F401
 from .project.dvc import project_update_dvc  # noqa: F401
 from .project.pull import project_pull  # noqa: F401
 from .project.push import project_push  # noqa: F401
 from .project.run import project_run  # noqa: F401
 from .train import train_cli  # noqa: F401
 from .validate import validate  # noqa: F401
@app.command("link", no_args_is_help=True, deprecated=True, hidden=True)
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@ -1,26 +1,45 @@
 from typing import Dict, Any, Union, List, Optional, Tuple, Iterable, Literal
 from typing import TYPE_CHECKING, overload
 import sys
 import shutil
 from pathlib import Path
 from wasabi import msg, Printer
 import srsly
 import hashlib
 import os
 import shutil
 import sys
 from configparser import InterpolationError
 from contextlib import contextmanager
 from pathlib import Path
 from typing import (
    TYPE_CHECKING,
    Any,
    Dict,
    Iterable,
    List,
    Literal,
    Optional,
    Tuple,
    Union,
    overload,
 )
 import srsly
 import typer
 from click import NoSuchOption
 from click.parser import split_arg_string
 from typer.main import get_command
 from contextlib import contextmanager
 from thinc.api import Config, ConfigValidationError, require_gpu
 from thinc.util import gpu_is_available
-from configparser import InterpolationError
+from typer.main import get_command
-import os
+from wasabi import Printer, msg
 from ..schemas import ProjectConfigSchema, validate
 from ..util import import_file, run_command, make_tempdir, registry, logger
 from ..util import is_compatible_version, SimpleFrozenDict, ENV_VARS
 from ..errors import RENAMED_LANGUAGE_CODES
 from .. import about
 from ..errors import RENAMED_LANGUAGE_CODES
 from ..schemas import ProjectConfigSchema, validate
 from ..util import (
    ENV_VARS,
    SimpleFrozenDict,
    import_file,
    is_compatible_version,
    logger,
    make_tempdir,
    registry,
    run_command,
 )
 if TYPE_CHECKING:
    from pathy import FluidPath  # noqa: F401
--- a/spacy/cli/apply.py
+++ b/spacy/cli/apply.py
@ -1,18 +1,15 @@
 import tqdm
 import srsly
 from itertools import chain
 from pathlib import Path
-from typing import Optional, List, Iterable, cast, Union
+from typing import Iterable, List, Optional, Union, cast
 import srsly
 import tqdm
 from wasabi import msg
 from ._util import app, Arg, Opt, setup_gpu, import_code, walk_directory
 from ..tokens import Doc, DocBin
 from ..vocab import Vocab
 from ..util import ensure_path, load_model
-
+from ..vocab import Vocab
 from ._util import Arg, Opt, app, import_code, setup_gpu, walk_directory
 path_help = """Location of the documents to predict on.
 Can be a single file in .spacy format or a .jsonl file.
--- a/spacy/cli/assemble.py
+++ b/spacy/cli/assemble.py
@ -1,13 +1,20 @@
 from typing import Optional
 from pathlib import Path
 from wasabi import msg
 import typer
 import logging
 from pathlib import Path
 from typing import Optional
 import typer
 from wasabi import msg
 from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code
 from .. import util
 from ..util import get_sourced_components, load_model_from_config
 from ._util import (
    Arg,
    Opt,
    app,
    import_code,
    parse_config_overrides,
    show_validation_error,
 )
@app.command(
--- a/spacy/cli/benchmark_speed.py
+++ b/spacy/cli/benchmark_speed.py
@ -1,11 +1,12 @@
 from typing import Iterable, List, Optional
 import random
 from itertools import islice
 import numpy
 from pathlib import Path
 import time
-from tqdm import tqdm
+from itertools import islice
 from pathlib import Path
 from typing import Iterable, List, Optional
 import numpy
 import typer
 from tqdm import tqdm
 from wasabi import msg
 from .. import util
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@ -1,18 +1,22 @@
-from typing import Callable, Iterable, Mapping, Optional, Any, Union
+import itertools
 from enum import Enum
 from pathlib import Path
 from wasabi import Printer
 import srsly
 import re
 import sys
-import itertools
+from enum import Enum
 from pathlib import Path
 from typing import Any, Callable, Iterable, Mapping, Optional, Union
 import srsly
 from wasabi import Printer
 from ._util import app, Arg, Opt, _handle_renamed_language_codes, walk_directory
 from ..training import docs_to_json
 from ..tokens import Doc, DocBin
-from ..training.converters import iob_to_docs, conll_ner_to_docs, json_to_docs
+from ..training import docs_to_json
-from ..training.converters import conllu_to_docs
+from ..training.converters import (
-
+    conll_ner_to_docs,
    conllu_to_docs,
    iob_to_docs,
    json_to_docs,
 )
 from ._util import Arg, Opt, _handle_renamed_language_codes, app, walk_directory
 # Converters are matched by file extension except for ner/iob, which are
 # matched by file extension and content. To add a converter, add a new
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@ -1,15 +1,22 @@
 from typing import Optional, Dict, Any, Union, List
 from pathlib import Path
-from wasabi import msg, table
+from typing import Any, Dict, List, Optional, Union
 import typer
 from thinc.api import Config
 from thinc.config import VARIABLE_RE
-import typer
+from wasabi import msg, table
-from ._util import Arg, Opt, show_validation_error, parse_config_overrides
+from .. import util
 from ._util import import_code, debug_cli
 from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
 from ..util import registry
-from .. import util
+from ._util import (
    Arg,
    Opt,
    debug_cli,
    import_code,
    parse_config_overrides,
    show_validation_error,
 )
@debug_cli.command(
--- a/spacy/cli/debug_data.py
+++ b/spacy/cli/debug_data.py
@ -1,29 +1,49 @@
 from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple, Union
 from typing import Literal, cast, overload
 from pathlib import Path
 from collections import Counter
 import sys
 import srsly
 from wasabi import Printer, MESSAGES, msg
 import typer
 import math
 import sys
 from collections import Counter
 from pathlib import Path
 from typing import (
    Any,
    Dict,
    Iterable,
    List,
    Literal,
    Optional,
    Sequence,
    Set,
    Tuple,
    Union,
    cast,
    overload,
 )
-from ._util import app, Arg, Opt, show_validation_error, parse_config_overrides
+import numpy
-from ._util import import_code, debug_cli, _format_number
+import srsly
-from ..training import Example, remove_bilu_prefix
+import typer
-from ..training.initialize import get_sourced_components
+from wasabi import MESSAGES, Printer, msg
-from ..schemas import ConfigSchemaTraining
+
-from ..pipeline import TrainablePipe
+from .. import util
 from ..language import Language
 from ..morphology import Morphology
 from ..pipeline import Morphologizer, SpanCategorizer, TrainablePipe
 from ..pipeline._edit_tree_internals.edit_trees import EditTrees
 from ..pipeline._parser_internals import nonproj
 from ..pipeline._parser_internals.nonproj import DELIMITER
-from ..pipeline import Morphologizer, SpanCategorizer
+from ..schemas import ConfigSchemaTraining
-from ..pipeline._edit_tree_internals.edit_trees import EditTrees
+from ..training import Example, remove_bilu_prefix
-from ..morphology import Morphology
+from ..training.initialize import get_sourced_components
 from ..language import Language
 from ..util import registry, resolve_dot_names
 from ..vectors import Mode as VectorsMode
-from .. import util
+from ._util import (
-
+    Arg,
    Opt,
    _format_number,
    app,
    debug_cli,
    import_code,
    parse_config_overrides,
    show_validation_error,
 )
 # Minimum number of expected occurrences of NER label in data to train new label
 NEW_LABEL_THRESHOLD = 50
@ -210,7 +230,7 @@ def debug_data(
    else:
        msg.info("No word vectors present in the package")
-    if "spancat" in factory_names:
+    if "spancat" in factory_names or "spancat_singlelabel" in factory_names:
        model_labels_spancat = _get_labels_from_spancat(nlp)
        has_low_data_warning = False
        has_no_neg_warning = False
@ -335,7 +355,7 @@ def debug_data(
                show=verbose,
            )
        else:
-            msg.good("Examples without ocurrences available for all labels")
+            msg.good("Examples without occurrences available for all labels")
    if "ner" in factory_names:
        # Get all unique NER labels present in the data
@ -520,9 +540,13 @@ def debug_data(
    if "tagger" in factory_names:
        msg.divider("Part-of-speech Tagging")
-        label_list = [label for label in gold_train_data["tags"]]
+        label_list, counts = zip(*gold_train_data["tags"].items())
        model_labels = _get_labels_from_model(nlp, "tagger")
        msg.info(f"{len(label_list)} label(s) in train data")
        p = numpy.array(counts)
        p = p / p.sum()
        norm_entropy = (-p * numpy.log2(p)).sum() / numpy.log2(len(label_list))
        msg.info(f"{norm_entropy} is the normalised label entropy")
        model_labels = _get_labels_from_model(nlp, "tagger")
        labels = set(label_list)
        missing_labels = model_labels - labels
        if missing_labels:
@ -824,7 +848,7 @@ def _compile_gold(
                    data["boundary_cross_ents"] += 1
                elif label == "-":
                    data["ner"]["-"] += 1
-        if "spancat" in factory_names:
+        if "spancat" in factory_names or "spancat_singlelabel" in factory_names:
            for spans_key in list(eg.reference.spans.keys()):
                # Obtain the span frequency
                if spans_key not in data["spancat"]:
@ -1022,7 +1046,7 @@ def _get_labels_from_spancat(nlp: Language) -> Dict[str, Set[str]]:
    pipe_names = [
        pipe_name
        for pipe_name in nlp.pipe_names
-        if nlp.get_pipe_meta(pipe_name).factory == "spancat"
+        if nlp.get_pipe_meta(pipe_name).factory in ("spancat", "spancat_singlelabel")
    ]
    labels: Dict[str, Set[str]] = {}
    for pipe_name in pipe_names:
--- a/spacy/cli/debug_diff.py
+++ b/spacy/cli/debug_diff.py
@ -1,13 +1,13 @@
 from pathlib import Path
 from typing import Optional
 import typer
 from wasabi import Printer, diff_strings, MarkdownRenderer
 from pathlib import Path
 from thinc.api import Config
 from wasabi import MarkdownRenderer, Printer, diff_strings
 from ._util import debug_cli, Arg, Opt, show_validation_error, parse_config_overrides
 from ..util import load_config
-from .init_config import init_config, Optimizations
+from ._util import Arg, Opt, debug_cli, parse_config_overrides, show_validation_error
 from .init_config import Optimizations, init_config
@debug_cli.command(
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@ -1,19 +1,32 @@
 from typing import Dict, Any, Optional
 from pathlib import Path
 import itertools
 from pathlib import Path
 from typing import Any, Dict, Optional
 import typer
 from thinc.api import (
    Model,
    data_validation,
    fix_random_seed,
    set_dropout_rate,
    set_gpu_allocator,
 )
 from wasabi import msg
 from spacy.training import Example
 from spacy.util import resolve_dot_names
 from wasabi import msg
 from thinc.api import fix_random_seed, set_dropout_rate
 from thinc.api import Model, data_validation, set_gpu_allocator
 import typer
-from ._util import Arg, Opt, debug_cli, show_validation_error
+from .. import util
 from ._util import parse_config_overrides, string_to_list, setup_gpu
 from ..schemas import ConfigSchemaTraining
 from ..util import registry
-from .. import util
+from ._util import (
    Arg,
    Opt,
    debug_cli,
    parse_config_overrides,
    setup_gpu,
    show_validation_error,
    string_to_list,
 )
@debug_cli.command(
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@ -1,14 +1,20 @@
 from typing import Optional, Sequence
 import requests
 import sys
-from wasabi import msg
+from typing import Optional, Sequence
-import typer
+
 import requests
 import typer
 from wasabi import msg
 from ._util import app, Arg, Opt, WHEEL_SUFFIX, SDIST_SUFFIX
 from .. import about
-from ..util import is_package, get_minor_version, run_command
+from ..util import (
-from ..util import is_prerelease_version, get_installed_models
+    get_installed_models,
-from ..util import get_package_version
+    get_minor_version,
    get_package_version,
    is_package,
    is_prerelease_version,
    run_command,
 )
 from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app
@app.command(
@ -83,11 +89,8 @@ def download(
 def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
    """Build the relative download path for a model package.

    The path has the form "<name>-<version>/<name>-<version><suffix>", where
    the suffix is SDIST_SUFFIX when `sdist` is truthy and WHEEL_SUFFIX
    otherwise. For sdists, an "#egg=<name>==<version>" fragment is appended.

    model_name (str): Name of the model package.
    version (str): Version of the model package.
    sdist (bool): Whether to produce the path of the source distribution.
    RETURNS (str): The formatted path, including the egg fragment for sdists.
    """
    names = {"m": model_name, "v": version}
    archive_suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
    path = "{m}-{v}/{m}-{v}{s}".format(s=archive_suffix, **names)
    # Only source distributions get the egg fragment with the pinned version.
    egg_fragment = "#egg={m}=={v}".format(**names) if sdist else ""
    return path + egg_fragment
--- a/spacy/cli/evaluate.py
+++ b/spacy/cli/evaluate.py
@ -1,16 +1,16 @@
 from typing import Optional, List, Dict, Any, Union
 from wasabi import Printer
 from pathlib import Path
 import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 import srsly
 from thinc.api import fix_random_seed
 from wasabi import Printer
-from ..training import Corpus
+from .. import displacy, util
 from ..tokens import Doc
 from ._util import app, Arg, Opt, setup_gpu, import_code, benchmark_cli
 from ..scorer import Scorer
-from .. import util
+from ..tokens import Doc
-from .. import displacy
+from ..training import Corpus
 from ._util import Arg, Opt, app, benchmark_cli, import_code, setup_gpu
@benchmark_cli.command(
@ -27,6 +27,7 @@ def evaluate_cli(
    gold_preproc: bool = Opt(False, "--gold-preproc", "-G", help="Use gold preprocessing"),
    displacy_path: Optional[Path] = Opt(None, "--displacy-path", "-dp", help="Directory to output rendered parses as HTML", exists=True, file_okay=False),
    displacy_limit: int = Opt(25, "--displacy-limit", "-dl", help="Limit of parses to render as HTML"),
    per_component: bool = Opt(False, "--per-component", "-P", help="Return scores per component, only applicable when an output JSON file is specified."),
    # fmt: on
 ):
    """
@ -50,6 +51,7 @@ def evaluate_cli(
        gold_preproc=gold_preproc,
        displacy_path=displacy_path,
        displacy_limit=displacy_limit,
        per_component=per_component,
        silent=False,
    )
@ -64,6 +66,7 @@ def evaluate(
    displacy_limit: int = 25,
    silent: bool = True,
    spans_key: str = "sc",
    per_component: bool = False,
 ) -> Dict[str, Any]:
    msg = Printer(no_print=silent, pretty=not silent)
    fix_random_seed()
@ -78,50 +81,61 @@ def evaluate(
    corpus = Corpus(data_path, gold_preproc=gold_preproc)
    nlp = util.load_model(model)
    dev_dataset = list(corpus(nlp))
-    scores = nlp.evaluate(dev_dataset)
+    scores = nlp.evaluate(dev_dataset, per_component=per_component)
-    metrics = {
+    if per_component:
-        "TOK": "token_acc",
+        data = scores
-        "TAG": "tag_acc",
+        if output is None:
-        "POS": "pos_acc",
+            msg.warn(
-        "MORPH": "morph_acc",
+                "The per-component option is enabled but there is no output JSON file provided to save the scores to."
-        "LEMMA": "lemma_acc",
+            )
-        "UAS": "dep_uas",
+        else:
-        "LAS": "dep_las",
+            msg.info("Per-component scores will be saved to output JSON file.")
-        "NER P": "ents_p",
+    else:
-        "NER R": "ents_r",
+        metrics = {
-        "NER F": "ents_f",
+            "TOK": "token_acc",
-        "TEXTCAT": "cats_score",
+            "TAG": "tag_acc",
-        "SENT P": "sents_p",
+            "POS": "pos_acc",
-        "SENT R": "sents_r",
+            "MORPH": "morph_acc",
-        "SENT F": "sents_f",
+            "LEMMA": "lemma_acc",
-        "SPAN P": f"spans_{spans_key}_p",
+            "UAS": "dep_uas",
-        "SPAN R": f"spans_{spans_key}_r",
+            "LAS": "dep_las",
-        "SPAN F": f"spans_{spans_key}_f",
+            "NER P": "ents_p",
-        "SPEED": "speed",
+            "NER R": "ents_r",
-    }
+            "NER F": "ents_f",
-    results = {}
+            "TEXTCAT": "cats_score",
-    data = {}
+            "SENT P": "sents_p",
-    for metric, key in metrics.items():
+            "SENT R": "sents_r",
-        if key in scores:
+            "SENT F": "sents_f",
-            if key == "cats_score":
+            "SPAN P": f"spans_{spans_key}_p",
-                metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
+            "SPAN R": f"spans_{spans_key}_r",
-            if isinstance(scores[key], (int, float)):
+            "SPAN F": f"spans_{spans_key}_f",
-                if key == "speed":
+            "SPEED": "speed",
-                    results[metric] = f"{scores[key]:.0f}"
+        }
        results = {}
        data = {}
        for metric, key in metrics.items():
            if key in scores:
                if key == "cats_score":
                    metric = metric + " (" + scores.get("cats_score_desc", "unk") + ")"
                if isinstance(scores[key], (int, float)):
                    if key == "speed":
                        results[metric] = f"{scores[key]:.0f}"
                    else:
                        results[metric] = f"{scores[key]*100:.2f}"
                else:
-                    results[metric] = f"{scores[key]*100:.2f}"
+                    results[metric] = "-"
-            else:
+                data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
                results[metric] = "-"
            data[re.sub(r"[\s/]", "_", key.lower())] = scores[key]
-    msg.table(results, title="Results")
+        msg.table(results, title="Results")
-    data = handle_scores_per_type(scores, data, spans_key=spans_key, silent=silent)
+        data = handle_scores_per_type(scores, data, spans_key=spans_key, silent=silent)
    if displacy_path:
        factory_names = [nlp.get_pipe_meta(pipe).factory for pipe in nlp.pipe_names]
        docs = list(nlp.pipe(ex.reference.text for ex in dev_dataset[:displacy_limit]))
        render_deps = "parser" in factory_names
        render_ents = "ner" in factory_names
        render_spans = "spancat" in factory_names
        render_parses(
            docs,
            displacy_path,
@ -129,6 +143,7 @@ def evaluate(
            limit=displacy_limit,
            deps=render_deps,
            ents=render_ents,
            spans=render_spans,
        )
        msg.good(f"Generated {displacy_limit} parses as HTML", displacy_path)
@ -182,6 +197,7 @@ def render_parses(
    limit: int = 250,
    deps: bool = True,
    ents: bool = True,
    spans: bool = True,
 ):
    docs[0].user_data["title"] = model_name
    if ents:
@ -195,6 +211,11 @@ def render_parses(
        with (output_path / "parses.html").open("w", encoding="utf8") as file_:
            file_.write(html)
    if spans:
        html = displacy.render(docs[:limit], style="span", page=True)
        with (output_path / "spans.html").open("w", encoding="utf8") as file_:
            file_.write(html)
 def print_prf_per_type(
    msg: Printer, scores: Dict[str, Dict[str, float]], name: str, type: str
--- a/spacy/cli/find_threshold.py
+++ b/spacy/cli/find_threshold.py
@ -1,17 +1,17 @@
 import functools
 import logging
 import operator
 from pathlib import Path
-import logging
+from typing import Any, Dict, List, Optional, Tuple
 from typing import Optional, Tuple, Any, Dict, List
 import numpy
 import wasabi.tables
 from ..pipeline import TextCategorizer, MultiLabel_TextCategorizer
 from ..errors import Errors
 from ..training import Corpus
 from ._util import app, Arg, Opt, import_code, setup_gpu
 from .. import util
 from ..errors import Errors
 from ..pipeline import MultiLabel_TextCategorizer, TextCategorizer
 from ..training import Corpus
 from ._util import Arg, Opt, app, import_code, setup_gpu
 _DEFAULTS = {
    "n_trials": 11,
@ -35,7 +35,7 @@ def find_threshold_cli(
    code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
    use_gpu: int = Opt(_DEFAULTS["use_gpu"], "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    gold_preproc: bool = Opt(_DEFAULTS["gold_preproc"], "--gold-preproc", "-G", help="Use gold preprocessing"),
-    verbose: bool = Opt(False, "--silent", "-V", "-VV", help="Display more information for debugging purposes"),
+    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    # fmt: on
 ):
    """
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@ -1,15 +1,15 @@
 from typing import Optional, Dict, Any, Union, List
 import platform
 import json
 from pathlib import Path
 from wasabi import Printer, MarkdownRenderer
 import srsly
 import importlib.metadata
 import json
 import platform
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
-from ._util import app, Arg, Opt, string_to_list
+import srsly
-from .download import get_model_filename, get_latest_version
+from wasabi import MarkdownRenderer, Printer
-from .. import util
+
-from .. import about
+from .. import about, util
 from ._util import Arg, Opt, app, string_to_list
 from .download import get_latest_version, get_model_filename
@app.command("info")
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@ -1,19 +1,27 @@
-from typing import Optional, List, Tuple
+import re
 from enum import Enum
 from pathlib import Path
-from wasabi import Printer, diff_strings
+from typing import List, Optional, Tuple
-from thinc.api import Config
+
 import srsly
 import re
 from jinja2 import Template
 from thinc.api import Config
 from wasabi import Printer, diff_strings
 from .. import util
 from ..language import DEFAULT_CONFIG_DISTILL_PATH, DEFAULT_CONFIG_PRETRAIN_PATH
 from ..schemas import RecommendationSchema
 from ..util import SimpleFrozenList
-from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
+from ._util import (
-from ._util import string_to_list, import_code, _handle_renamed_language_codes
+    COMMAND,
-
+    Arg,
    Opt,
    _handle_renamed_language_codes,
    import_code,
    init_cli,
    show_validation_error,
    string_to_list,
 )
 ROOT = Path(__file__).parent / "templates"
 TEMPLATE_PATH = ROOT / "quickstart_training.jinja"
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@ -1,15 +1,24 @@
 from typing import Optional
 import logging
 from pathlib import Path
-from wasabi import msg
+from typing import Optional
-import typer
+
 import srsly
 import typer
 from wasabi import msg
 from .. import util
 from ..training.initialize import init_nlp, convert_vectors
 from ..language import Language
-from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
+from ..training.initialize import convert_vectors, init_nlp
-from ._util import import_code, setup_gpu, _handle_renamed_language_codes
+from ._util import (
    Arg,
    Opt,
    _handle_renamed_language_codes,
    import_code,
    init_cli,
    parse_config_overrides,
    setup_gpu,
    show_validation_error,
 )
@init_cli.command("vectors")
@ -23,6 +32,7 @@ def init_vectors_cli(
    mode: str = Opt("default", "--mode", "-m", help="Vectors mode: default or floret"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
    attr: str = Opt("ORTH", "--attr", "-a", help="Optional token attribute to use for vectors, e.g. LOWER or NORM"),
    # fmt: on
 ):
    """Convert word vectors for use with spaCy. Will export an nlp object that
@ -44,6 +54,7 @@ def init_vectors_cli(
        truncate=truncate,
        prune=prune,
        mode=mode,
        attr=attr,
    )
    msg.good(f"Successfully converted {len(nlp.vocab.vectors)} vectors")
    nlp.to_disk(output_dir)
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@ -1,18 +1,18 @@
 from typing import Optional, Union, Any, Dict, List, Tuple, cast
 import shutil
 from pathlib import Path
 from wasabi import Printer, MarkdownRenderer, get_raw_input
 from thinc.api import Config
 from collections import defaultdict
 from catalogue import RegistryError
 import srsly
 import sys
 import re
 import shutil
 import sys
 from collections import defaultdict
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
-from ._util import app, Arg, Opt, string_to_list, WHEEL_SUFFIX, SDIST_SUFFIX
+import srsly
-from ..schemas import validate, ModelMetaSchema
+from catalogue import RegistryError
-from .. import util
+from thinc.api import Config
-from .. import about
+from wasabi import MarkdownRenderer, Printer, get_raw_input
 from .. import about, util
 from ..schemas import ModelMetaSchema, validate
 from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app, string_to_list
@app.command("package")
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@ -1,13 +1,21 @@
 from typing import Optional
 from pathlib import Path
 from wasabi import msg
 import typer
 import re
 from pathlib import Path
 from typing import Optional
 import typer
 from wasabi import msg
 from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code, setup_gpu
 from ..training.pretrain import pretrain
 from ..util import load_config
 from ._util import (
    Arg,
    Opt,
    app,
    import_code,
    parse_config_overrides,
    setup_gpu,
    show_validation_error,
 )
@app.command(
@ -23,6 +31,7 @@ def pretrain_cli(
    resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
    epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
    skip_last: bool = Opt(False, "--skip-last", "-L", help="Skip saving model-last.bin"),
    # fmt: on
 ):
    """
@ -74,6 +83,7 @@ def pretrain_cli(
        epoch_resume=epoch_resume,
        use_gpu=use_gpu,
        silent=False,
        skip_last=skip_last,
    )
    msg.good("Successfully finished pretrain")
--- a/spacy/cli/profile.py
+++ b/spacy/cli/profile.py
@ -1,17 +1,18 @@
 from typing import Optional, Sequence, Union, Iterator
 import tqdm
 from pathlib import Path
 import srsly
 import cProfile
 import itertools
 import pstats
 import sys
-import itertools
+from pathlib import Path
-from wasabi import msg, Printer
+from typing import Iterator, Optional, Sequence, Union
-import typer
+
 import srsly
 import tqdm
 import typer
 from wasabi import Printer, msg
 from ._util import app, debug_cli, Arg, Opt, NAME
 from ..language import Language
 from ..util import load_model
 from ._util import NAME, Arg, Opt, app, debug_cli
@debug_cli.command("profile")
--- a/spacy/cli/project/assets.py
+++ b/spacy/cli/project/assets.py
@ -1,16 +1,27 @@
 from typing import Any, Dict, Optional
 from pathlib import Path
 from wasabi import msg
 import os
 import re
 import shutil
 from pathlib import Path
 from typing import Any, Dict, Optional
 import requests
 import typer
 from wasabi import msg
 from ...util import ensure_path, working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
+from .._util import (
-from .._util import get_checksum, download_file, git_checkout, get_git_version
+    PROJECT_FILE,
-from .._util import SimpleFrozenDict, parse_config_overrides
+    Arg,
    Opt,
    SimpleFrozenDict,
    download_file,
    get_checksum,
    get_git_version,
    git_checkout,
    load_project_config,
    parse_config_overrides,
    project_cli,
 )
 # Whether assets are extra if `extra` is not set.
 EXTRA_DEFAULT = False
--- a/spacy/cli/project/clone.py
+++ b/spacy/cli/project/clone.py
@ -1,13 +1,22 @@
 from typing import Optional
 from pathlib import Path
 from wasabi import msg
 import subprocess
 import re
 import subprocess
 from pathlib import Path
 from typing import Optional
 from wasabi import msg
 from ... import about
 from ...util import ensure_path
-from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
+from .._util import (
-from .._util import git_checkout, get_git_version, git_repo_branch_exists
+    COMMAND,
    PROJECT_FILE,
    Arg,
    Opt,
    get_git_version,
    git_checkout,
    git_repo_branch_exists,
    project_cli,
 )
 DEFAULT_REPO = about.__projects__
 DEFAULT_PROJECTS_BRANCH = about.__projects_branch__
--- a/spacy/cli/project/document.py
+++ b/spacy/cli/project/document.py
@ -1,9 +1,9 @@
 from pathlib import Path
-from wasabi import msg, MarkdownRenderer
+
 from wasabi import MarkdownRenderer, msg
 from ...util import working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
+from .._util import PROJECT_FILE, Arg, Opt, load_project_config, project_cli
 DOCS_URL = "https://spacy.io"
 INTRO_PROJECT = f"""The [`{PROJECT_FILE}`]({PROJECT_FILE}) defines the data assets required by the
--- a/spacy/cli/project/dvc.py
+++ b/spacy/cli/project/dvc.py
@ -1,15 +1,28 @@
 """This module contains helpers and subcommands for integrating spaCy projects
 with Data Version Control (DVC). https://dvc.org"""
 from typing import Dict, Any, List, Optional, Iterable
 import subprocess
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional
 from wasabi import msg
-from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
+from ...util import (
-from .._util import Arg, Opt, NAME, COMMAND
+    SimpleFrozenList,
-from ...util import working_dir, split_command, join_command, run_command
+    join_command,
-from ...util import SimpleFrozenList
+    run_command,
-
+    split_command,
    working_dir,
 )
 from .._util import (
    COMMAND,
    NAME,
    PROJECT_FILE,
    Arg,
    Opt,
    get_hash,
    load_project_config,
    project_cli,
 )
 DVC_CONFIG = "dvc.yaml"
 DVC_DIR = ".dvc"
--- a/spacy/cli/project/pull.py
+++ b/spacy/cli/project/pull.py
@ -1,9 +1,9 @@
 from pathlib import Path
 from wasabi import msg
-from .remote_storage import RemoteStorage
+
-from .remote_storage import get_command_hash
+from .._util import Arg, load_project_config, logger, project_cli
-from .._util import project_cli, Arg, logger
+from .remote_storage import RemoteStorage, get_command_hash
 from .._util import load_project_config
 from .run import update_lockfile
--- a/spacy/cli/project/push.py
+++ b/spacy/cli/project/push.py
@ -1,9 +1,9 @@
 from pathlib import Path
 from wasabi import msg
-from .remote_storage import RemoteStorage
+
-from .remote_storage import get_content_hash, get_command_hash
+from .._util import Arg, load_project_config, logger, project_cli
-from .._util import load_project_config
+from .remote_storage import RemoteStorage, get_command_hash, get_content_hash
 from .._util import project_cli, Arg, logger
@project_cli.command("push")
--- a/spacy/cli/project/remote_storage.py
+++ b/spacy/cli/project/remote_storage.py
@ -1,18 +1,25 @@
-from typing import Optional, List, Dict, TYPE_CHECKING
+import hashlib
 import os
 import site
 import hashlib
 import urllib.parse
 import tarfile
 import urllib.parse
 from pathlib import Path
 from typing import TYPE_CHECKING, Dict, List, Optional
 from wasabi import msg
 from .._util import get_hash, get_checksum, upload_file, download_file
 from .._util import ensure_pathy, make_tempdir
 from ...util import get_minor_version, ENV_VARS, check_bool_env_var
 from ...git_info import GIT_VERSION
 from ... import about
 from ...errors import Errors
 from ...git_info import GIT_VERSION
 from ...util import ENV_VARS, check_bool_env_var, get_minor_version
 from .._util import (
    download_file,
    ensure_pathy,
    get_checksum,
    get_hash,
    make_tempdir,
    upload_file,
 )
 if TYPE_CHECKING:
    from pathy import FluidPath  # noqa: F401
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@ -1,20 +1,39 @@
 from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
 import os.path
 from pathlib import Path
 from wasabi import msg
 from wasabi.util import locale_escape
 import sys
 from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
 import srsly
 import typer
 from wasabi import msg
 from wasabi.util import locale_escape
 from ... import about
 from ...git_info import GIT_VERSION
-from ...util import working_dir, run_command, split_command, is_cwd, join_command
+from ...util import (
-from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
+    ENV_VARS,
-from ...util import check_bool_env_var, SimpleFrozenDict
+    SimpleFrozenDict,
-from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
+    SimpleFrozenList,
-from .._util import get_checksum, project_cli, Arg, Opt, COMMAND, parse_config_overrides
+    check_bool_env_var,
    is_cwd,
    is_minor_version_match,
    join_command,
    run_command,
    split_command,
    working_dir,
 )
 from .._util import (
    COMMAND,
    PROJECT_FILE,
    PROJECT_LOCK,
    Arg,
    Opt,
    get_checksum,
    get_hash,
    load_project_config,
    parse_config_overrides,
    project_cli,
 )
@project_cli.command(
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@ -3,7 +3,7 @@ the docs and the init config command. It encodes various best practices and
 can help generate the best possible configuration, given a user's requirements. #}
 {%- set use_transformer = hardware != "cpu" and transformer_data -%}
 {%- set transformer = transformer_data[optimize] if use_transformer else {} -%}
-{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "spancat", "trainable_lemmatizer"] -%}
+{%- set listener_components = ["tagger", "morphologizer", "parser", "ner", "textcat", "textcat_multilabel", "entity_linker", "span_finder", "spancat", "spancat_singlelabel", "trainable_lemmatizer"] -%}
 [paths]
 train = null
 dev = null
@ -24,8 +24,11 @@ gpu_allocator = null
 lang = "{{ lang }}"
 {%- set has_textcat = ("textcat" in components or "textcat_multilabel" in components) -%}
 {%- set with_accuracy = optimize == "accuracy" -%}
-{%- set has_accurate_textcat = has_textcat and with_accuracy -%}
+{# The BOW textcat doesn't need a source of features, so it can omit the
-{%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "spancat" in components or "trainable_lemmatizer" in components or "entity_linker" in components or has_accurate_textcat) -%}
+tok2vec/transformer. #}
 {%- set with_accuracy_or_transformer = (use_transformer or with_accuracy) -%}
 {%- set textcat_needs_features = has_textcat and with_accuracy_or_transformer -%}
 {%- if ("tagger" in components or "morphologizer" in components or "parser" in components or "ner" in components or "span_finder" in components or "spancat" in components or "spancat_singlelabel" in components or "trainable_lemmatizer" in components or "entity_linker" in components or textcat_needs_features) -%}
 {%- set full_pipeline = ["transformer" if use_transformer else "tok2vec"] + components -%}
 {%- else -%}
 {%- set full_pipeline = components -%}
@ -122,6 +125,30 @@ grad_factor = 1.0
@layers = "reduce_mean.v1"
 {% endif -%}
 {% if "span_finder" in components -%}
 [components.span_finder]
 factory = "span_finder"
 max_length = 25
 min_length = null
 scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
 spans_key = "sc"
 threshold = 0.5
 [components.span_finder.model]
@architectures = "spacy.SpanFinder.v1"
 [components.span_finder.model.scorer]
@layers = "spacy.LinearLogistic.v1"
 nO = 2
 [components.span_finder.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
 grad_factor = 1.0
 [components.span_finder.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
 {% endif -%}
 {% if "spancat" in components -%}
 [components.spancat]
 factory = "spancat"
@ -154,6 +181,36 @@ grad_factor = 1.0
 sizes = [1,2,3]
 {% endif -%}
 {% if "spancat_singlelabel" in components %}
 [components.spancat_singlelabel]
 factory = "spancat_singlelabel"
 negative_weight = 1.0
 allow_overlap = true
 scorer = {"@scorers":"spacy.spancat_scorer.v1"}
 spans_key = "sc"
 [components.spancat_singlelabel.model]
@architectures = "spacy.SpanCategorizer.v1"
 [components.spancat_singlelabel.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
 hidden_size = 128
 [components.spancat_singlelabel.model.scorer]
@layers = "Softmax.v2"
 [components.spancat_singlelabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
 grad_factor = 1.0
 [components.spancat_singlelabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
 [components.spancat_singlelabel.suggester]
@misc = "spacy.ngram_suggester.v1"
 sizes = [1,2,3]
 {% endif %}
 {% if "trainable_lemmatizer" in components -%}
 [components.trainable_lemmatizer]
 factory = "trainable_lemmatizer"
@ -219,10 +276,16 @@ no_output_layer = false
 {% else -%}
 [components.textcat.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = true
-ngram_size = 1
+nO = null
-no_output_layer = false
+
 [components.textcat.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
 grad_factor = 1.0
 [components.textcat.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
 {%- endif %}
 {%- endif %}
@ -250,10 +313,16 @@ no_output_layer = false
 {% else -%}
 [components.textcat_multilabel.model]
-@architectures = "spacy.TextCatBOW.v2"
+@architectures = "spacy.TextCatCNN.v2"
 exclusive_classes = false
-ngram_size = 1
+nO = null
-no_output_layer = false
+
 [components.textcat_multilabel.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
 grad_factor = 1.0
 [components.textcat_multilabel.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
 {%- endif %}
 {%- endif %}
@ -284,6 +353,7 @@ maxout_pieces = 3
 {% if "morphologizer" in components %}
 [components.morphologizer]
 factory = "morphologizer"
 label_smoothing = 0.05
 [components.morphologizer.model]
@architectures = "spacy.Tagger.v2"
@ -297,6 +367,7 @@ width = ${components.tok2vec.model.encode.width}
 {% if "tagger" in components %}
 [components.tagger]
 factory = "tagger"
 label_smoothing = 0.05
 [components.tagger.model]
@architectures = "spacy.Tagger.v2"
@ -341,6 +412,27 @@ nO = null
 width = ${components.tok2vec.model.encode.width}
 {% endif %}
 {% if "span_finder" in components %}
 [components.span_finder]
 factory = "span_finder"
 max_length = 25
 min_length = null
 scorer = {"@scorers":"spacy.span_finder_scorer.v1"}
 spans_key = "sc"
 threshold = 0.5
 [components.span_finder.model]
@architectures = "spacy.SpanFinder.v1"
 [components.span_finder.model.scorer]
@layers = "spacy.LinearLogistic.v1"
 nO = 2
 [components.span_finder.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
 width = ${components.tok2vec.model.encode.width}
 {% endif %}
 {% if "spancat" in components %}
 [components.spancat]
 factory = "spancat"
@ -370,6 +462,33 @@ width = ${components.tok2vec.model.encode.width}
 sizes = [1,2,3]
 {% endif %}
 {% if "spancat_singlelabel" in components %}
 [components.spancat_singlelabel]
 factory = "spancat_singlelabel"
 negative_weight = 1.0
 allow_overlap = true
 scorer = {"@scorers":"spacy.spancat_scorer.v1"}
 spans_key = "sc"
 [components.spancat_singlelabel.model]
@architectures = "spacy.SpanCategorizer.v1"
 [components.spancat_singlelabel.model.reducer]
@layers = "spacy.mean_max_reducer.v1"
 hidden_size = 128
 [components.spancat_singlelabel.model.scorer]
@layers = "Softmax.v2"
 [components.spancat_singlelabel.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
 width = ${components.tok2vec.model.encode.width}
 [components.spancat_singlelabel.suggester]
@misc = "spacy.ngram_suggester.v1"
 sizes = [1,2,3]
 {% endif %}
 {% if "trainable_lemmatizer" in components -%}
 [components.trainable_lemmatizer]
 factory = "trainable_lemmatizer"
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@ -1,15 +1,23 @@
 from typing import Optional, Dict, Any, Union
 from pathlib import Path
 from wasabi import msg
 import typer
 import logging
 import sys
 from pathlib import Path
 from typing import Any, Dict, Optional, Union
 import typer
 from wasabi import msg
 from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code, setup_gpu
 from ..training.loop import train as train_nlp
 from ..training.initialize import init_nlp
 from .. import util
 from ..training.initialize import init_nlp
 from ..training.loop import train as train_nlp
 from ._util import (
    Arg,
    Opt,
    app,
    import_code,
    parse_config_overrides,
    setup_gpu,
    show_validation_error,
 )
@app.command(
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@ -1,14 +1,21 @@
 from typing import Tuple
 from pathlib import Path
 import sys
 import requests
 from wasabi import msg, Printer
 import warnings
 from pathlib import Path
 from typing import Tuple
 import requests
 from wasabi import Printer, msg
 from ._util import app
 from .. import about
-from ..util import get_package_version, get_installed_models, get_minor_version
+from ..util import (
-from ..util import get_package_path, get_model_meta, is_compatible_version
+    get_installed_models,
    get_minor_version,
    get_model_meta,
    get_package_path,
    get_package_version,
    is_compatible_version,
 )
 from ._util import app
@app.command("validate")
--- a/spacy/compat.py
+++ b/spacy/compat.py
@ -1,5 +1,6 @@
 """Helpers for Python and platform compatibility."""
 import sys
 from thinc.util import copy_array
 try:
--- a/spacy/displacy/init.py
+++ b/spacy/displacy/init.py
@ -4,15 +4,13 @@ spaCy's built in visualization suite for dependencies and named entities.
 DOCS: https://spacy.io/api/top-level#displacy
 USAGE: https://spacy.io/usage/visualizers
 """
 from typing import Union, Iterable, Optional, Dict, Any, Callable
 import warnings
 from typing import Any, Callable, Dict, Iterable, Optional, Union
 from .render import DependencyRenderer, EntityRenderer, SpanRenderer
 from ..tokens import Doc, Span
 from ..errors import Errors, Warnings
-from ..util import is_in_jupyter
+from ..tokens import Doc, Span
-from ..util import find_available_port
+from ..util import find_available_port, is_in_jupyter
-
+from .render import DependencyRenderer, EntityRenderer, SpanRenderer
 _html = {}
 RENDER_WRAPPER = None
@ -68,7 +66,7 @@ def render(
    if jupyter or (jupyter is None and is_in_jupyter()):
        # return HTML rendered by IPython display()
        # See #4840 for details on span wrapper to disable mathjax
-        from IPython.core.display import display, HTML
+        from IPython.core.display import HTML, display
        return display(HTML('<span class="tex2jax_ignore">{}</span>'.format(html)))
    return html
@ -125,13 +123,17 @@ def app(environ, start_response):
    return [res]
-def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
+def parse_deps(
    orig_doc: Union[Doc, Span], options: Dict[str, Any] = {}
 ) -> Dict[str, Any]:
    """Generate dependency parse in {'words': [], 'arcs': []} format.
-    orig_doc (Doc): Document to parse.
+    orig_doc (Union[Doc, Span]): Document to parse.
    options (Dict[str, Any]): Dependency parse specific visualisation options.
    RETURNS (dict): Generated dependency parse keyed by words and arcs.
    """
    if isinstance(orig_doc, Span):
        orig_doc = orig_doc.as_doc()
    doc = Doc(orig_doc.vocab).from_bytes(
        orig_doc.to_bytes(exclude=["user_data", "user_hooks"])
    )
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@ -1,15 +1,28 @@
 from typing import Any, Dict, List, Optional, Tuple, Union
 import uuid
-import itertools
+from typing import Any, Dict, List, Optional, Tuple, Union
 from ..errors import Errors
 from ..util import escape_html, minify_html, registry
-from .templates import TPL_DEP_ARCS, TPL_DEP_SVG, TPL_DEP_WORDS
+from .templates import (
-from .templates import TPL_DEP_WORDS_LEMMA, TPL_ENT, TPL_ENT_RTL, TPL_ENTS
+    TPL_DEP_ARCS,
-from .templates import TPL_FIGURE, TPL_KB_LINK, TPL_PAGE, TPL_SPAN
+    TPL_DEP_SVG,
-from .templates import TPL_SPAN_RTL, TPL_SPAN_SLICE, TPL_SPAN_SLICE_RTL
+    TPL_DEP_WORDS,
-from .templates import TPL_SPAN_START, TPL_SPAN_START_RTL, TPL_SPANS
+    TPL_DEP_WORDS_LEMMA,
-from .templates import TPL_TITLE
+    TPL_ENT,
    TPL_ENT_RTL,
    TPL_ENTS,
    TPL_FIGURE,
    TPL_KB_LINK,
    TPL_PAGE,
    TPL_SPAN,
    TPL_SPAN_RTL,
    TPL_SPAN_SLICE,
    TPL_SPAN_SLICE_RTL,
    TPL_SPAN_START,
    TPL_SPAN_START_RTL,
    TPL_SPANS,
    TPL_TITLE,
 )
 DEFAULT_LANG = "en"
 DEFAULT_DIR = "ltr"
@ -204,7 +217,7 @@ class SpanRenderer:
                    + (self.offset_step * (len(entities) - 1))
                )
                markup += self.span_template.format(
-                    text=token["text"],
+                    text=escape_html(token["text"]),
                    span_slices=slices,
                    span_starts=starts,
                    total_height=total_height,
--- a/spacy/errors.py
+++ b/spacy/errors.py
@ -1,5 +1,5 @@
 from typing import Literal
 import warnings
 from typing import Literal
 class ErrorsWithCodes(type):
@ -208,6 +208,9 @@ class Warnings(metaclass=ErrorsWithCodes):
    W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
            "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
    W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.")
    W125 = ("The StaticVectors key_attr is no longer used. To set a custom "
            "key attribute for vectors, configure it through Vectors(attr=) or "
            "'spacy init vectors --attr'")
    # v4 warning strings
    W400 = ("`use_upper=False` is ignored, the upper layer is always enabled")
@ -546,6 +549,8 @@ class Errors(metaclass=ErrorsWithCodes):
            "during training, make sure to include it in 'annotating components'")
    # New errors added in v3.x
    E850 = ("The PretrainVectors objective currently only supports default or "
            "floret vectors, not {mode} vectors.")
    E851 = ("The 'textcat' component labels should only have values of 0 or 1, "
            "but found value of '{val}'.")
    E852 = ("The tar file pulled from the remote attempted an unsafe path "
@ -954,6 +959,14 @@ class Errors(metaclass=ErrorsWithCodes):
             "with `displacy.serve(doc, port=port)`")
    E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port=port)` "
             "or use `auto_select_port=True` to pick an available port automatically.")
    E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.")
    E1052 = ("Unable to copy spans: the character offsets for the span at "
             "index {i} in the span group do not align with the tokenization "
             "in the target doc.")
    E1053 = ("Both 'min_length' and 'max_length' should be larger than 0, but found"
             " 'min_length': {min_length}, 'max_length': {max_length}")
    E1054 = ("The text, including whitespace, must match between reference and "
             "predicted docs when training {component}.")
    # v4 error strings
    E4000 = ("Expected a Doc as input, but got: '{type}'")
@ -968,6 +981,7 @@ class Errors(metaclass=ErrorsWithCodes):
    E4007 = ("Span {var} {value} must be {op} Span {existing_var} "
             "{existing_value}.")
    E4008 = ("Span {pos}_char {value} does not correspond to a token {pos}.")
    E4009 = ("The '{attr}' parameter should be 'None' or 'True', but found '{value}'.")
 RENAMED_LANGUAGE_CODES = {"xx": "mul", "is": "isl"}
--- a/spacy/glossary.py
+++ b/spacy/glossary.py
@ -1,4 +1,5 @@
 import warnings
 from .errors import Warnings
--- a/spacy/kb/init.py
+++ b/spacy/kb/init.py
@ -1,5 +1,5 @@
 from .candidate import Candidate, InMemoryCandidate
 from .kb import KnowledgeBase
 from .kb_in_memory import InMemoryLookupKB
 from .candidate import Candidate, InMemoryCandidate
 __all__ = ["KnowledgeBase", "InMemoryLookupKB", "Candidate", "InMemoryCandidate"]
--- a/spacy/kb/candidate.pxd
+++ b/spacy/kb/candidate.pxd
@ -1,6 +1,8 @@
 from libcpp.vector cimport vector
-from .kb_in_memory cimport InMemoryLookupKB
+
 from ..typedefs cimport hash_t
 from .kb_in_memory cimport InMemoryLookupKB
 cdef class Candidate:
    pass
@ -9,7 +11,7 @@ cdef class Candidate:
 cdef class InMemoryCandidate(Candidate):
    cdef readonly hash_t _entity_hash
    cdef readonly hash_t _alias_hash
-    cpdef vector[float] _entity_vector
+    cdef vector[float] _entity_vector
    cdef float _prior_prob
    cdef readonly InMemoryLookupKB _kb
    cdef float _entity_freq
--- a/spacy/kb/candidate.pyx
+++ b/spacy/kb/candidate.pyx
@ -1,8 +1,10 @@
 # cython: infer_types=True, profile=True
 from .kb_in_memory cimport InMemoryLookupKB
 from ..errors import Errors
 cdef class Candidate:
    """A `Candidate` object refers to a textual mention that may or may not be resolved
    to a specific entity from a Knowledge Base. This will be used as input for the entity linking
--- a/spacy/kb/kb.pxd
+++ b/spacy/kb/kb.pxd
@ -2,8 +2,10 @@
 from cymem.cymem cimport Pool
 from libc.stdint cimport int64_t
 from ..vocab cimport Vocab
 cdef class KnowledgeBase:
    cdef Pool mem
    cdef readonly Vocab vocab
--- a/spacy/kb/kb.pyx
+++ b/spacy/kb/kb.pyx
@ -1,18 +1,20 @@
 # cython: infer_types=True, profile=True
 from pathlib import Path
-from typing import Iterable, Tuple, Union, Iterator
+from typing import Iterable, Iterator, Tuple, Union
 from cymem.cymem cimport Pool
-from .candidate import Candidate
+from ..errors import Errors
 from ..tokens import Span, SpanGroup
 from ..util import SimpleFrozenList
-from ..errors import Errors
+from .candidate import Candidate
 cdef class KnowledgeBase:
-    """A `KnowledgeBase` instance stores unique identifiers for entities and their textual aliases,
+    """A `KnowledgeBase` instance stores unique identifiers for entities and
-    to support entity linking of named entities to real-world concepts.
+    their textual aliases, to support entity linking of named entities to
    real-world concepts.
    This is an abstract class and requires its operations to be implemented.
    DOCS: https://spacy.io/api/kb
@ -40,7 +42,9 @@ cdef class KnowledgeBase:
        RETURNS (Iterator[Iterable[Iterable[Candidate]]]): Identified candidates.
        """
        raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="get_candidates", name=self.__name__)
+            Errors.E1045.format(
                parent="KnowledgeBase", method="get_candidates", name=self.__name__
            )
        )
    def get_vectors(self, entities: Iterable[str]) -> Iterable[Iterable[float]]:
@ -58,7 +62,9 @@ cdef class KnowledgeBase:
        RETURNS (Iterable[float]): Vector for specified entity.
        """
        raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="get_vector", name=self.__name__)
+            Errors.E1045.format(
                parent="KnowledgeBase", method="get_vector", name=self.__name__
            )
        )
    def to_bytes(self, **kwargs) -> bytes:
@ -66,7 +72,9 @@ cdef class KnowledgeBase:
        RETURNS (bytes): Current state as binary string.
        """
        raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="to_bytes", name=self.__name__)
+            Errors.E1045.format(
                parent="KnowledgeBase", method="to_bytes", name=self.__name__
            )
        )
    def from_bytes(self, bytes_data: bytes, *, exclude: Tuple[str] = tuple()):
@ -75,27 +83,37 @@ cdef class KnowledgeBase:
        exclude (Tuple[str]): Properties to exclude when restoring KB.
        """
        raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="from_bytes", name=self.__name__)
+            Errors.E1045.format(
                parent="KnowledgeBase", method="from_bytes", name=self.__name__
            )
        )
-    def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
+    def to_disk(
            self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
    ) -> None:
        """
        Write KnowledgeBase content to disk.
        path (Union[str, Path]): Target file path.
        exclude (Iterable[str]): List of components to exclude.
        """
        raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="to_disk", name=self.__name__)
+            Errors.E1045.format(
                parent="KnowledgeBase", method="to_disk", name=self.__name__
            )
        )
-    def from_disk(self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()) -> None:
+    def from_disk(
            self, path: Union[str, Path], exclude: Iterable[str] = SimpleFrozenList()
    ) -> None:
        """
        Load KnowledgeBase content from disk.
        path (Union[str, Path]): Target file path.
        exclude (Iterable[str]): List of components to exclude.
        """
        raise NotImplementedError(
-            Errors.E1045.format(parent="KnowledgeBase", method="from_disk", name=self.__name__)
+            Errors.E1045.format(
                parent="KnowledgeBase", method="from_disk", name=self.__name__
            )
        )
    @property
--- a/spacy/kb/kb_in_memory.pxd
+++ b/spacy/kb/kb_in_memory.pxd
@ -1,11 +1,11 @@
 """Knowledge-base for entity or concept linking."""
 from preshed.maps cimport PreshMap
 from libcpp.vector cimport vector
 from libc.stdint cimport int32_t, int64_t
 from libc.stdio cimport FILE
 from libcpp.vector cimport vector
 from preshed.maps cimport PreshMap
 from ..structs cimport AliasC, KBEntryC
 from ..typedefs cimport hash_t
 from ..structs cimport KBEntryC, AliasC
 from .kb cimport KnowledgeBase
 ctypedef vector[KBEntryC] entry_vec
@ -55,23 +55,28 @@ cdef class InMemoryLookupKB(KnowledgeBase):
    # optional data, we can let users configure a DB as the backend for this.
    cdef object _features_table
    cdef inline int64_t c_add_vector(self, vector[float] entity_vector) nogil:
        """Add an entity vector to the vectors table.

        Appends `entity_vector` to the end of `self._vectors_table` and
        returns the index at which it was stored.
        """
        # The table's size before the append is exactly the index the new
        # vector occupies once it has been pushed onto the end.
        cdef int64_t new_index = self._vectors_table.size()
        self._vectors_table.push_back(entity_vector)
        return new_index
-
+    cdef inline int64_t c_add_entity(
-    cdef inline int64_t c_add_entity(self, hash_t entity_hash, float freq,
+        self,
-                                     int32_t vector_index, int feats_row) nogil:
+        hash_t entity_hash,
        float freq,
        int32_t vector_index,
        int feats_row
    ) nogil:
        """Add an entry to the vector of entries.
-        After calling this method, make sure to update also the _entry_index using the return value"""
+        After calling this method, make sure to update also the _entry_index
        using the return value"""
        # This is what we'll map the entity hash key to. It's where the entry will sit
        # in the vector of entries, so we can get it later.
        cdef int64_t new_index = self._entries.size()
-        # Avoid struct initializer to enable nogil, cf https://github.com/cython/cython/issues/1642
+        # Avoid struct initializer to enable nogil, cf.
        # https://github.com/cython/cython/issues/1642
        cdef KBEntryC entry
        entry.entity_hash = entity_hash
        entry.vector_index = vector_index
@ -81,11 +86,17 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        self._entries.push_back(entry)
        return new_index
-    cdef inline int64_t c_add_aliases(self, hash_t alias_hash, vector[int64_t] entry_indices, vector[float] probs) nogil:
+    cdef inline int64_t c_add_aliases(
-        """Connect a mention to a list of potential entities with their prior probabilities .
+        self,
-        After calling this method, make sure to update also the _alias_index using the return value"""
+        hash_t alias_hash,
-        # This is what we'll map the alias hash key to. It's where the alias will be defined
+        vector[int64_t] entry_indices,
-        # in the vector of aliases.
+        vector[float] probs
    ) nogil:
        """Connect a mention to a list of potential entities with their prior
        probabilities. After calling this method, make sure to update also the
        _alias_index using the return value"""
        # This is what we'll map the alias hash key to. It's where the alias will be
        # defined in the vector of aliases.
        cdef int64_t new_index = self._aliases_table.size()
        # Avoid struct initializer to enable nogil
@ -98,8 +109,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
    cdef inline void _create_empty_vectors(self, hash_t dummy_hash) nogil:
        """
-        Initializing the vectors and making sure the first element of each vector is a dummy,
+        Initializing the vectors and making sure the first element of each vector is a
-        because the PreshMap maps pointing to indices in these vectors can not contain 0 as value
+        dummy, because the PreshMap maps pointing to indices in these vectors can not
        contain 0 as value.
        cf. https://github.com/explosion/preshed/issues/17
        """
        cdef int32_t dummy_value = 0
@ -130,12 +142,18 @@ cdef class InMemoryLookupKB(KnowledgeBase):
 cdef class Writer:
    cdef FILE* _fp
-    cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1
+    cdef int write_header(
        self, int64_t nr_entries, int64_t entity_vector_length
    ) except -1
    cdef int write_vector_element(self, float element) except -1
-    cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1
+    cdef int write_entry(
        self, hash_t entry_hash, float entry_freq, int32_t vector_index
    ) except -1
    cdef int write_alias_length(self, int64_t alias_length) except -1
-    cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1
+    cdef int write_alias_header(
        self, hash_t alias_hash, int64_t candidate_length
    ) except -1
    cdef int write_alias(self, int64_t entry_index, float prob) except -1
    cdef int _write(self, void* value, size_t size) except -1
@ -143,12 +161,18 @@ cdef class Writer:
 cdef class Reader:
    cdef FILE* _fp
-    cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1
+    cdef int read_header(
        self, int64_t* nr_entries, int64_t* entity_vector_length
    ) except -1
    cdef int read_vector_element(self, float* element) except -1
-    cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1
+    cdef int read_entry(
        self, hash_t* entity_hash, float* freq, int32_t* vector_index
    ) except -1
    cdef int read_alias_length(self, int64_t* alias_length) except -1
-    cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1
+    cdef int read_alias_header(
        self, hash_t* alias_hash, int64_t* candidate_length
    ) except -1
    cdef int read_alias(self, int64_t* entry_index, float* prob) except -1
    cdef int _read(self, void* value, size_t size) except -1
--- a/spacy/kb/kb_in_memory.pyx
+++ b/spacy/kb/kb_in_memory.pyx
@ -1,29 +1,35 @@
 # cython: infer_types=True, profile=True
-from typing import Iterable, Callable, Dict, Any, Union, Iterator
+from typing import Any, Callable, Dict, Iterable, Union
 import srsly
-from preshed.maps cimport PreshMap
+
 from cpython.exc cimport PyErr_SetFromErrno
 from libc.stdio cimport fopen, fclose, fread, fwrite, feof, fseek
 from libc.stdint cimport int32_t, int64_t
 from libc.stdio cimport fclose, feof, fopen, fread, fseek, fwrite
 from libcpp.vector cimport vector
 from preshed.maps cimport PreshMap
 from pathlib import Path
 import warnings
 from pathlib import Path
 from ..tokens import Span, SpanGroup
 from ..tokens import SpanGroup
 from ..typedefs cimport hash_t
-from ..errors import Errors, Warnings
+
 from .. import util
 from ..errors import Errors, Warnings
 from ..util import SimpleFrozenList, ensure_path
 from ..vocab cimport Vocab
 from .kb cimport KnowledgeBase
 from .candidate import InMemoryCandidate
 cdef class InMemoryLookupKB(KnowledgeBase):
-    """An `InMemoryLookupKB` instance stores unique identifiers for entities and their textual aliases,
+    """An `InMemoryLookupKB` instance stores unique identifiers for entities
-    to support entity linking of named entities to real-world concepts.
+    and their textual aliases, to support entity linking of named entities to
    real-world concepts.
    DOCS: https://spacy.io/api/inmemorylookupkb
    """
@ -66,7 +72,8 @@ cdef class InMemoryLookupKB(KnowledgeBase):
    def add_entity(self, str entity, float freq, vector[float] entity_vector):
        """
-        Add an entity to the KB, optionally specifying its log probability based on corpus frequency
+        Add an entity to the KB, optionally specifying its log probability
        based on corpus frequency.
        Return the hash of the entity ID/name at the end.
        """
        cdef hash_t entity_hash = self.vocab.strings.add(entity)
@ -78,14 +85,20 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        # Raise an error if the provided entity vector is not of the correct length
        if len(entity_vector) != self.entity_vector_length:
-            raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
+            raise ValueError(
                Errors.E141.format(
                    found=len(entity_vector), required=self.entity_vector_length
                )
            )
        vector_index = self.c_add_vector(entity_vector=entity_vector)
-        new_index = self.c_add_entity(entity_hash=entity_hash,
+        new_index = self.c_add_entity(
-                                      freq=freq,
+            entity_hash=entity_hash,
-                                      vector_index=vector_index,
+            freq=freq,
-                                      feats_row=-1)  # Features table currently not implemented
+            vector_index=vector_index,
            feats_row=-1
        )  # Features table currently not implemented
        self._entry_index[entity_hash] = new_index
        return entity_hash
@ -110,7 +123,12 @@ cdef class InMemoryLookupKB(KnowledgeBase):
            else:
                entity_vector = vector_list[i]
                if len(entity_vector) != self.entity_vector_length:
-                    raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
+                    raise ValueError(
                        Errors.E141.format(
                            found=len(entity_vector),
                            required=self.entity_vector_length
                        )
                    )
                entry.entity_hash = entity_hash
                entry.freq = freq_list[i]
@ -144,11 +162,15 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        previous_alias_nr = self.get_size_aliases()
        # Throw an error if the length of entities and probabilities are not the same
        if not len(entities) == len(probabilities):
-            raise ValueError(Errors.E132.format(alias=alias,
+            raise ValueError(
-                                                entities_length=len(entities),
+                Errors.E132.format(
-                                                probabilities_length=len(probabilities)))
+                    alias=alias,
                    entities_length=len(entities),
                    probabilities_length=len(probabilities))
            )
-        # Throw an error if the probabilities sum up to more than 1 (allow for some rounding errors)
+        # Throw an error if the probabilities sum up to more than 1 (allow for
        # some rounding errors)
        prob_sum = sum(probabilities)
        if prob_sum > 1.00001:
            raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
@ -165,40 +187,47 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        for entity, prob in zip(entities, probabilities):
            entity_hash = self.vocab.strings[entity]
-            if not entity_hash in self._entry_index:
+            if entity_hash not in self._entry_index:
                raise ValueError(Errors.E134.format(entity=entity))
            entry_index = <int64_t>self._entry_index.get(entity_hash)
            entry_indices.push_back(int(entry_index))
            probs.push_back(float(prob))
-        new_index = self.c_add_aliases(alias_hash=alias_hash, entry_indices=entry_indices, probs=probs)
+        new_index = self.c_add_aliases(
            alias_hash=alias_hash, entry_indices=entry_indices, probs=probs
        )
        self._alias_index[alias_hash] = new_index
        if previous_alias_nr + 1 != self.get_size_aliases():
            raise RuntimeError(Errors.E891.format(alias=alias))
        return alias_hash
-    def append_alias(self, str alias, str entity, float prior_prob, ignore_warnings=False):
+    def append_alias(
        self, str alias, str entity, float prior_prob, ignore_warnings=False
    ):
        """
-        For an alias already existing in the KB, extend its potential entities with one more.
+        For an alias already existing in the KB, extend its potential entities
        with one more.
        Throw a warning if either the alias or the entity is unknown,
        or when the combination is already previously recorded.
        Throw an error if this entity+prior prob would exceed the sum of 1.
-        For efficiency, it's best to use the method `add_alias` as much as possible instead of this one.
+        For efficiency, it's best to use the method `add_alias` as much as
        possible instead of this one.
        """
        # Check if the alias exists in the KB
        cdef hash_t alias_hash = self.vocab.strings[alias]
-        if not alias_hash in self._alias_index:
+        if alias_hash not in self._alias_index:
            raise ValueError(Errors.E176.format(alias=alias))
        # Check if the entity exists in the KB
        cdef hash_t entity_hash = self.vocab.strings[entity]
-        if not entity_hash in self._entry_index:
+        if entity_hash not in self._entry_index:
            raise ValueError(Errors.E134.format(entity=entity))
        entry_index = <int64_t>self._entry_index.get(entity_hash)
-        # Throw an error if the prior probabilities (including the new one) sum up to more than 1
+        # Throw an error if the prior probabilities (including the new one)
        # sum up to more than 1
        alias_index = <int64_t>self._alias_index.get(alias_hash)
        alias_entry = self._aliases_table[alias_index]
        current_sum = sum([p for p in alias_entry.probs])
@ -232,12 +261,13 @@ cdef class InMemoryLookupKB(KnowledgeBase):
    def _get_alias_candidates(self, str alias) -> Iterable[InMemoryCandidate]:
        """
-        Return candidate entities for an alias. Each candidate defines the entity, the original alias,
+        Return candidate entities for an alias. Each candidate defines the
-        and the prior probability of that alias resolving to that entity.
+        entity, the original alias, and the prior probability of that alias
        resolving to that entity.
        If the alias is not known in the KB, and empty list is returned.
        """
        cdef hash_t alias_hash = self.vocab.strings[alias]
-        if not alias_hash in self._alias_index:
+        if alias_hash not in self._alias_index:
            return []
        alias_index = <int64_t>self._alias_index.get(alias_hash)
        alias_entry = self._aliases_table[alias_index]
@ -266,8 +296,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        return self._vectors_table[self._entries[entry_index].vector_index]
    def get_prior_prob(self, str entity, str alias):
-        """ Return the prior probability of a given alias being linked to a given entity,
+        """ Return the prior probability of a given alias being linked to a
-        or return 0.0 when this combination is not known in the knowledge base"""
+        given entity, or return 0.0 when this combination is not known in the
        knowledge base."""
        cdef hash_t alias_hash = self.vocab.strings[alias]
        cdef hash_t entity_hash = self.vocab.strings[entity]
@ -278,7 +309,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        entry_index = self._entry_index[entity_hash]
        alias_entry = self._aliases_table[alias_index]
-        for (entry_index, prior_prob) in zip(alias_entry.entry_indices, alias_entry.probs):
+        for (entry_index, prior_prob) in zip(
            alias_entry.entry_indices, alias_entry.probs
        ):
            if self._entries[entry_index].entity_hash == entity_hash:
                return prior_prob
@ -291,13 +324,19 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        """Serialize the current state to a binary string.
        """
        def serialize_header():
-            header = (self.get_size_entities(), self.get_size_aliases(), self.entity_vector_length)
+            header = (
                self.get_size_entities(),
                self.get_size_aliases(),
                self.entity_vector_length
            )
            return srsly.json_dumps(header)
        def serialize_entries():
            i = 1
            tuples = []
-            for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
+            for entry_hash, entry_index in sorted(
                self._entry_index.items(), key=lambda x: x[1]
            ):
                entry = self._entries[entry_index]
                assert entry.entity_hash == entry_hash
                assert entry_index == i
@ -310,7 +349,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
            headers = []
            indices_lists = []
            probs_lists = []
-            for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
+            for alias_hash, alias_index in sorted(
                self._alias_index.items(), key=lambda x: x[1]
            ):
                alias = self._aliases_table[alias_index]
                assert alias_index == i
                candidate_length = len(alias.entry_indices)
@ -368,7 +409,7 @@ cdef class InMemoryLookupKB(KnowledgeBase):
            indices = srsly.json_loads(all_data[1])
            probs = srsly.json_loads(all_data[2])
            for header, indices, probs in zip(headers, indices, probs):
-                alias_hash, candidate_length = header
+                alias_hash, _candidate_length = header
                alias.entry_indices = indices
                alias.probs = probs
                self._aliases_table[i] = alias
@ -417,10 +458,14 @@ cdef class InMemoryLookupKB(KnowledgeBase):
                writer.write_vector_element(element)
            i = i+1
-        # dumping the entry records in the order in which they are in the _entries vector.
+        # dumping the entry records in the order in which they are in the
-        # index 0 is a dummy object not stored in the _entry_index and can be ignored.
+        # _entries vector.
        # index 0 is a dummy object not stored in the _entry_index and can
        # be ignored.
        i = 1
-        for entry_hash, entry_index in sorted(self._entry_index.items(), key=lambda x: x[1]):
+        for entry_hash, entry_index in sorted(
            self._entry_index.items(), key=lambda x: x[1]
        ):
            entry = self._entries[entry_index]
            assert entry.entity_hash == entry_hash
            assert entry_index == i
@ -432,7 +477,9 @@ cdef class InMemoryLookupKB(KnowledgeBase):
        # dumping the aliases in the order in which they are in the _alias_index vector.
        # index 0 is a dummy object not stored in the _aliases_table and can be ignored.
        i = 1
-        for alias_hash, alias_index in sorted(self._alias_index.items(), key=lambda x: x[1]):
+        for alias_hash, alias_index in sorted(
                self._alias_index.items(), key=lambda x: x[1]
        ):
            alias = self._aliases_table[alias_index]
            assert alias_index == i
@ -538,7 +585,8 @@ cdef class Writer:
    def __init__(self, path):
        assert isinstance(path, Path)
        content = bytes(path)
-        cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
+        cdef bytes bytes_loc = content.encode('utf8') \
            if type(content) == str else content
        self._fp = fopen(<char*>bytes_loc, 'wb')
        if not self._fp:
            raise IOError(Errors.E146.format(path=path))
@ -548,14 +596,18 @@ cdef class Writer:
        cdef size_t status = fclose(self._fp)
        assert status == 0
-    cdef int write_header(self, int64_t nr_entries, int64_t entity_vector_length) except -1:
+    cdef int write_header(
        self, int64_t nr_entries, int64_t entity_vector_length
    ) except -1:
        self._write(&nr_entries, sizeof(nr_entries))
        self._write(&entity_vector_length, sizeof(entity_vector_length))
    cdef int write_vector_element(self, float element) except -1:
        self._write(&element, sizeof(element))
-    cdef int write_entry(self, hash_t entry_hash, float entry_freq, int32_t vector_index) except -1:
+    cdef int write_entry(
        self, hash_t entry_hash, float entry_freq, int32_t vector_index
    ) except -1:
        self._write(&entry_hash, sizeof(entry_hash))
        self._write(&entry_freq, sizeof(entry_freq))
        self._write(&vector_index, sizeof(vector_index))
@ -564,7 +616,9 @@ cdef class Writer:
    cdef int write_alias_length(self, int64_t alias_length) except -1:
        self._write(&alias_length, sizeof(alias_length))
-    cdef int write_alias_header(self, hash_t alias_hash, int64_t candidate_length) except -1:
+    cdef int write_alias_header(
        self, hash_t alias_hash, int64_t candidate_length
    ) except -1:
        self._write(&alias_hash, sizeof(alias_hash))
        self._write(&candidate_length, sizeof(candidate_length))
@ -580,16 +634,19 @@ cdef class Writer:
 cdef class Reader:
    def __init__(self, path):
        content = bytes(path)
-        cdef bytes bytes_loc = content.encode('utf8') if type(content) == str else content
+        cdef bytes bytes_loc = content.encode('utf8') \
            if type(content) == str else content
        self._fp = fopen(<char*>bytes_loc, 'rb')
        if not self._fp:
            PyErr_SetFromErrno(IOError)
-        status = fseek(self._fp, 0, 0)  # this can be 0 if there is no header
+        fseek(self._fp, 0, 0)  # this can be 0 if there is no header
    def __dealloc__(self):
        fclose(self._fp)
-    cdef int read_header(self, int64_t* nr_entries, int64_t* entity_vector_length) except -1:
+    cdef int read_header(
        self, int64_t* nr_entries, int64_t* entity_vector_length
    ) except -1:
        status = self._read(nr_entries, sizeof(int64_t))
        if status < 1:
            if feof(self._fp):
@ -609,7 +666,9 @@ cdef class Reader:
                return 0  # end of file
            raise IOError(Errors.E145.format(param="vector element"))
-    cdef int read_entry(self, hash_t* entity_hash, float* freq, int32_t* vector_index) except -1:
+    cdef int read_entry(
        self, hash_t* entity_hash, float* freq, int32_t* vector_index
    ) except -1:
        status = self._read(entity_hash, sizeof(hash_t))
        if status < 1:
            if feof(self._fp):
@ -640,7 +699,9 @@ cdef class Reader:
                return 0  # end of file
            raise IOError(Errors.E145.format(param="alias length"))
-    cdef int read_alias_header(self, hash_t* alias_hash, int64_t* candidate_length) except -1:
+    cdef int read_alias_header(
        self, hash_t* alias_hash, int64_t* candidate_length
    ) except -1:
        status = self._read(alias_hash, sizeof(hash_t))
        if status < 1:
            if feof(self._fp):
--- a/spacy/lang/af/init.py
+++ b/spacy/lang/af/init.py
@ -1,5 +1,5 @@
 from ...language import BaseDefaults, Language
 from .stop_words import STOP_WORDS
 from ...language import Language, BaseDefaults
 class AfrikaansDefaults(BaseDefaults):
--- a/spacy/lang/am/init.py
+++ b/spacy/lang/am/init.py
@ -1,12 +1,11 @@
-from .stop_words import STOP_WORDS
+from ...attrs import LANG
 from ...language import BaseDefaults, Language
 from ...util import update_exc
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_SUFFIXES
-
+from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...language import Language, BaseDefaults
 from ...attrs import LANG
 from ...util import update_exc
 class AmharicDefaults(BaseDefaults):
--- a/spacy/lang/am/punctuation.py
+++ b/spacy/lang/am/punctuation.py
@ -1,5 +1,11 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
+from ..char_classes import (
-from ..char_classes import UNITS, ALPHA_UPPER
+    ALPHA_UPPER,
    CURRENCY,
    LIST_ELLIPSES,
    LIST_PUNCT,
    LIST_QUOTES,
    UNITS,
 )
 _list_punct = LIST_PUNCT + "፡ ። ፣ ፤ ፥ ፦ ፧ ፠ ፨".strip().split()
--- a/spacy/lang/am/tokenizer_exceptions.py
+++ b/spacy/lang/am/tokenizer_exceptions.py
@ -1,5 +1,4 @@
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
 _exc = {}
--- a/spacy/lang/ar/init.py
+++ b/spacy/lang/ar/init.py
@ -1,8 +1,8 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
 from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from ...language import Language, BaseDefaults
 class ArabicDefaults(BaseDefaults):
--- a/spacy/lang/ar/punctuation.py
+++ b/spacy/lang/ar/punctuation.py
@ -1,5 +1,11 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
+from ..char_classes import (
-from ..char_classes import UNITS, ALPHA_UPPER
+    ALPHA_UPPER,
    CURRENCY,
    LIST_ELLIPSES,
    LIST_PUNCT,
    LIST_QUOTES,
    UNITS,
 )
 _suffixes = (
    LIST_PUNCT
--- a/spacy/lang/ar/tokenizer_exceptions.py
+++ b/spacy/lang/ar/tokenizer_exceptions.py
@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import NORM, ORTH
 from ...symbols import ORTH, NORM
 from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc = {}
--- a/spacy/lang/az/init.py
+++ b/spacy/lang/az/init.py
@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
 from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
 class AzerbaijaniDefaults(BaseDefaults):
--- a/spacy/lang/az/lex_attrs.py
+++ b/spacy/lang/az/lex_attrs.py
@ -1,6 +1,5 @@
 from ...attrs import LIKE_NUM
 # Eleven, twelve etc. are written separate: on bir, on iki
 _num_words = [
--- a/spacy/lang/bg/init.py
+++ b/spacy/lang/bg/init.py
@ -1,12 +1,14 @@
 from ...attrs import LANG
 from ...language import BaseDefaults, Language
 from ...util import update_exc
 from ..punctuation import (
    COMBINING_DIACRITICS_TOKENIZER_INFIXES,
    COMBINING_DIACRITICS_TOKENIZER_SUFFIXES,
 )
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from .lex_attrs import LEX_ATTRS
 from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 from .lex_attrs import LEX_ATTRS
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_INFIXES
 from ..punctuation import COMBINING_DIACRITICS_TOKENIZER_SUFFIXES
 from ...language import Language, BaseDefaults
 from ...attrs import LANG
 from ...util import update_exc
 class BulgarianDefaults(BaseDefaults):
--- a/spacy/lang/bg/lex_attrs.py
+++ b/spacy/lang/bg/lex_attrs.py
@ -1,6 +1,5 @@
 from ...attrs import LIKE_NUM
 _num_words = [
    "нула",
    "едно",
--- a/spacy/lang/bg/tokenizer_exceptions.py
+++ b/spacy/lang/bg/tokenizer_exceptions.py
@ -4,8 +4,7 @@ References:
    (countries, occupations, fields of studies and more).
 """
-from ...symbols import ORTH, NORM
+from ...symbols import NORM, ORTH
 _exc = {}
--- a/spacy/lang/bn/init.py
+++ b/spacy/lang/bn/init.py
@ -1,10 +1,12 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
 from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
+from ...language import BaseDefaults, Language
 from .stop_words import STOP_WORDS
 from ...language import Language, BaseDefaults
 from ...pipeline import Lemmatizer
 from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 class BengaliDefaults(BaseDefaults):
--- a/spacy/lang/bn/punctuation.py
+++ b/spacy/lang/bn/punctuation.py
@ -1,6 +1,14 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
+from ..char_classes import (
-from ..char_classes import ALPHA_LOWER, ALPHA, HYPHENS, CONCAT_QUOTES, UNITS
+    ALPHA,
-
+    ALPHA_LOWER,
    CONCAT_QUOTES,
    HYPHENS,
    LIST_ELLIPSES,
    LIST_ICONS,
    LIST_PUNCT,
    LIST_QUOTES,
    UNITS,
 )
 _currency = r"\$¢£€¥฿৳"
 _quotes = CONCAT_QUOTES.replace("'", "")
--- a/spacy/lang/bn/tokenizer_exceptions.py
+++ b/spacy/lang/bn/tokenizer_exceptions.py
@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import NORM, ORTH
 from ...symbols import ORTH, NORM
 from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc = {}
--- a/spacy/lang/ca/init.py
+++ b/spacy/lang/ca/init.py
@ -1,14 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
 from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
 from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
 from ...language import Language, BaseDefaults
 from .lemmatizer import CatalanLemmatizer
 from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 class CatalanDefaults(BaseDefaults):
--- a/spacy/lang/ca/lex_attrs.py
+++ b/spacy/lang/ca/lex_attrs.py
@ -1,6 +1,5 @@
 from ...attrs import LIKE_NUM
 _num_words = [
    "zero",
    "un",
--- a/spacy/lang/ca/punctuation.py
+++ b/spacy/lang/ca/punctuation.py
@ -1,9 +1,18 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_ICONS
+from ..char_classes import (
-from ..char_classes import LIST_CURRENCY
+    ALPHA,
-from ..char_classes import CURRENCY
+    ALPHA_LOWER,
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA, PUNCT
+    ALPHA_UPPER,
-from ..char_classes import merge_chars, _units
+    CONCAT_QUOTES,
-
+    CURRENCY,
    LIST_CURRENCY,
    LIST_ELLIPSES,
    LIST_ICONS,
    LIST_PUNCT,
    LIST_QUOTES,
    PUNCT,
    _units,
    merge_chars,
 )
 ELISION = " ' ’ ".strip().replace(" ", "").replace("\n", "")
--- a/spacy/lang/ca/syntax_iterators.py
+++ b/spacy/lang/ca/syntax_iterators.py
@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...tokens import Doc, Span
+
 from ...symbols import NOUN, PROPN
 from ...errors import Errors
 from ...symbols import NOUN, PROPN
 from ...tokens import Doc, Span
 def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
--- a/spacy/lang/ca/tokenizer_exceptions.py
+++ b/spacy/lang/ca/tokenizer_exceptions.py
@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import NORM, ORTH
 from ...symbols import ORTH, NORM
 from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc = {}
--- a/spacy/lang/cs/init.py
+++ b/spacy/lang/cs/init.py
@ -1,6 +1,6 @@
-from .stop_words import STOP_WORDS
+from ...language import BaseDefaults, Language
 from .lex_attrs import LEX_ATTRS
-from ...language import Language, BaseDefaults
+from .stop_words import STOP_WORDS
 class CzechDefaults(BaseDefaults):
--- a/spacy/lang/da/init.py
+++ b/spacy/lang/da/init.py
@ -1,9 +1,9 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
 from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 class DanishDefaults(BaseDefaults):
--- a/spacy/lang/da/lex_attrs.py
+++ b/spacy/lang/da/lex_attrs.py
@ -1,6 +1,5 @@
 from ...attrs import LIKE_NUM
 # Source http://fjern-uv.dk/tal.php
 _num_words = """nul
 en et to tre fire fem seks syv otte ni ti
--- a/spacy/lang/da/punctuation.py
+++ b/spacy/lang/da/punctuation.py
@ -1,8 +1,13 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS
+from ..char_classes import (
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+    ALPHA,
    ALPHA_LOWER,
    ALPHA_UPPER,
    CONCAT_QUOTES,
    LIST_ELLIPSES,
    LIST_ICONS,
 )
 from ..punctuation import TOKENIZER_SUFFIXES
 _quotes = CONCAT_QUOTES.replace("'", "")
 _infixes = (
--- a/spacy/lang/da/syntax_iterators.py
+++ b/spacy/lang/da/syntax_iterators.py
@ -1,7 +1,8 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
-from ...tokens import Doc, Span
+
 from ...symbols import NOUN, PROPN, PRON, VERB, AUX
 from ...errors import Errors
 from ...symbols import AUX, NOUN, PRON, PROPN, VERB
 from ...tokens import Doc, Span
 def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
--- a/spacy/lang/da/tokenizer_exceptions.py
+++ b/spacy/lang/da/tokenizer_exceptions.py
@ -2,10 +2,9 @@
 Tokenizer Exceptions.
 Source: https://forkortelse.dk/ and various others.
 """
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import NORM, ORTH
 from ...symbols import ORTH, NORM
 from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc = {}
--- a/spacy/lang/de/init.py
+++ b/spacy/lang/de/init.py
@ -1,8 +1,8 @@
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
-from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
+from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .syntax_iterators import SYNTAX_ITERATORS
-from ...language import Language, BaseDefaults
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 class GermanDefaults(BaseDefaults):
--- a/spacy/lang/de/punctuation.py
+++ b/spacy/lang/de/punctuation.py
@ -1,9 +1,18 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
+from ..char_classes import (
-from ..char_classes import CURRENCY, UNITS, PUNCT
+    ALPHA,
-from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
+    ALPHA_LOWER,
    ALPHA_UPPER,
    CONCAT_QUOTES,
    CURRENCY,
    LIST_ELLIPSES,
    LIST_ICONS,
    LIST_PUNCT,
    LIST_QUOTES,
    PUNCT,
    UNITS,
 )
 from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
 _prefixes = ["``"] + BASE_TOKENIZER_PREFIXES
 _suffixes = (
--- a/spacy/lang/de/syntax_iterators.py
+++ b/spacy/lang/de/syntax_iterators.py
@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
 from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 from ...symbols import NOUN, PRON, PROPN
 from ...tokens import Doc, Span
--- a/spacy/lang/de/tokenizer_exceptions.py
+++ b/spacy/lang/de/tokenizer_exceptions.py
@ -1,7 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import NORM, ORTH
 from ...symbols import ORTH, NORM
 from ...util import update_exc
-
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc = {
    "auf'm": [{ORTH: "auf"}, {ORTH: "'m", NORM: "dem"}],
--- a/spacy/lang/dsb/init.py
+++ b/spacy/lang/dsb/init.py
@ -1,6 +1,6 @@
 from ...language import BaseDefaults, Language
 from .lex_attrs import LEX_ATTRS
 from .stop_words import STOP_WORDS
 from ...language import Language, BaseDefaults
 class LowerSorbianDefaults(BaseDefaults):
--- a/spacy/lang/el/init.py
+++ b/spacy/lang/el/init.py
@ -1,13 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
 from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES, TOKENIZER_INFIXES
 from .lemmatizer import GreekLemmatizer
-from ...language import Language, BaseDefaults
+from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .stop_words import STOP_WORDS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 class GreekDefaults(BaseDefaults):
--- a/spacy/lang/el/get_pos_from_wiktionary.py
+++ b/spacy/lang/el/get_pos_from_wiktionary.py
@ -1,5 +1,6 @@
 def get_pos_from_wiktionary():
    import re
    from gensim.corpora.wikicorpus import extract_pages
    regex = re.compile(r"==={{(\w+)\|el}}===")
--- a/spacy/lang/el/punctuation.py
+++ b/spacy/lang/el/punctuation.py
@ -1,6 +1,16 @@
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, LIST_CURRENCY
+from ..char_classes import (
-from ..char_classes import LIST_ICONS, ALPHA_LOWER, ALPHA_UPPER, ALPHA, HYPHENS
+    ALPHA,
-from ..char_classes import CONCAT_QUOTES, CURRENCY
+    ALPHA_LOWER,
    ALPHA_UPPER,
    CONCAT_QUOTES,
    CURRENCY,
    HYPHENS,
    LIST_CURRENCY,
    LIST_ELLIPSES,
    LIST_ICONS,
    LIST_PUNCT,
    LIST_QUOTES,
 )
 _units = (
    "km km² km³ m m² m³ dm dm² dm³ cm cm² cm³ mm mm² mm³ ha µm nm yd in ft "
--- a/spacy/lang/el/syntax_iterators.py
+++ b/spacy/lang/el/syntax_iterators.py
@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
 from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 from ...symbols import NOUN, PRON, PROPN
 from ...tokens import Doc, Span
--- a/spacy/lang/el/tokenizer_exceptions.py
+++ b/spacy/lang/el/tokenizer_exceptions.py
@ -1,6 +1,6 @@
-from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...symbols import NORM, ORTH
 from ...symbols import ORTH, NORM
 from ...util import update_exc
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc = {}
--- a/spacy/lang/en/init.py
+++ b/spacy/lang/en/init.py
@ -1,13 +1,14 @@
-from typing import Optional, Callable
+from typing import Callable, Optional
 from thinc.api import Model
-from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
+from ...language import BaseDefaults, Language
 from .stop_words import STOP_WORDS
 from .lex_attrs import LEX_ATTRS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .punctuation import TOKENIZER_INFIXES
 from .lemmatizer import EnglishLemmatizer
-from ...language import Language, BaseDefaults
+from .lex_attrs import LEX_ATTRS
 from .punctuation import TOKENIZER_INFIXES
 from .stop_words import STOP_WORDS
 from .syntax_iterators import SYNTAX_ITERATORS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
 class EnglishDefaults(BaseDefaults):
--- a/spacy/lang/en/punctuation.py
+++ b/spacy/lang/en/punctuation.py
@ -1,5 +1,12 @@
-from ..char_classes import LIST_ELLIPSES, LIST_ICONS, HYPHENS
+from ..char_classes import (
-from ..char_classes import CONCAT_QUOTES, ALPHA_LOWER, ALPHA_UPPER, ALPHA
+    ALPHA,
    ALPHA_LOWER,
    ALPHA_UPPER,
    CONCAT_QUOTES,
    HYPHENS,
    LIST_ELLIPSES,
    LIST_ICONS,
 )
 _infixes = (
    LIST_ELLIPSES
--- a/spacy/lang/en/syntax_iterators.py
+++ b/spacy/lang/en/syntax_iterators.py
@ -1,7 +1,7 @@
-from typing import Union, Iterator, Tuple
+from typing import Iterator, Tuple, Union
 from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 from ...symbols import NOUN, PRON, PROPN
 from ...tokens import Doc, Span
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@ -1,8 +1,8 @@
 from typing import Dict, List
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 from ...symbols import ORTH, NORM
 from ...util import update_exc
 from ...symbols import NORM, ORTH
 from ...util import update_exc
 from ..tokenizer_exceptions import BASE_EXCEPTIONS
 _exc: Dict[str, List[Dict]] = {}
 _exclude = [
--- a/Show More
+++ b/Show More
`@ -1,4 +1,5 @@`
	`import warnings`	`import warnings`

	`from .errors import Warnings`	`from .errors import Warnings`
`@ -1,5 +1,4 @@`
	`from ...symbols import ORTH, NORM`	`from ...symbols import NORM, ORTH`


	`_exc = {}`	`_exc = {}`