Compare commits


No commits in common. "master" and "v3.7.0" have entirely different histories.

259 changed files with 3572 additions and 12328 deletions

.github/FUNDING.yml

@ -1 +0,0 @@
custom: [https://explosion.ai/merch, https://explosion.ai/tailored-solutions]


@ -1,99 +0,0 @@
name: Build
on:
push:
tags:
# ytf did they invent their own syntax that's almost regex?
# ** matches 'zero or more of any character'
- 'release-v[0-9]+.[0-9]+.[0-9]+**'
- 'prerelease-v[0-9]+.[0-9]+.[0-9]+**'
jobs:
build_wheels:
name: Build wheels on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
# macos-13 is an intel runner, macos-14 is apple silicon
os: [ubuntu-latest, windows-latest, macos-13, macos-14, ubuntu-24.04-arm]
steps:
- uses: actions/checkout@v4
# aarch64 (arm) is built via qemu emulation
# QEMU is sadly too slow. We need to wait for public ARM support
#- name: Set up QEMU
# if: runner.os == 'Linux'
# uses: docker/setup-qemu-action@v3
# with:
# platforms: all
- name: Build wheels
uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_ARCHS_LINUX: auto
with:
package-dir: .
output-dir: wheelhouse
config-file: "{package}/pyproject.toml"
- uses: actions/upload-artifact@v4
with:
name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
path: ./wheelhouse/*.whl
build_sdist:
name: Build source distribution
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build sdist
run: pipx run build --sdist
- uses: actions/upload-artifact@v4
with:
name: cibw-sdist
path: dist/*.tar.gz
create_release:
needs: [build_wheels, build_sdist]
runs-on: ubuntu-latest
permissions:
contents: write
checks: write
actions: read
issues: read
packages: write
pull-requests: read
repository-projects: read
statuses: read
steps:
- name: Get the tag name and determine if it's a prerelease
id: get_tag_info
run: |
FULL_TAG=${GITHUB_REF#refs/tags/}
if [[ $FULL_TAG == release-* ]]; then
TAG_NAME=${FULL_TAG#release-}
IS_PRERELEASE=false
elif [[ $FULL_TAG == prerelease-* ]]; then
TAG_NAME=${FULL_TAG#prerelease-}
IS_PRERELEASE=true
else
echo "Tag does not match expected patterns" >&2
exit 1
fi
echo "FULL_TAG=$TAG_NAME" >> $GITHUB_ENV
echo "TAG_NAME=$TAG_NAME" >> $GITHUB_ENV
echo "IS_PRERELEASE=$IS_PRERELEASE" >> $GITHUB_ENV
- uses: actions/download-artifact@v4
with:
# unpacks all CIBW artifacts into dist/
pattern: cibw-*
path: dist
merge-multiple: true
- name: Create Draft Release
id: create_release
uses: softprops/action-gh-release@v2
if: startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
name: ${{ env.TAG_NAME }}
draft: true
prerelease: ${{ env.IS_PRERELEASE }}
files: "./dist/*"


@ -15,7 +15,7 @@ jobs:
env:
GITHUB_CONTEXT: ${{ toJson(github) }}
run: echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
- name: Install and run explosion-bot
run: |


@ -16,7 +16,7 @@ jobs:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- uses: dessant/lock-threads@v5
- uses: dessant/lock-threads@v4
with:
process-only: 'issues'
issue-inactive-days: '30'


@ -1,29 +0,0 @@
# The cibuildwheel action triggers on creation of a release, this
# triggers on publication.
# The expected workflow is to create a draft release and let the wheels
# upload, and then hit 'publish', which uploads to PyPi.
on:
release:
types:
- published
jobs:
upload_pypi:
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/spacy
permissions:
id-token: write
contents: read
if: github.event_name == 'release' && github.event.action == 'published'
# or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this)
# if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
steps:
- uses: robinraju/release-downloader@v1
with:
tag: ${{ github.event.release.tag_name }}
fileName: '*'
out-file-path: 'dist'
- uses: pypa/gh-action-pypi-publish@release/v1


@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
with:
ref: ${{ matrix.branch }}
- name: Get commits from past 24 hours


@ -18,7 +18,7 @@ jobs:
run: |
echo "$GITHUB_CONTEXT"
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
with:
python-version: '3.10'


@ -2,8 +2,6 @@ name: tests
on:
push:
tags-ignore:
- '**'
branches-ignore:
- "spacy.io"
- "nightly.spacy.io"
@ -12,6 +10,7 @@ on:
- "*.md"
- "*.mdx"
- "website/**"
- ".github/workflows/**"
pull_request:
types: [opened, synchronize, reopened, edited]
paths-ignore:
@ -26,12 +25,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.10"
python-version: "3.7"
architecture: x64
- name: black
run: |
@ -45,12 +45,11 @@ jobs:
run: |
python -m pip install flake8==5.0.4
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
# Unfortunately cython-lint isn't working after the shift to Cython 3.
#- name: cython-lint
# run: |
# python -m pip install cython-lint -c requirements.txt
# # E501: line too log, W291: trailing whitespace, E266: too many leading '#' for block comment
# cython-lint spacy --ignore E501,W291,E266
- name: cython-lint
run: |
python -m pip install cython-lint -c requirements.txt
# E501: line too log, W291: trailing whitespace, E266: too many leading '#' for block comment
cython-lint spacy --ignore E501,W291,E266
tests:
name: Test
@ -59,18 +58,28 @@ jobs:
fail-fast: true
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.9", "3.12", "3.13"]
python_version: ["3.11", "3.12.0-rc.2"]
include:
- os: windows-latest
python_version: "3.7"
- os: macos-latest
python_version: "3.8"
- os: ubuntu-latest
python_version: "3.9"
- os: windows-latest
python_version: "3.10"
runs-on: ${{ matrix.os }}
steps:
- name: Check out repo
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python_version }}
architecture: x64
- name: Install dependencies
run: |
@ -148,9 +157,7 @@ jobs:
- name: "Test assemble CLI"
run: |
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
python -m spacy assemble ner_source_sm.cfg output_dir
env:
PYTHONWARNINGS: "error,ignore::DeprecationWarning"
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
if: matrix.python_version == '3.9'
- name: "Test assemble CLI vectors warning"


@ -20,12 +20,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Check out repo
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Configure Python version
uses: actions/setup-python@v4
with:
python-version: "3.7"
architecture: x64
- name: Validate website/meta/universe.json
run: |


@ -35,7 +35,7 @@ so that more people can benefit from it.
When opening an issue, use a **descriptive title** and include your
**environment** (operating system, Python version, spaCy version). Our
[issue templates](https://github.com/explosion/spaCy/issues/new/choose) help you
[issue template](https://github.com/explosion/spaCy/issues/new) helps you
remember the most important details to include. If you've discovered a bug, you
can also submit a [regression test](#fixing-bugs) straight away. When you're
opening an issue to report the bug, simply refer to your pull request in the
@ -449,12 +449,13 @@ and plugins in spaCy v3.0, and we can't wait to see what you build with it!
[`spacy`](https://github.com/topics/spacy?o=desc&s=stars) and
[`spacy-extensions`](https://github.com/topics/spacy-extension?o=desc&s=stars)
to make it easier to find. Those are also the topics we're linking to from the
spaCy website. If you're sharing your project on X, feel free to tag
[@spacy_io](https://x.com/spacy_io) so we can check it out.
spaCy website. If you're sharing your project on Twitter, feel free to tag
[@spacy_io](https://twitter.com/spacy_io) so we can check it out.
- Once your extension is published, you can open a
[PR](https://github.com/explosion/spaCy/pulls) to suggest it for the
[Universe](https://spacy.io/universe) page.
- Once your extension is published, you can open an issue on the
[issue tracker](https://github.com/explosion/spacy/issues) to suggest it for the
[resources directory](https://spacy.io/usage/resources#extensions) on the
website.
📖 **For more tips and best practices, see the [checklist for developing spaCy extensions](https://spacy.io/usage/processing-pipelines#extensions).**


@ -1,6 +1,6 @@
The MIT License (MIT)
Copyright (C) 2016-2024 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
Copyright (C) 2016-2022 ExplosionAI GmbH, 2016 spaCy GmbH, 2015 Matthew Honnibal
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal


@ -4,6 +4,5 @@ include README.md
include pyproject.toml
include spacy/py.typed
recursive-include spacy/cli *.yml
recursive-include spacy/tests *.json
recursive-include licenses *
recursive-exclude spacy *.cpp


@ -16,7 +16,7 @@ model packaging, deployment and workflow management. spaCy is commercial
open-source software, released under the
[MIT license](https://github.com/explosion/spaCy/blob/master/LICENSE).
💫 **Version 3.8 out now!**
💫 **Version 3.7 out now!**
[Check out the release notes here.](https://github.com/explosion/spaCy/releases)
[![tests](https://github.com/explosion/spaCy/actions/workflows/tests.yml/badge.svg)](https://github.com/explosion/spaCy/actions/workflows/tests.yml)
@ -28,6 +28,7 @@ open-source software, released under the
<br />
[![PyPi downloads](https://static.pepy.tech/personalized-badge/spacy?period=total&units=international_system&left_color=grey&right_color=orange&left_text=pip%20downloads)](https://pypi.org/project/spacy/)
[![Conda downloads](https://img.shields.io/conda/dn/conda-forge/spacy?label=conda%20downloads)](https://anaconda.org/conda-forge/spacy)
[![spaCy on Twitter](https://img.shields.io/twitter/follow/spacy_io.svg?style=social&label=Follow)](https://twitter.com/spacy_io)
## 📖 Documentation
@ -38,37 +39,28 @@ open-source software, released under the
| 🚀 **[New in v3.0]** | New features, backwards incompatibilities and migration guide. |
| 🪐 **[Project Templates]** | End-to-end workflows you can clone, modify and run. |
| 🎛 **[API Reference]** | The detailed reference for spaCy's API. |
| ⏩ **[GPU Processing]** | Use spaCy with CUDA-compatible GPU processing. |
| 📦 **[Models]** | Download trained pipelines for spaCy. |
| 🦙 **[Large Language Models]** | Integrate LLMs into spaCy pipelines. |
| 🌌 **[Universe]** | Plugins, extensions, demos and books from the spaCy ecosystem. |
| ⚙️ **[spaCy VS Code Extension]** | Additional tooling and features for working with spaCy's config files. |
| 👩‍🏫 **[Online Course]** | Learn spaCy in this free and interactive online course. |
| 📰 **[Blog]** | Read about current spaCy and Prodigy development, releases, talks and more from Explosion. |
| 📺 **[Videos]** | Our YouTube channel with video tutorials, talks and more. |
| 🔴 **[Live Stream]** | Join Matt as he works on spaCy and chat about NLP, live every week. |
| 🛠 **[Changelog]** | Changes and version history. |
| 💝 **[Contribute]** | How to contribute to the spaCy project and code base. |
| 👕 **[Swag]** | Support us and our work with unique, custom-designed swag! |
| <a href="https://explosion.ai/tailored-solutions"><img src="https://github.com/explosion/spaCy/assets/13643239/36d2a42e-98c0-4599-90e1-788ef75181be" width="150" alt="Tailored Solutions"/></a> | Custom NLP consulting, implementation and strategic advice by spaCy's core development team. Streamlined, production-ready, predictable and maintainable. Send us an email or take our 5-minute questionnaire, and we'll be in touch! **[Learn more &rarr;](https://explosion.ai/tailored-solutions)** |
| <a href="https://explosion.ai/spacy-tailored-pipelines"><img src="https://user-images.githubusercontent.com/13643239/152853098-1c761611-ccb0-4ec6-9066-b234552831fe.png" width="125" alt="spaCy Tailored Pipelines"/></a> | Get a custom spaCy pipeline, tailor-made for your NLP problem by spaCy's core developers. Streamlined, production-ready, predictable and maintainable. Start by completing our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more &rarr;](https://explosion.ai/spacy-tailored-pipelines)** |
| <a href="https://explosion.ai/spacy-tailored-analysis"><img src="https://user-images.githubusercontent.com/1019791/206151300-b00cd189-e503-4797-aa1e-1bb6344062c5.png" width="125" alt="spaCy Tailored Pipelines"/></a> | Bespoke advice for problem solving, strategy and analysis for applied NLP projects. Services include data strategy, code reviews, pipeline design and annotation coaching. Curious? Fill in our 5-minute questionnaire to tell us what you need and we'll be in touch! **[Learn more &rarr;](https://explosion.ai/spacy-tailored-analysis)** |
[spacy 101]: https://spacy.io/usage/spacy-101
[new in v3.0]: https://spacy.io/usage/v3
[usage guides]: https://spacy.io/usage/
[api reference]: https://spacy.io/api/
[gpu processing]: https://spacy.io/usage#gpu
[models]: https://spacy.io/models
[large language models]: https://spacy.io/usage/large-language-models
[universe]: https://spacy.io/universe
[spacy vs code extension]: https://github.com/explosion/spacy-vscode
[videos]: https://www.youtube.com/c/ExplosionAI
[live stream]: https://www.youtube.com/playlist?list=PLBmcuObd5An5_iAxNYLJa_xWmNzsYce8c
[online course]: https://course.spacy.io
[blog]: https://explosion.ai
[project templates]: https://github.com/explosion/projects
[changelog]: https://spacy.io/usage#changelog
[contribute]: https://github.com/explosion/spaCy/blob/master/CONTRIBUTING.md
[swag]: https://explosion.ai/merch
## 💬 Where to ask questions
@ -80,14 +72,13 @@ more people can benefit from it.
| Type | Platforms |
| ------------------------------- | --------------------------------------- |
| 🚨 **Bug Reports** | [GitHub Issue Tracker] |
| 🎁 **Feature Requests & Ideas** | [GitHub Discussions] · [Live Stream] |
| 🎁 **Feature Requests & Ideas** | [GitHub Discussions] |
| 👩‍💻 **Usage Questions** | [GitHub Discussions] · [Stack Overflow] |
| 🗯 **General Discussion** | [GitHub Discussions] · [Live Stream] |
| 🗯 **General Discussion** | [GitHub Discussions] |
[github issue tracker]: https://github.com/explosion/spaCy/issues
[github discussions]: https://github.com/explosion/spaCy/discussions
[stack overflow]: https://stackoverflow.com/questions/tagged/spacy
[live stream]: https://www.youtube.com/playlist?list=PLBmcuObd5An5_iAxNYLJa_xWmNzsYce8c
## Features
@ -117,7 +108,7 @@ For detailed installation instructions, see the
- **Operating system**: macOS / OS X · Linux · Windows (Cygwin, MinGW, Visual
Studio)
- **Python version**: Python >=3.7, <3.13 (only 64 bit)
- **Python version**: Python 3.7+ (only 64 bit)
- **Package managers**: [pip] · [conda] (via `conda-forge`)
[pip]: https://pypi.org/project/spacy/


@ -1,20 +0,0 @@
#!/usr/bin/env bash
set -e
# Insist repository is clean
git diff-index --quiet HEAD
version=$(grep "__version__ = " spacy/about.py)
version=${version/__version__ = }
version=${version/\'/}
version=${version/\'/}
version=${version/\"/}
version=${version/\"/}
echo "Pushing release-v"$version
git tag -d release-v$version || true
git push origin :release-v$version || true
git tag release-v$version
git push origin release-v$version


@ -1,2 +1,6 @@
# build version constraints for use with wheelwright
numpy>=2.0.0,<3.0.0
numpy==1.15.0; python_version=='3.7' and platform_machine!='aarch64'
numpy==1.19.2; python_version=='3.7' and platform_machine=='aarch64'
numpy==1.17.3; python_version=='3.8' and platform_machine!='aarch64'
numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64'
numpy>=1.25.0; python_version>='3.9'


@ -158,45 +158,3 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SciPy
-----
* Files: scorer.py
The implementation of trapezoid() is adapted from SciPy, which is distributed
under the following license:
New BSD License
Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -1,67 +1,15 @@
[build-system]
requires = [
"setuptools",
"cython>=3.0,<4.0",
"cython>=0.25,<3.0",
"cymem>=2.0.2,<2.1.0",
"preshed>=3.0.2,<3.1.0",
"murmurhash>=0.28.0,<1.1.0",
"thinc>=8.3.4,<8.4.0",
"numpy>=2.0.0,<3.0.0"
"thinc>=8.1.8,<8.3.0",
"numpy>=1.15.0; python_version < '3.9'",
"numpy>=1.25.0; python_version >= '3.9'",
]
build-backend = "setuptools.build_meta"
[tool.cibuildwheel]
build = "*"
skip = "pp* cp36* cp37* cp38* *-win32 *i686*"
test-skip = ""
free-threaded-support = false
archs = ["native"]
build-frontend = "default"
config-settings = {}
dependency-versions = "pinned"
environment = { PIP_CONSTRAINT = "build-constraints.txt" }
environment-pass = []
build-verbosity = 0
before-all = "curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-toolchain stable"
before-build = "pip install -r requirements.txt && python setup.py clean"
repair-wheel-command = ""
test-command = ""
before-test = ""
test-requires = []
test-extras = []
container-engine = "docker"
manylinux-x86_64-image = "manylinux2014"
manylinux-i686-image = "manylinux2014"
manylinux-aarch64-image = "manylinux2014"
manylinux-ppc64le-image = "manylinux2014"
manylinux-s390x-image = "manylinux2014"
manylinux-pypy_x86_64-image = "manylinux2014"
manylinux-pypy_i686-image = "manylinux2014"
manylinux-pypy_aarch64-image = "manylinux2014"
musllinux-x86_64-image = "musllinux_1_2"
musllinux-i686-image = "musllinux_1_2"
musllinux-aarch64-image = "musllinux_1_2"
musllinux-ppc64le-image = "musllinux_1_2"
musllinux-s390x-image = "musllinux_1_2"
[tool.cibuildwheel.linux]
repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}"
[tool.cibuildwheel.macos]
repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}"
[tool.cibuildwheel.windows]
[tool.cibuildwheel.pyodide]
[tool.isort]
profile = "black"


@ -3,26 +3,31 @@ spacy-legacy>=3.0.11,<3.1.0
spacy-loggers>=1.0.0,<2.0.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.3.4,<8.4.0
thinc>=8.1.8,<8.3.0
ml_datasets>=0.2.0,<0.3.0
murmurhash>=0.28.0,<1.1.0
wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer-slim>=0.3.0,<1.0.0
weasel>=0.1.0,<0.5.0
typer>=0.3.0,<0.10.0
pathy>=0.10.0
smart-open>=5.2.1,<7.0.0
weasel>=0.1.0,<0.4.0
# Third party dependencies
numpy>=2.0.0,<3.0.0
numpy>=1.15.0; python_version < "3.9"
numpy>=1.19.0; python_version >= "3.9"
requests>=2.13.0,<3.0.0
tqdm>=4.38.0,<5.0.0
pydantic>=1.7.4,!=1.8,!=1.8.1,<3.0.0
jinja2
langcodes>=3.2.0,<4.0.0
# Official Python utilities
setuptools
packaging>=20.0
typing_extensions>=3.7.4.1,<4.5.0; python_version < "3.8"
# Development dependencies
pre-commit>=2.13.0
cython>=3.0,<4.0
cython>=0.25,<3.0
pytest>=5.2.0,!=7.1.0
pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0


@ -17,11 +17,11 @@ classifiers =
Operating System :: Microsoft :: Windows
Programming Language :: Cython
Programming Language :: Python :: 3
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
Programming Language :: Python :: 3.13
Topic :: Scientific/Engineering
project_urls =
Release notes = https://github.com/explosion/spaCy/releases
@ -30,18 +30,18 @@ project_urls =
[options]
zip_safe = false
include_package_data = true
python_requires = >=3.9,<3.14
python_requires = >=3.7
# NOTE: This section is superseded by pyproject.toml and will be removed in
# spaCy v4
setup_requires =
cython>=3.0,<4.0
numpy>=2.0.0,<3.0.0; python_version < "3.9"
numpy>=2.0.0,<3.0.0; python_version >= "3.9"
cython>=0.25,<3.0
numpy>=1.15.0; python_version < "3.9"
numpy>=1.19.0; python_version >= "3.9"
# We also need our Cython packages here to compile against
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
murmurhash>=0.28.0,<1.1.0
thinc>=8.3.4,<8.4.0
thinc>=8.1.8,<8.3.0
install_requires =
# Our libraries
spacy-legacy>=3.0.11,<3.1.0
@ -49,13 +49,15 @@ install_requires =
murmurhash>=0.28.0,<1.1.0
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0
thinc>=8.3.4,<8.4.0
thinc>=8.1.8,<8.3.0
wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
weasel>=0.1.0,<0.5.0
weasel>=0.1.0,<0.4.0
# Third-party dependencies
typer-slim>=0.3.0,<1.0.0
typer>=0.3.0,<0.10.0
pathy>=0.10.0
smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0; python_version < "3.9"
numpy>=1.19.0; python_version >= "3.9"
@ -65,6 +67,8 @@ install_requires =
# Official Python utilities
setuptools
packaging>=20.0
typing_extensions>=3.7.4.1,<4.5.0; python_version < "3.8"
langcodes>=3.2.0,<4.0.0
[options.entry_points]
console_scripts =
@ -114,7 +118,7 @@ cuda12x =
cuda-autodetect =
cupy-wheel>=11.0.0,<13.0.0
apple =
thinc-apple-ops>=1.0.0,<2.0.0
thinc-apple-ops>=0.1.0.dev0,<1.0.0
# Language tokenizers with external dependencies
ja =
sudachipy>=0.5.2,!=0.6.1


@ -13,11 +13,9 @@ from thinc.api import Config, prefer_gpu, require_cpu, require_gpu # noqa: F401
from . import pipeline # noqa: F401
from . import util
from .about import __version__ # noqa: F401
from .cli.info import info # noqa: F401
from .errors import Errors
from .glossary import explain # noqa: F401
from .language import Language
from .registrations import REGISTRY_POPULATED, populate_registry
from .util import logger, registry # noqa: F401
from .vocab import Vocab
@ -78,3 +76,9 @@ def blank(
# We should accept both dot notation and nested dict here for consistency
config = util.dot_to_dict(config)
return LangClass.from_config(config, vocab=vocab, meta=meta)
def info(*args, **kwargs):
from .cli.info import info as cli_info
return cli_info(*args, **kwargs)


@ -1,5 +1,5 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.8.7"
__version__ = "3.7.0"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"


@ -1,7 +1,5 @@
from wasabi import msg
# Needed for testing
from . import download as download_module # noqa: F401
from ._util import app, setup_cli # noqa: F401
from .apply import apply # noqa: F401
from .assemble import assemble_cli # noqa: F401
@ -24,17 +22,8 @@ from .init_pipeline import init_pipeline_cli # noqa: F401
from .package import package # noqa: F401
from .pretrain import pretrain # noqa: F401
from .profile import profile # noqa: F401
from .project.assets import project_assets # type: ignore[attr-defined] # noqa: F401
from .project.clone import project_clone # type: ignore[attr-defined] # noqa: F401
from .project.document import ( # type: ignore[attr-defined] # noqa: F401
project_document,
)
from .project.dvc import project_update_dvc # type: ignore[attr-defined] # noqa: F401
from .project.pull import project_pull # type: ignore[attr-defined] # noqa: F401
from .project.push import project_push # type: ignore[attr-defined] # noqa: F401
from .project.run import project_run # type: ignore[attr-defined] # noqa: F401
from .train import train_cli # type: ignore[attr-defined] # noqa: F401
from .validate import validate # type: ignore[attr-defined] # noqa: F401
from .train import train_cli # noqa: F401
from .validate import validate # noqa: F401
@app.command("link", no_args_is_help=True, deprecated=True, hidden=True)


@ -41,6 +41,10 @@ from ..util import (
run_command,
)
if TYPE_CHECKING:
from pathy import FluidPath # noqa: F401
SDIST_SUFFIX = ".tar.gz"
WHEEL_SUFFIX = "-py3-none-any.whl"


@ -13,7 +13,7 @@ from .. import util
from ..language import Language
from ..tokens import Doc
from ..training import Corpus
from ._util import Arg, Opt, benchmark_cli, import_code, setup_gpu
from ._util import Arg, Opt, benchmark_cli, setup_gpu
@benchmark_cli.command(
@ -30,14 +30,12 @@ def benchmark_speed_cli(
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
n_batches: int = Opt(50, "--batches", help="Minimum number of batches to benchmark", min=30,),
warmup_epochs: int = Opt(3, "--warmup", "-w", min=0, help="Number of iterations over the data for warmup"),
code_path: Optional[Path] = Opt(None, "--code", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
# fmt: on
):
"""
Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark
data in the binary .spacy format.
"""
import_code(code_path)
setup_gpu(use_gpu=use_gpu, silent=False)
nlp = util.load_model(model)
@ -173,5 +171,5 @@ def print_outliers(sample: numpy.ndarray):
def warmup(
nlp: Language, docs: List[Doc], warmup_epochs: int, batch_size: Optional[int]
) -> numpy.ndarray:
docs = [doc.copy() for doc in docs * warmup_epochs]
docs = warmup_epochs * docs
return annotate(nlp, docs, batch_size)


@ -170,7 +170,7 @@ def debug_model(
msg.divider(f"STEP 3 - prediction")
msg.info(str(prediction))
msg.good(f"Successfully ended analysis - model looks good.")
msg.good(f"Succesfully ended analysis - model looks good.")
def _sentences():


@ -1,6 +1,5 @@
import sys
from typing import Optional, Sequence
from urllib.parse import urljoin
import requests
import typer
@ -8,14 +7,7 @@ from wasabi import msg
from .. import about
from ..errors import OLD_MODEL_SHORTCUTS
from ..util import (
get_minor_version,
is_in_interactive,
is_in_jupyter,
is_package,
is_prerelease_version,
run_command,
)
from ..util import get_minor_version, is_package, is_prerelease_version, run_command
from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app
@ -64,13 +56,6 @@ def download(
)
pip_args = pip_args + ("--no-deps",)
if direct:
# Reject model names with '/', in order to prevent shenanigans.
if "/" in model:
msg.fail(
title="Model download rejected",
text=f"Cannot download model '{model}'. Models are expected to be file names, not URLs or fragments",
exits=True,
)
components = model.split("-")
model_name = "".join(components[:-1])
version = components[-1]
@ -92,27 +77,6 @@ def download(
"Download and installation successful",
f"You can now load the package via spacy.load('{model_name}')",
)
if is_in_jupyter():
reload_deps_msg = (
"If you are in a Jupyter or Colab notebook, you may need to "
"restart Python in order to load all the package's dependencies. "
"You can do this by selecting the 'Restart kernel' or 'Restart "
"runtime' option."
)
msg.warn(
"Restart to reload dependencies",
reload_deps_msg,
)
elif is_in_interactive():
reload_deps_msg = (
"If you are in an interactive Python session, you may need to "
"exit and restart Python to load all the package's dependencies. "
"You can exit with Ctrl-D (or Ctrl-Z and Enter on Windows)."
)
msg.warn(
"Restart to reload dependencies",
reload_deps_msg,
)
def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
@ -161,16 +125,7 @@ def get_latest_version(model: str) -> str:
def download_model(
filename: str, user_pip_args: Optional[Sequence[str]] = None
) -> None:
# Construct the download URL carefully. We need to make sure we don't
# allow relative paths or other shenanigans to trick us into download
# from outside our own repo.
base_url = about.__download_url__
# urljoin requires that the path ends with /, or the last path part will be dropped
if not base_url.endswith("/"):
base_url = about.__download_url__ + "/"
download_url = urljoin(base_url, filename)
if not download_url.startswith(about.__download_url__):
raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
download_url = about.__download_url__ + "/" + filename
pip_args = list(user_pip_args) if user_pip_args is not None else []
cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
run_command(cmd)


@ -39,7 +39,7 @@ def find_threshold_cli(
# fmt: on
):
"""
Runs prediction trials for a trained model with varying thresholds to maximize
Runs prediction trials for a trained model with varying tresholds to maximize
the specified metric. The search space for the threshold is traversed linearly
from 0 to 1 in `n_trials` steps. Results are displayed in a table on `stdout`
(the corresponding API call to `spacy.cli.find_threshold.find_threshold()`
@ -81,7 +81,7 @@ def find_threshold(
silent: bool = True,
) -> Tuple[float, float, Dict[float, float]]:
"""
Runs prediction trials for models with varying thresholds to maximize the specified metric.
Runs prediction trials for models with varying tresholds to maximize the specified metric.
model (Union[str, Path]): Pipeline to evaluate. Can be a package or a path to a data directory.
data_path (Path): Path to file with DocBin with docs to use for threshold search.
pipe_name (str): Name of pipe to examine thresholds for.


@ -1,7 +1,5 @@
import os
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
@ -13,7 +11,6 @@ from thinc.api import Config
from wasabi import MarkdownRenderer, Printer, get_raw_input
from .. import about, util
from ..compat import importlib_metadata
from ..schemas import ModelMetaSchema, validate
from ._util import SDIST_SUFFIX, WHEEL_SUFFIX, Arg, Opt, app, string_to_list
@ -30,7 +27,6 @@ def package_cli(
version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
build: str = Opt("sdist", "--build", "-b", help="Comma-separated formats to build: sdist and/or wheel, or none."),
force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
require_parent: bool = Opt(True, "--require-parent/--no-require-parent", "-R", "-R", help="Include the parent package (e.g. spacy) in the requirements"),
# fmt: on
):
"""
@ -39,7 +35,7 @@ def package_cli(
specified output directory, and the data will be copied over. If
--create-meta is set and a meta.json already exists in the output directory,
the existing values will be used as the defaults in the command-line prompt.
After packaging, "python -m build --sdist" is run in the package directory,
After packaging, "python setup.py sdist" is run in the package directory,
which will create a .tar.gz archive that can be installed via "pip install".
If additional code files are provided (e.g. Python files containing custom
@ -61,7 +57,6 @@ def package_cli(
create_sdist=create_sdist,
create_wheel=create_wheel,
force=force,
require_parent=require_parent,
silent=False,
)
@ -76,7 +71,6 @@ def package(
create_meta: bool = False,
create_sdist: bool = True,
create_wheel: bool = False,
require_parent: bool = False,
force: bool = False,
silent: bool = True,
) -> None:
@ -84,17 +78,9 @@ def package(
input_path = util.ensure_path(input_dir)
output_path = util.ensure_path(output_dir)
meta_path = util.ensure_path(meta_path)
if create_wheel and not has_wheel() and not has_build():
err = (
"Generating wheels requires 'build' or 'wheel' (deprecated) to be installed"
)
msg.fail(err, "pip install build", exits=1)
if not has_build():
msg.warn(
"Generating packages without the 'build' package is deprecated and "
"will not be supported in the future. To install 'build': pip "
"install build"
)
if create_wheel and not has_wheel():
err = "Generating a binary .whl file requires wheel to be installed"
msg.fail(err, "pip install wheel", exits=1)
if not input_path or not input_path.exists():
msg.fail("Can't locate pipeline data", input_path, exits=1)
if not output_path or not output_path.exists():
@ -116,7 +102,7 @@ def package(
if not meta_path.exists() or not meta_path.is_file():
msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
meta = srsly.read_json(meta_path)
meta = get_meta(input_dir, meta, require_parent=require_parent)
meta = get_meta(input_dir, meta)
if meta["requirements"]:
msg.good(
f"Including {len(meta['requirements'])} package requirement(s) from "
@ -189,7 +175,6 @@ def package(
imports.append(code_path.stem)
shutil.copy(str(code_path), str(package_path))
create_file(main_path / "meta.json", srsly.json_dumps(meta, indent=2))
create_file(main_path / "setup.py", TEMPLATE_SETUP)
create_file(main_path / "MANIFEST.in", TEMPLATE_MANIFEST)
init_py = TEMPLATE_INIT.format(
@ -199,37 +184,12 @@ def package(
msg.good(f"Successfully created package directory '{model_name_v}'", main_path)
if create_sdist:
with util.working_dir(main_path):
# run directly, since util.run_command is not designed to continue
# after a command fails
ret = subprocess.run(
[sys.executable, "-m", "build", ".", "--sdist"],
env=os.environ.copy(),
)
if ret.returncode != 0:
msg.warn(
"Creating sdist with 'python -m build' failed. Falling "
"back to deprecated use of 'python setup.py sdist'"
)
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
util.run_command([sys.executable, "setup.py", "sdist"], capture=False)
zip_file = main_path / "dist" / f"{model_name_v}{SDIST_SUFFIX}"
msg.good(f"Successfully created zipped Python package", zip_file)
if create_wheel:
with util.working_dir(main_path):
# run directly, since util.run_command is not designed to continue
# after a command fails
ret = subprocess.run(
[sys.executable, "-m", "build", ".", "--wheel"],
env=os.environ.copy(),
)
if ret.returncode != 0:
msg.warn(
"Creating wheel with 'python -m build' failed. Falling "
"back to deprecated use of 'wheel' with "
"'python setup.py bdist_wheel'"
)
util.run_command(
[sys.executable, "setup.py", "bdist_wheel"], capture=False
)
util.run_command([sys.executable, "setup.py", "bdist_wheel"], capture=False)
wheel_name_squashed = re.sub("_+", "_", model_name_v)
wheel = main_path / "dist" / f"{wheel_name_squashed}{WHEEL_SUFFIX}"
msg.good(f"Successfully created binary wheel", wheel)
@ -249,17 +209,6 @@ def has_wheel() -> bool:
return False
def has_build() -> bool:
# it's very likely that there is a local directory named build/ (especially
# in an editable install), so an import check is not sufficient; instead
# check that there is a package version
try:
importlib_metadata.version("build")
return True
except importlib_metadata.PackageNotFoundError: # type: ignore[attr-defined]
return False
def get_third_party_dependencies(
config: Config, exclude: List[str] = util.SimpleFrozenList()
) -> List[str]:
@ -306,8 +255,6 @@ def get_third_party_dependencies(
modules.add(func_info["module"].split(".")[0]) # type: ignore[union-attr]
dependencies = []
for module_name in modules:
if module_name == about.__title__:
continue
if module_name in distributions:
dist = distributions.get(module_name)
if dist:
@ -338,9 +285,7 @@ def create_file(file_path: Path, contents: str) -> None:
def get_meta(
model_path: Union[str, Path],
existing_meta: Dict[str, Any],
require_parent: bool = False,
model_path: Union[str, Path], existing_meta: Dict[str, Any]
) -> Dict[str, Any]:
meta: Dict[str, Any] = {
"lang": "en",
@ -369,8 +314,6 @@ def get_meta(
existing_reqs = [util.split_requirement(req)[0] for req in meta["requirements"]]
reqs = get_third_party_dependencies(nlp.config, exclude=existing_reqs)
meta["requirements"].extend(reqs)
if require_parent and about.__title__ not in meta["requirements"]:
meta["requirements"].append(about.__title__ + meta["spacy_version"])
return meta
@ -545,11 +488,8 @@ def list_files(data_dir):
def list_requirements(meta):
# Up to version 3.7, we included the parent package
# in requirements by default. This behaviour is removed
# in 3.8, with a setting to include the parent package in
# the requirements list in the meta if desired.
requirements = []
parent_package = meta.get('parent_package', 'spacy')
requirements = [parent_package + meta['spacy_version']]
if 'setup_requires' in meta:
requirements += meta['setup_requires']
if 'requirements' in meta:


@ -1 +0,0 @@
from weasel.cli.assets import *


@ -1 +0,0 @@
from weasel.cli.clone import *


@ -1 +0,0 @@
from weasel.cli.document import *


@ -1 +0,0 @@
from weasel.cli.dvc import *


@ -1 +0,0 @@
from weasel.cli.pull import *


@ -1 +0,0 @@
from weasel.cli.push import *


@ -1 +0,0 @@
from weasel.cli.remote_storage import *


@ -1 +0,0 @@
from weasel.cli.run import *


@ -271,9 +271,8 @@ grad_factor = 1.0
@layers = "reduce_mean.v1"
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
length = 262144
ngram_size = 1
no_output_layer = false
@ -309,9 +308,8 @@ grad_factor = 1.0
@layers = "reduce_mean.v1"
[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false
@ -544,15 +542,14 @@ nO = null
width = ${components.tok2vec.model.encode.width}
[components.textcat.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
length = 262144
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat.model]
@architectures = "spacy.TextCatBOW.v3"
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = true
ngram_size = 1
no_output_layer = false
@ -573,17 +570,15 @@ nO = null
width = ${components.tok2vec.model.encode.width}
[components.textcat_multilabel.model.linear_model]
@architectures = "spacy.TextCatBOW.v3"
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false
{% else -%}
[components.textcat_multilabel.model]
@architectures = "spacy.TextCatBOW.v3"
@architectures = "spacy.TextCatBOW.v2"
exclusive_classes = false
length = 262144
ngram_size = 1
no_output_layer = false
{%- endif %}


@ -142,25 +142,7 @@ class SpanRenderer:
spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
title (str / None): Document title set in Doc.user_data['title'].
"""
per_token_info = self._assemble_per_token_info(tokens, spans)
markup = self._render_markup(per_token_info)
markup = TPL_SPANS.format(content=markup, dir=self.direction)
if title:
markup = TPL_TITLE.format(title=title) + markup
return markup
@staticmethod
def _assemble_per_token_info(
tokens: List[str], spans: List[Dict[str, Any]]
) -> List[Dict[str, List[Dict[str, Any]]]]:
"""Assembles token info used to generate markup in render_spans().
tokens (List[str]): Tokens in text.
spans (List[Dict[str, Any]]): Spans in text.
RETURNS (List[Dict[str, List[Dict, str, Any]]]): Per token info needed to render HTML markup for given tokens
and spans.
"""
per_token_info: List[Dict[str, List[Dict[str, Any]]]] = []
per_token_info = []
# we must sort so that we can correctly describe when spans need to "stack"
# which is determined by their start token, then span length (longer spans on top),
# then break any remaining ties with the span label
@ -172,22 +154,21 @@ class SpanRenderer:
s["label"],
),
)
for s in spans:
# this is the vertical 'slot' that the span will be rendered in
# vertical_position = span_label_offset + (offset_step * (slot - 1))
s["render_slot"] = 0
for idx, token in enumerate(tokens):
# Identify if a token belongs to a Span (and which) and if it's a
# start token of said Span. We'll use this for the final HTML render
token_markup: Dict[str, Any] = {}
token_markup["text"] = token
intersecting_spans: List[Dict[str, Any]] = []
concurrent_spans = 0
entities = []
for span in spans:
ent = {}
if span["start_token"] <= idx < span["end_token"]:
concurrent_spans += 1
span_start = idx == span["start_token"]
ent["label"] = span["label"]
ent["is_start"] = span_start
@ -195,12 +176,7 @@ class SpanRenderer:
# When the span starts, we need to know how many other
# spans are on the 'span stack' and will be rendered.
# This value becomes the vertical render slot for this entire span
span["render_slot"] = (
intersecting_spans[-1]["render_slot"]
if len(intersecting_spans)
else 0
) + 1
intersecting_spans.append(span)
span["render_slot"] = concurrent_spans
ent["render_slot"] = span["render_slot"]
kb_id = span.get("kb_id", "")
kb_url = span.get("kb_url", "#")
@ -217,8 +193,11 @@ class SpanRenderer:
span["render_slot"] = 0
token_markup["entities"] = entities
per_token_info.append(token_markup)
return per_token_info
markup = self._render_markup(per_token_info)
markup = TPL_SPANS.format(content=markup, dir=self.direction)
if title:
markup = TPL_TITLE.format(title=title) + markup
return markup
def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
"""Render the markup from per-token information"""


@ -220,7 +220,6 @@ class Warnings(metaclass=ErrorsWithCodes):
"key attribute for vectors, configure it through Vectors(attr=) or "
"'spacy init vectors --attr'")
W126 = ("These keys are unsupported: {unsupported}")
W127 = ("Not all `Language.pipe` worker processes completed successfully")
class Errors(metaclass=ErrorsWithCodes):
@ -228,6 +227,7 @@ class Errors(metaclass=ErrorsWithCodes):
E002 = ("Can't find factory for '{name}' for language {lang} ({lang_code}). "
"This usually happens when spaCy calls `nlp.{method}` with a custom "
"component name that's not registered on the current language class. "
"If you're using a Transformer, make sure to install 'spacy-transformers'. "
"If you're using a custom component, make sure you've added the "
"decorator `@Language.component` (for function components) or "
"`@Language.factory` (for class components).\n\nAvailable "
@ -984,10 +984,6 @@ class Errors(metaclass=ErrorsWithCodes):
"predicted docs when training {component}.")
E1055 = ("The 'replace_listener' callback expects {num_params} parameters, "
"but only callbacks with one or three parameters are supported")
E1056 = ("The `TextCatBOW` architecture expects a length of at least 1, was {length}.")
E1057 = ("The `TextCatReduce` architecture must be used with at least one "
"reduction. Please enable one of `use_reduce_first`, "
"`use_reduce_last`, `use_reduce_max` or `use_reduce_mean`.")
# Deprecated model shortcuts, only used in errors and warnings


@ -1,11 +1,3 @@
from .candidate import Candidate, get_candidates, get_candidates_batch
from .kb import KnowledgeBase
from .kb_in_memory import InMemoryLookupKB
__all__ = [
"Candidate",
"KnowledgeBase",
"InMemoryLookupKB",
"get_candidates",
"get_candidates_batch",
]


@ -1,16 +0,0 @@
from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
class TibetanDefaults(BaseDefaults):
lex_attr_getters = LEX_ATTRS
stop_words = STOP_WORDS
class Tibetan(Language):
lang = "bo"
Defaults = TibetanDefaults
__all__ = ["Tibetan"]


@ -1,16 +0,0 @@
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.bo.examples import sentences
>>> docs = nlp.pipe(sentences)
"""
sentences = [
"དོན་དུ་རྒྱ་མཚོ་བླ་མ་ཞེས་བྱ་ཞིང༌།",
"ཏཱ་ལའི་ཞེས་པ་ནི་སོག་སྐད་ཡིན་པ་དེ་བོད་སྐད་དུ་རྒྱ་མཚོའི་དོན་དུ་འཇུག",
"སོག་པོ་ཨལ་ཐན་རྒྱལ་པོས་རྒྱལ་དབང་བསོད་ནམས་རྒྱ་མཚོར་ཆེ་བསྟོད་ཀྱི་མཚན་གསོལ་བ་ཞིག་ཡིན་ཞིང༌།",
"རྗེས་སུ་རྒྱལ་བ་དགེ་འདུན་གྲུབ་དང༌། དགེ་འདུན་རྒྱ་མཚོ་སོ་སོར་ཡང་ཏཱ་ལའི་བླ་མའི་སྐུ་ཕྲེང་དང་པོ་དང༌།",
"གཉིས་པའི་མཚན་དེ་གསོལ་ཞིང༌།༸རྒྱལ་དབང་སྐུ་ཕྲེང་ལྔ་པས་དགའ་ལྡན་ཕོ་བྲང་གི་སྲིད་དབང་བཙུགས་པ་ནས་ཏཱ་ལའི་བླ་མ་ནི་བོད་ཀྱི་ཆོས་སྲིད་གཉིས་ཀྱི་དབུ་ཁྲིད་དུ་གྱུར་ཞིང་།",
"ད་ལྟའི་བར་ཏཱ་ལའི་བླ་མ་སྐུ་ཕྲེང་བཅུ་བཞི་བྱོན་ཡོད།",
]


@ -1,65 +0,0 @@
from ...attrs import LIKE_NUM
# reference 1: https://en.wikipedia.org/wiki/Tibetan_numerals
_num_words = [
"ཀླད་ཀོར་",
"གཅིག་",
"གཉིས་",
"གསུམ་",
"བཞི་",
"ལྔ་",
"དྲུག་",
"བདུན་",
"བརྒྱད་",
"དགུ་",
"བཅུ་",
"བཅུ་གཅིག་",
"བཅུ་གཉིས་",
"བཅུ་གསུམ་",
"བཅུ་བཞི་",
"བཅུ་ལྔ་",
"བཅུ་དྲུག་",
"བཅུ་བདུན་",
"བཅུ་པརྒྱད",
"བཅུ་དགུ་",
"ཉི་ཤུ་",
"སུམ་ཅུ",
"བཞི་བཅུ",
"ལྔ་བཅུ",
"དྲུག་ཅུ",
"བདུན་ཅུ",
"བརྒྱད་ཅུ",
"དགུ་བཅུ",
"བརྒྱ་",
"སྟོང་",
"ཁྲི་",
"ས་ཡ་",
" བྱེ་བ་",
"དུང་ཕྱུར་",
"ཐེར་འབུམ་",
"ཐེར་འབུམ་ཆེན་པོ་",
"ཁྲག་ཁྲིག་",
"ཁྲག་ཁྲིག་ཆེན་པོ་",
]
def like_num(text):
"""
Check if text resembles a number
"""
if text.startswith(("+", "-", "±", "~")):
text = text[1:]
text = text.replace(",", "").replace(".", "")
if text.isdigit():
return True
if text.count("/") == 1:
num, denom = text.split("/")
if num.isdigit() and denom.isdigit():
return True
if text in _num_words:
return True
return False
LEX_ATTRS = {LIKE_NUM: like_num}


@ -1,198 +0,0 @@
# Source: https://zenodo.org/records/10148636
STOP_WORDS = set(
"""
གས
མས
འད
པས
གཞན
དང
གས
བཅས
ངས
ལས
ཙམ
ཡང
མཐའདག
འད
རང
ངམ
དག
འང
ལགས
ཚང
ཐམསཅད
དམ
འམ
བས
ལགས
གས
མས
བམ
ནམ
ནམ
ངམ
འགའ
ཤས
གམ
ལགས
ཅང
འགའ
སམ
འང
ལས
འཕ
བར
དང
འག
སམ
ཟད
འམ
མམ
དམ
དག
ལམ
ནང
ཙམ
རམ
ཨང
གས
ལགས
པས
རབ
རམ
བས
གཞན
འབའ
གམ
བམ
ཙམ
མམ
ཏམ
ཏམ
ཤས
""".split()
)


@ -6,8 +6,7 @@ _num_words = [
"nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
"sixteen", "seventeen", "eighteen", "nineteen", "twenty", "thirty", "forty",
"fifty", "sixty", "seventy", "eighty", "ninety", "hundred", "thousand",
"million", "billion", "trillion", "quadrillion", "quintillion", "sextillion",
"septillion", "octillion", "nonillion", "decillion", "gajillion", "bazillion"
"million", "billion", "trillion", "quadrillion", "gajillion", "bazillion"
]
_ordinal_words = [
"first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth",
@ -15,8 +14,7 @@ _ordinal_words = [
"fifteenth", "sixteenth", "seventeenth", "eighteenth", "nineteenth",
"twentieth", "thirtieth", "fortieth", "fiftieth", "sixtieth", "seventieth",
"eightieth", "ninetieth", "hundredth", "thousandth", "millionth", "billionth",
"trillionth", "quadrillionth", "quintillionth", "sextillionth", "septillionth",
"octillionth", "nonillionth", "decillionth", "gajillionth", "bazillionth"
"trillionth", "quadrillionth", "gajillionth", "bazillionth"
]
# fmt: on


@ -1,18 +0,0 @@
from ...language import BaseDefaults, Language
from ..punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class FaroeseDefaults(BaseDefaults):
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
prefixes = TOKENIZER_PREFIXES
class Faroese(Language):
lang = "fo"
Defaults = FaroeseDefaults
__all__ = ["Faroese"]


@ -1,90 +0,0 @@
from ...symbols import ORTH
from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
for orth in [
"apr.",
"aug.",
"avgr.",
"árg.",
"ávís.",
"beinl.",
"blkv.",
"blaðkv.",
"blm.",
"blaðm.",
"bls.",
"blstj.",
"blaðstj.",
"des.",
"eint.",
"febr.",
"fyrrv.",
"góðk.",
"h.m.",
"innt.",
"jan.",
"kl.",
"m.a.",
"mðr.",
"mió.",
"nr.",
"nto.",
"nov.",
"nút.",
"o.a.",
"o.a.m.",
"o.a.tíl.",
"o.fl.",
"ff.",
"o.m.a.",
"o.o.",
"o.s.fr.",
"o.tíl.",
"o.ø.",
"okt.",
"omf.",
"pst.",
"ritstj.",
"sbr.",
"sms.",
"smst.",
"smb.",
"sb.",
"sbrt.",
"sp.",
"sept.",
"spf.",
"spsk.",
"t.e.",
"t.s.",
"t.s.s.",
"tlf.",
"tel.",
"tsk.",
"t.o.v.",
"t.d.",
"uml.",
"ums.",
"uppl.",
"upprfr.",
"uppr.",
"útg.",
"útl.",
"útr.",
"vanl.",
"v.",
"v.h.",
"v.ø.o.",
"viðm.",
"viðv.",
"vm.",
"v.m.",
]:
_exc[orth] = [{ORTH: orth}]
capitalized = orth.capitalize()
_exc[capitalized] = [{ORTH: capitalized}]
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)


@ -1,18 +0,0 @@
from typing import Optional
from ...language import BaseDefaults, Language
from .stop_words import STOP_WORDS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class ScottishDefaults(BaseDefaults):
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
stop_words = STOP_WORDS
class Scottish(Language):
lang = "gd"
Defaults = ScottishDefaults
__all__ = ["Scottish"]


@ -1,388 +0,0 @@
STOP_WORDS = set(
"""
'ad
'ar
'd # iad
'g # ag
'ga
'gam
'gan
'gar
'gur
'm # am
'n # an
'n seo
'na
'nad
'nam
'nan
'nar
'nuair
'nur
's
'sa
'san
'sann
'se
'sna
a
a'
a'd # agad
a'm # agam
a-chèile
a-seo
a-sin
a-siud
a chionn
a chionn 's
a chèile
a chéile
a dh'
a h-uile
a seo
ac' # aca
aca
aca-san
acasan
ach
ag
agad
agad-sa
agads'
agadsa
agaibh
agaibhse
againn
againne
agam
agam-sa
agams'
agamsa
agus
aice
aice-se
aicese
aig
aig' # aige
aige
aige-san
aigesan
air
air-san
air neo
airsan
am
an
an seo
an sin
an siud
an uair
ann
ann a
ann a'
ann a shin
ann am
ann an
annad
annam
annam-s'
annamsa
anns
anns an
annta
aon
ar
as
asad
asda
asta
b'
bho
bhon
bhuaidhe # bhuaithe
bhuainn
bhuaipe
bhuaithe
bhuapa
bhur
brì
bu
c'à
car son
carson
cha
chan
chionn
choir
chon
chun
chèile
chéile
chòir
cia mheud
ciamar
co-dhiubh
cuide
cuin
cuin'
cuine
'
càil
càit
càit'
càite
mheud
d'
da
de
dh'
dha
dhaibh
dhaibh-san
dhaibhsan
dhan
dhasan
dhe
dhen
dheth
dhi
dhiom
dhiot
dhith
dhiubh
dhomh
dhomh-s'
dhomhsa
dhu'sa # dhut-sa
dhuibh
dhuibhse
dhuinn
dhuinne
dhuit
dhut
dhutsa
dhut-sa
dhà
dhà-san
dhàsan
dhòmhsa
diubh
do
docha
don
mar
mar
dòch'
dòcha
e
eadar
eatarra
eatorra
eile
esan
fa
far
feud
fhad
fheudar
fhearr
fhein
fheudar
fheàrr
fhèin
fhéin
fhìn
fo
fodha
fodhainn
foipe
fon
fèin
ga
gach
gam
gan
ge brith
ged
gu
gu
gu ruige
gun
gur
gus
i
iad
iadsan
innte
is
ise
le
leam
leam-sa
leamsa
leat
leat-sa
leatha
leatsa
leibh
leis
leis-san
leoth'
leotha
leotha-san
linn
m'
m'a
ma
mac
man
mar
mas
mathaid
mi
mis'
mise
mo
mu
mu 'n
mun
mur
mura
mus
na
na b'
na bu
na iad
nach
nad
nam
nan
nar
nas
neo
no
nuair
o
o'n
oir
oirbh
oirbh-se
oirnn
oirnne
oirre
on
orm
orm-sa
ormsa
orra
orra-san
orrasan
ort
os
r'
ri
ribh
rinn
ris
rithe
rithe-se
rium
rium-sa
riums'
riumsa
riut
riuth'
riutha
riuthasan
ro
ro'n
roimh
roimhe
romhainn
romham
romhpa
ron
ruibh
ruinn
ruinne
sa
san
sann
se
seach
seo
seothach
shin
sibh
sibh-se
sibhse
sin
sineach
sinn
sinne
siod
siodach
siud
siudach
sna # ann an
t'
tarsaing
tarsainn
tarsuinn
thar
thoigh
thro
thu
thuc'
thuca
thugad
thugaibh
thugainn
thugam
thugamsa
thuice
thuige
thus'
thusa
timcheall
toigh
toil
tro
tro' # troimh
troimh
troimhe
tron
tu
tusa
uair
ud
ugaibh
ugam-s'
ugam-sa
uice
uige
uige-san
umad
unnta # ann an
ur
urrainn
à
às
àsan
á
ás
è
ì
ò
ó
""".split(
"\n"
)
)

File diff suppressed because it is too large


@ -1,5 +1,5 @@
The list of Croatian lemmas was extracted from the reldi-tagger repository (https://github.com/clarinsi/reldi-tagger).
Reldi-tagger is licensed under the Apache 2.0 licence.
Reldi-tagger is licesned under the Apache 2.0 licence.
@InProceedings{ljubesic16-new,
author = {Nikola Ljubešić and Filip Klubička and Željko Agić and Ivo-Pavao Jazbec},
@ -12,4 +12,4 @@ Reldi-tagger is licensed under the Apache 2.0 licence.
publisher = {European Language Resources Association (ELRA)},
address = {Paris, France},
isbn = {978-2-9517408-9-1}
}
}

View File

@ -1,52 +0,0 @@
from typing import Callable, Optional
from thinc.api import Model
from ...language import BaseDefaults, Language
from .lemmatizer import HaitianCreoleLemmatizer
from .lex_attrs import LEX_ATTRS
from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES, TOKENIZER_SUFFIXES
from .stop_words import STOP_WORDS
from .syntax_iterators import SYNTAX_ITERATORS
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .tag_map import TAG_MAP
class HaitianCreoleDefaults(BaseDefaults):
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
lex_attr_getters = LEX_ATTRS
syntax_iterators = SYNTAX_ITERATORS
stop_words = STOP_WORDS
tag_map = TAG_MAP
class HaitianCreole(Language):
lang = "ht"
Defaults = HaitianCreoleDefaults
@HaitianCreole.factory(
"lemmatizer",
assigns=["token.lemma"],
default_config={
"model": None,
"mode": "rule",
"overwrite": False,
"scorer": {"@scorers": "spacy.lemmatizer_scorer.v1"},
},
default_score_weights={"lemma_acc": 1.0},
)
def make_lemmatizer(
nlp: Language,
model: Optional[Model],
name: str,
mode: str,
overwrite: bool,
scorer: Optional[Callable],
):
return HaitianCreoleLemmatizer(
nlp.vocab, model, name, mode=mode, overwrite=overwrite, scorer=scorer
)
__all__ = ["HaitianCreole"]


@ -1,18 +0,0 @@
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.ht.examples import sentences
>>> docs = nlp.pipe(sentences)
"""
sentences = [
"Apple ap panse achte yon demaraj nan Wayòm Ini pou $1 milya dola",
"Machin otonòm fè responsablite asirans lan ale sou men fabrikan yo",
"San Francisco ap konsidere entèdi robo ki livre sou twotwa yo",
"Lond se yon gwo vil nan Wayòm Ini",
"Kote ou ye?",
"Kilès ki prezidan Lafrans?",
"Ki kapital Etazini?",
"Kile Barack Obama te fèt?",
]


@ -1,51 +0,0 @@
from typing import List, Tuple
from ...pipeline import Lemmatizer
from ...tokens import Token
from ...lookups import Lookups
class HaitianCreoleLemmatizer(Lemmatizer):
"""
Minimal Haitian Creole lemmatizer.
Returns a word's base form based on rules and lookup,
or defaults to the original form.
"""
def is_base_form(self, token: Token) -> bool:
morph = token.morph.to_dict()
upos = token.pos_.lower()
# Consider unmarked forms to be base
if upos in {"noun", "verb", "adj", "adv"}:
if not morph:
return True
if upos == "noun" and morph.get("Number") == "Sing":
return True
if upos == "verb" and morph.get("VerbForm") == "Inf":
return True
if upos == "adj" and morph.get("Degree") == "Pos":
return True
return False
def rule_lemmatize(self, token: Token) -> List[str]:
string = token.text.lower()
pos = token.pos_.lower()
cache_key = (token.orth, token.pos)
if cache_key in self.cache:
return self.cache[cache_key]
forms = []
# fallback rule: just return lowercased form
forms.append(string)
self.cache[cache_key] = forms
return forms
@classmethod
def get_lookups_config(cls, mode: str) -> Tuple[List[str], List[str]]:
if mode == "rule":
required = ["lemma_lookup", "lemma_rules", "lemma_exc", "lemma_index"]
return (required, [])
return super().get_lookups_config(mode)


@ -1,78 +0,0 @@
from ...attrs import LIKE_NUM, NORM
# Cardinal numbers in Creole
_num_words = set(
"""
zewo youn en de twa kat senk sis sèt uit nèf dis
onz douz trèz katoz kenz sèz disèt dizwit diznèf
vent trant karant sinkant swasant swasann-dis
san mil milyon milya
""".split()
)
# Ordinal numbers in Creole (some are French-influenced, some simplified)
_ordinal_words = set(
"""
premye dezyèm twazyèm katryèm senkyèm sizyèm sètvyèm uitvyèm nèvyèm dizyèm
onzèm douzyèm trèzyèm katozyèm kenzèm sèzyèm disetyèm dizwityèm diznèvyèm
ventyèm trantyèm karantyèm sinkantyèm swasantyèm
swasann-disyèm santyèm milyèm milyonnyèm milyadyèm
""".split()
)
NORM_MAP = {
"'m": "mwen",
"'w": "ou",
"'l": "li",
"'n": "nou",
"'y": "yo",
"m": "mwen",
"w": "ou",
"l": "li",
"n": "nou",
"y": "yo",
"m": "mwen",
"n": "nou",
"l": "li",
"y": "yo",
"w": "ou",
"t": "te",
"k": "ki",
"p": "pa",
"M": "Mwen",
"N": "Nou",
"L": "Li",
"Y": "Yo",
"W": "Ou",
"T": "Te",
"K": "Ki",
"P": "Pa",
}
def like_num(text):
text = text.strip().lower()
if text.startswith(("+", "-", "±", "~")):
text = text[1:]
text = text.replace(",", "").replace(".", "")
if text.isdigit():
return True
if text.count("/") == 1:
num, denom = text.split("/")
if num.isdigit() and denom.isdigit():
return True
if text in _num_words:
return True
if text in _ordinal_words:
return True
# Handle things like "3yèm", "10yèm", "25yèm", etc.
if text.endswith("yèm") and text[:-3].isdigit():
return True
return False
def norm_custom(text):
return NORM_MAP.get(text, text.lower())
LEX_ATTRS = {
LIKE_NUM: like_num,
NORM: norm_custom,
}
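
A quick self-contained check of the attribute getters above (illustrative only; it uses nothing beyond the names defined in this module):

# Cardinal/ordinal words, digit strings, fractions and "N + yèm" ordinals all
# count as number-like; everything else falls through to False.
assert all(like_num(t) for t in ("25yèm", "swasann-dis", "milyon", "12,500", "3/4"))
assert not like_num("liv")
# Contracted pronouns are expanded by the NORM getter; other tokens are lowercased.
assert norm_custom("'m") == "mwen" and norm_custom("Kisa") == "kisa"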

View File

@ -1,43 +0,0 @@
from ..char_classes import (
ALPHA,
ALPHA_LOWER,
ALPHA_UPPER,
CONCAT_QUOTES,
HYPHENS,
LIST_PUNCT,
LIST_QUOTES,
LIST_ELLIPSES,
LIST_ICONS,
merge_chars,
)
ELISION = "'".replace(" ", "")
_prefixes_elision = "m n l y t k w"
_prefixes_elision += " " + _prefixes_elision.upper()
TOKENIZER_PREFIXES = LIST_PUNCT + LIST_QUOTES + [
r"(?:({pe})[{el}])(?=[{a}])".format(
a=ALPHA, el=ELISION, pe=merge_chars(_prefixes_elision)
)
]
TOKENIZER_SUFFIXES = LIST_PUNCT + LIST_QUOTES + LIST_ELLIPSES + [
r"(?<=[0-9])%", # numbers like 10%
r"(?<=[0-9])(?:{h})".format(h=HYPHENS), # hyphens after numbers
r"(?<=[{a}])[']".format(a=ALPHA), # apostrophes after letters
r"(?<=[{a}])['][mwlnytk](?=\s|$)".format(a=ALPHA), # contractions
r"(?<=[{a}0-9])\)", # right parenthesis after letter/number
r"(?<=[{a}])\.(?=\s|$)".format(a=ALPHA), # period after letter if space or end of string
r"(?<=\))[\.\?!]", # punctuation immediately after right parenthesis
]
TOKENIZER_INFIXES = LIST_ELLIPSES + LIST_ICONS + [
r"(?<=[0-9])[+\-\*^](?=[0-9-])",
r"(?<=[{al}{q}])\.(?=[{au}{q}])".format(
al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
),
r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
r"(?<=[{a}0-9])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION),
]
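
An illustrative sketch of how the prefix rules above behave, using spaCy's standard regex helper (the helper import is the only assumption beyond this module):

from spacy.util import compile_prefix_regex

prefix_re = compile_prefix_regex(TOKENIZER_PREFIXES)
# An elided pronoun such as "m'" is recognised as a prefix before a letter,
# while the same letters without an elision marker are left alone.
assert prefix_re.search("m'ap") is not None
assert prefix_re.search("map") is None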

View File

@ -1,50 +0,0 @@
STOP_WORDS = set(
"""
a ak an ankò ant apre ap atò avan avanlè
byen byenke
chak
de depi deja deja
e en epi èske
fòk
gen genyen
ki kisa kilès kote koukou konsa konbyen konn konnen kounye kouman
la l laa le li lye
m m' mwen
nan nap nou n'
ou oumenm
pa paske pami pandan pito pou pral preske pwiske
se selman si sou sòt
ta tap tankou te toujou tou tan tout toutotan twòp tèl
w w' wi wè
y y' yo yon yonn
non o oh eh
sa san si swa si
men mèsi oswa osinon
"""
.split()
)
# Add common contractions, with and without apostrophe variants
contractions = ["m'", "n'", "w'", "y'", "l'", "t'", "k'"]
for apostrophe in ["'", "", ""]:
for word in contractions:
STOP_WORDS.add(word.replace("'", apostrophe))

View File

@ -1,74 +0,0 @@
from typing import Iterator, Tuple, Union
from ...errors import Errors
from ...symbols import NOUN, PRON, PROPN
from ...tokens import Doc, Span
def noun_chunks(doclike: Union[Doc, Span]) -> Iterator[Tuple[int, int, int]]:
"""
Detect base noun phrases from a dependency parse for Haitian Creole.
Works on both Doc and Span objects.
"""
# Core nominal dependencies common in Haitian Creole
labels = [
"nsubj",
"obj",
"obl",
"nmod",
"appos",
"ROOT",
]
# Modifiers to optionally include in chunk (to the right)
post_modifiers = ["compound", "flat", "flat:name", "fixed"]
doc = doclike.doc
if not doc.has_annotation("DEP"):
raise ValueError(Errors.E029)
np_deps = {doc.vocab.strings.add(label) for label in labels}
np_mods = {doc.vocab.strings.add(mod) for mod in post_modifiers}
conj_label = doc.vocab.strings.add("conj")
np_label = doc.vocab.strings.add("NP")
adp_pos = doc.vocab.strings.add("ADP")
cc_pos = doc.vocab.strings.add("CCONJ")
prev_end = -1
for i, word in enumerate(doclike):
if word.pos not in (NOUN, PROPN, PRON):
continue
if word.left_edge.i <= prev_end:
continue
if word.dep in np_deps:
right_end = word
# expand to include known modifiers to the right
for child in word.rights:
if child.dep in np_mods:
right_end = child.right_edge
elif child.pos == NOUN:
right_end = child.right_edge
left_index = word.left_edge.i
# Skip prepositions at the start
if word.left_edge.pos == adp_pos:
left_index += 1
prev_end = right_end.i
yield left_index, right_end.i + 1, np_label
elif word.dep == conj_label:
head = word.head
while head.dep == conj_label and head.head.i < head.i:
head = head.head
if head.dep in np_deps:
left_index = word.left_edge.i
if word.left_edge.pos == cc_pos:
left_index += 1
prev_end = word.i
yield left_index, word.i + 1, np_label
SYNTAX_ITERATORS = {"noun_chunks": noun_chunks}

View File

@ -1,21 +0,0 @@
from spacy.symbols import NOUN, VERB, AUX, ADJ, ADV, PRON, DET, ADP, SCONJ, CCONJ, PART, INTJ, NUM, PROPN, PUNCT, SYM, X
TAG_MAP = {
"NOUN": {"pos": NOUN},
"VERB": {"pos": VERB},
"AUX": {"pos": AUX},
"ADJ": {"pos": ADJ},
"ADV": {"pos": ADV},
"PRON": {"pos": PRON},
"DET": {"pos": DET},
"ADP": {"pos": ADP},
"SCONJ": {"pos": SCONJ},
"CCONJ": {"pos": CCONJ},
"PART": {"pos": PART},
"INTJ": {"pos": INTJ},
"NUM": {"pos": NUM},
"PROPN": {"pos": PROPN},
"PUNCT": {"pos": PUNCT},
"SYM": {"pos": SYM},
"X": {"pos": X},
}

View File

@ -1,121 +0,0 @@
from spacy.symbols import ORTH, NORM
def make_variants(base, first_norm, second_orth, second_norm):
return {
base: [
{ORTH: base.split("'")[0] + "'", NORM: first_norm},
{ORTH: second_orth, NORM: second_norm},
],
base.capitalize(): [
{ORTH: base.split("'")[0].capitalize() + "'", NORM: first_norm.capitalize()},
{ORTH: second_orth, NORM: second_norm},
]
}
TOKENIZER_EXCEPTIONS = {
"Dr.": [{ORTH: "Dr."}]
}
# Apostrophe forms
TOKENIZER_EXCEPTIONS.update(make_variants("m'ap", "mwen", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("n'ap", "nou", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("l'ap", "li", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("y'ap", "yo", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("m'te", "mwen", "te", "te"))
TOKENIZER_EXCEPTIONS.update(make_variants("m'pral", "mwen", "pral", "pral"))
TOKENIZER_EXCEPTIONS.update(make_variants("w'ap", "ou", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("k'ap", "ki", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("p'ap", "pa", "ap", "ap"))
TOKENIZER_EXCEPTIONS.update(make_variants("t'ap", "te", "ap", "ap"))
# Non-apostrophe contractions (with capitalized variants)
TOKENIZER_EXCEPTIONS.update({
"map": [
{ORTH: "m", NORM: "mwen"},
{ORTH: "ap", NORM: "ap"},
],
"Map": [
{ORTH: "M", NORM: "Mwen"},
{ORTH: "ap", NORM: "ap"},
],
"lem": [
{ORTH: "le", NORM: "le"},
{ORTH: "m", NORM: "mwen"},
],
"Lem": [
{ORTH: "Le", NORM: "Le"},
{ORTH: "m", NORM: "mwen"},
],
"lew": [
{ORTH: "le", NORM: "le"},
{ORTH: "w", NORM: "ou"},
],
"Lew": [
{ORTH: "Le", NORM: "Le"},
{ORTH: "w", NORM: "ou"},
],
"nap": [
{ORTH: "n", NORM: "nou"},
{ORTH: "ap", NORM: "ap"},
],
"Nap": [
{ORTH: "N", NORM: "Nou"},
{ORTH: "ap", NORM: "ap"},
],
"lap": [
{ORTH: "l", NORM: "li"},
{ORTH: "ap", NORM: "ap"},
],
"Lap": [
{ORTH: "L", NORM: "Li"},
{ORTH: "ap", NORM: "ap"},
],
"yap": [
{ORTH: "y", NORM: "yo"},
{ORTH: "ap", NORM: "ap"},
],
"Yap": [
{ORTH: "Y", NORM: "Yo"},
{ORTH: "ap", NORM: "ap"},
],
"mte": [
{ORTH: "m", NORM: "mwen"},
{ORTH: "te", NORM: "te"},
],
"Mte": [
{ORTH: "M", NORM: "Mwen"},
{ORTH: "te", NORM: "te"},
],
"mpral": [
{ORTH: "m", NORM: "mwen"},
{ORTH: "pral", NORM: "pral"},
],
"Mpral": [
{ORTH: "M", NORM: "Mwen"},
{ORTH: "pral", NORM: "pral"},
],
"wap": [
{ORTH: "w", NORM: "ou"},
{ORTH: "ap", NORM: "ap"},
],
"Wap": [
{ORTH: "W", NORM: "Ou"},
{ORTH: "ap", NORM: "ap"},
],
"kap": [
{ORTH: "k", NORM: "ki"},
{ORTH: "ap", NORM: "ap"},
],
"Kap": [
{ORTH: "K", NORM: "Ki"},
{ORTH: "ap", NORM: "ap"},
],
"tap": [
{ORTH: "t", NORM: "te"},
{ORTH: "ap", NORM: "ap"},
],
"Tap": [
{ORTH: "T", NORM: "Te"},
{ORTH: "ap", NORM: "ap"},
],
})
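
A quick illustrative check of the exception table built above (uses only names defined in this module):

# "m'ap" is split into the pronoun "m'" (normalised to "mwen") and the
# progressive marker "ap"; make_variants() also adds the capitalised variant.
parts = TOKENIZER_EXCEPTIONS["m'ap"]
assert [p[ORTH] for p in parts] == ["m'", "ap"]
assert parts[0][NORM] == "mwen"
assert "M'ap" in TOKENIZER_EXCEPTIONS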

View File

@ -32,6 +32,7 @@ split_mode = null
"""
@registry.tokenizers("spacy.ja.JapaneseTokenizer")
def create_tokenizer(split_mode: Optional[str] = None):
def japanese_tokenizer_factory(nlp):
return JapaneseTokenizer(nlp.vocab, split_mode=split_mode)

View File

@ -1,16 +0,0 @@
from ...language import BaseDefaults, Language
from .lex_attrs import LEX_ATTRS
from .stop_words import STOP_WORDS
class KurmanjiDefaults(BaseDefaults):
stop_words = STOP_WORDS
lex_attr_getters = LEX_ATTRS
class Kurmanji(Language):
lang = "kmr"
Defaults = KurmanjiDefaults
__all__ = ["Kurmanji"]

View File

@ -1,17 +0,0 @@
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.kmr.examples import sentences
>>> docs = nlp.pipe(sentences)
"""
sentences = [
"Berê mirovan her tim li geşedana pêşerojê ye", # People's gaze is always on the development of the future
"Kawa Nemir di 14 salan de Ulysses wergerand Kurmancî.", # Kawa Nemir translated Ulysses into Kurmanji in 14 years.
"Mem Ararat hunermendekî Kurd yê bi nav û deng e.", # Mem Ararat is a famous Kurdish artist
"Firat Cewerî 40 sal e pirtûkên Kurdî dinivîsîne.", # Firat Ceweri has been writing Kurdish books for 40 years
"Rojnamegerê ciwan nûçeyeke balkêş li ser rewşa aborî nivîsand", # The young journalist wrote an interesting news article about the economic situation
"Sektora çandiniyê beşeke giring a belavkirina gaza serayê li seranserê cîhanê pêk tîne", # The agricultural sector constitutes an important part of greenhouse gas emissions worldwide
"Xwendekarên jêhatî di pêşbaziya matematîkê de serkeftî bûn", # Talented students succeeded in the mathematics competition
"Ji ber ji tunebûnê bavê min xwişkeke min nedan xwendin ew ji min re bû derd û kulek.", # Because of poverty, my father didn't send my sister to school, which became a pain and sorrow for me
]

View File

@ -1,138 +0,0 @@
from ...attrs import LIKE_NUM
_num_words = [
"sifir",
"yek",
"du",
"",
"çar",
"pênc",
"şeş",
"heft",
"heşt",
"neh",
"deh",
"yazde",
"dazde",
"sêzde",
"çarde",
"pazde",
"şazde",
"hevde",
"hejde",
"nozde",
"bîst",
"",
"çil",
"pêncî",
"şêst",
"heftê",
"heştê",
"nod",
"sed",
"hezar",
"milyon",
"milyar",
]
_ordinal_words = [
"yekem",
"yekemîn",
"duyem",
"duyemîn",
"sêyem",
"sêyemîn",
"çarem",
"çaremîn",
"pêncem",
"pêncemîn",
"şeşem",
"şeşemîn",
"heftem",
"heftemîn",
"heştem",
"heştemîn",
"nehem",
"nehemîn",
"dehem",
"dehemîn",
"yazdehem",
"yazdehemîn",
"dazdehem",
"dazdehemîn",
"sêzdehem",
"sêzdehemîn",
"çardehem",
"çardehemîn",
"pazdehem",
"pazdehemîn",
"şanzdehem",
"şanzdehemîn",
"hevdehem",
"hevdehemîn",
"hejdehem",
"hejdehemîn",
"nozdehem",
"nozdehemîn",
"bîstem",
"bîstemîn",
"sîyem",
"sîyemîn",
"çilem",
"çilemîn",
"pêncîyem",
"pênciyemîn",
"şêstem",
"şêstemîn",
"heftêyem",
"heftêyemîn",
"heştêyem",
"heştêyemîn",
"notem",
"notemîn",
"sedem",
"sedemîn",
"hezarem",
"hezaremîn",
"milyonem",
"milyonemîn",
"milyarem",
"milyaremîn",
]
def like_num(text):
if text.startswith(("+", "-", "±", "~")):
text = text[1:]
text = text.replace(",", "").replace(".", "")
if text.isdigit():
return True
if text.count("/") == 1:
num, denom = text.split("/")
if num.isdigit() and denom.isdigit():
return True
text_lower = text.lower()
if text_lower in _num_words:
return True
# Check ordinal number
if text_lower in _ordinal_words:
return True
if is_digit(text_lower):
return True
return False
def is_digit(text):
endings = ("em", "yem", "emîn", "yemîn")
for ending in endings:
to = len(ending)
if text.endswith(ending) and text[:-to].isdigit():
return True
return False
LEX_ATTRS = {LIKE_NUM: like_num}
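
A quick illustrative check of the getters above (uses only names defined in this module):

# Cardinal and ordinal words, digit strings, fractions and digit+suffix
# ordinals such as "21emîn" are all treated as number-like.
assert all(like_num(t) for t in ("sêzde", "bîstem", "3/4", "21emîn"))
assert is_digit("5em")
assert not like_num("pirtûk")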

View File

@ -1,44 +0,0 @@
STOP_WORDS = set(
"""
û
li
bi
di
da
de
ji
ku
ew
ez
tu
em
hûn
ew
ev
min
te
me
we
wan
va
çi
çawa
çima
kengî
li ku
çend
çiqas
her
hin
gelek
hemû
kes
tişt
""".split()
)

View File

@ -20,6 +20,7 @@ DEFAULT_CONFIG = """
"""
@registry.tokenizers("spacy.ko.KoreanTokenizer")
def create_tokenizer():
def korean_tokenizer_factory(nlp):
return KoreanTokenizer(nlp.vocab)

View File

@ -24,6 +24,12 @@ class MacedonianDefaults(BaseDefaults):
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
stop_words = STOP_WORDS
@classmethod
def create_lemmatizer(cls, nlp=None, lookups=None):
if lookups is None:
lookups = Lookups()
return MacedonianLemmatizer(lookups)
class Macedonian(Language):
lang = "mk"

View File

@ -1,20 +0,0 @@
from ...language import BaseDefaults, Language
from ..nb import SYNTAX_ITERATORS
from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
class NorwegianNynorskDefaults(BaseDefaults):
tokenizer_exceptions = TOKENIZER_EXCEPTIONS
prefixes = TOKENIZER_PREFIXES
infixes = TOKENIZER_INFIXES
suffixes = TOKENIZER_SUFFIXES
syntax_iterators = SYNTAX_ITERATORS
class NorwegianNynorsk(Language):
lang = "nn"
Defaults = NorwegianNynorskDefaults
__all__ = ["NorwegianNynorsk"]

View File

@ -1,15 +0,0 @@
"""
Example sentences to test spaCy and its language models.
>>> from spacy.lang.nn.examples import sentences
>>> docs = nlp.pipe(sentences)
"""
# sentences taken from Omsetjingsminne frå Nynorsk pressekontor 2022 (https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-80/)
sentences = [
"Konseptet går ut på at alle tre omgangar tel, alle hopparar må stille i kvalifiseringa og poengsummen skal telje.",
"Det er ein meir enn i same periode i fjor.",
"Det har lava ned enorme snømengder i store delar av Europa den siste tida.",
"Akhtar Chaudhry er ikkje innstilt på Oslo-lista til SV, men utfordrar Heikki Holmås om førsteplassen.",
]

View File

@ -1,74 +0,0 @@
from ..char_classes import (
ALPHA,
ALPHA_LOWER,
ALPHA_UPPER,
CONCAT_QUOTES,
CURRENCY,
LIST_CURRENCY,
LIST_ELLIPSES,
LIST_ICONS,
LIST_PUNCT,
LIST_QUOTES,
PUNCT,
UNITS,
)
from ..punctuation import TOKENIZER_SUFFIXES
_quotes = CONCAT_QUOTES.replace("'", "")
_list_punct = [x for x in LIST_PUNCT if x != "#"]
_list_icons = [x for x in LIST_ICONS if x != "°"]
_list_icons = [x.replace("\\u00B0", "") for x in _list_icons]
_list_quotes = [x for x in LIST_QUOTES if x != "\\'"]
_prefixes = (
["§", "%", "=", "", "", r"\+(?![0-9])"]
+ _list_punct
+ LIST_ELLIPSES
+ LIST_QUOTES
+ LIST_CURRENCY
+ LIST_ICONS
)
_infixes = (
LIST_ELLIPSES
+ _list_icons
+ [
r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER),
r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA),
r"(?<=[{a}])[:<>=/](?=[{a}])".format(a=ALPHA),
r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
r"(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])".format(a=ALPHA, q=_quotes),
r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA),
]
)
_suffixes = (
LIST_PUNCT
+ LIST_ELLIPSES
+ _list_quotes
+ _list_icons
+ ["", ""]
+ [
r"(?<=[0-9])\+",
r"(?<=°[FfCcKk])\.",
r"(?<=[0-9])(?:{c})".format(c=CURRENCY),
r"(?<=[0-9])(?:{u})".format(u=UNITS),
r"(?<=[{al}{e}{p}(?:{q})])\.".format(
al=ALPHA_LOWER, e=r"%²\-\+", q=_quotes, p=PUNCT
),
r"(?<=[{au}][{au}])\.".format(au=ALPHA_UPPER),
]
+ [r"(?<=[^sSxXzZ])'"]
)
_suffixes += [
suffix
for suffix in TOKENIZER_SUFFIXES
if suffix not in ["'s", "'S", "s", "S", r"\'"]
]
TOKENIZER_PREFIXES = _prefixes
TOKENIZER_INFIXES = _infixes
TOKENIZER_SUFFIXES = _suffixes
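
An illustrative sketch of the genitive-apostrophe rule above, compiled with spaCy's standard helper (the helper import is the only assumption beyond this module):

from spacy.util import compile_suffix_regex

suffix_re = compile_suffix_regex(TOKENIZER_SUFFIXES)
# A trailing apostrophe is split off, except after s/x/z where it marks the
# genitive and is kept on the token.
assert suffix_re.search("Kari'") is not None
assert suffix_re.search("Lars'") is None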

View File

@ -1,228 +0,0 @@
from ...symbols import NORM, ORTH
from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS
_exc = {}
for exc_data in [
{ORTH: "jan.", NORM: "januar"},
{ORTH: "feb.", NORM: "februar"},
{ORTH: "mar.", NORM: "mars"},
{ORTH: "apr.", NORM: "april"},
{ORTH: "jun.", NORM: "juni"},
# note: "jul." is in the simple list below without a NORM exception
{ORTH: "aug.", NORM: "august"},
{ORTH: "sep.", NORM: "september"},
{ORTH: "okt.", NORM: "oktober"},
{ORTH: "nov.", NORM: "november"},
{ORTH: "des.", NORM: "desember"},
]:
_exc[exc_data[ORTH]] = [exc_data]
for orth in [
"Ap.",
"Aq.",
"Ca.",
"Chr.",
"Co.",
"Dr.",
"F.eks.",
"Fr.p.",
"Frp.",
"Grl.",
"Kr.",
"Kr.F.",
"Kr.F.s",
"Mr.",
"Mrs.",
"Pb.",
"Pr.",
"Sp.",
"St.",
"a.m.",
"ad.",
"adm.dir.",
"adr.",
"b.c.",
"bl.a.",
"bla.",
"bm.",
"bnr.",
"bto.",
"c.c.",
"ca.",
"cand.mag.",
"co.",
"d.d.",
"d.m.",
"d.y.",
"dept.",
"dr.",
"dr.med.",
"dr.philos.",
"dr.psychol.",
"dss.",
"dvs.",
"e.Kr.",
"e.l.",
"eg.",
"eig.",
"ekskl.",
"el.",
"et.",
"etc.",
"etg.",
"ev.",
"evt.",
"f.",
"f.Kr.",
"f.eks.",
"f.o.m.",
"fhv.",
"fk.",
"foreg.",
"fork.",
"fv.",
"fvt.",
"g.",
"gl.",
"gno.",
"gnr.",
"grl.",
"gt.",
"h.r.adv.",
"hhv.",
"hoh.",
"hr.",
"ifb.",
"ifm.",
"iht.",
"inkl.",
"istf.",
"jf.",
"jr.",
"jul.",
"juris.",
"kfr.",
"kgl.",
"kgl.res.",
"kl.",
"komm.",
"kr.",
"kst.",
"lat.",
"lø.",
"m.a.",
"m.a.o.",
"m.fl.",
"m.m.",
"m.v.",
"ma.",
"mag.art.",
"md.",
"mfl.",
"mht.",
"mill.",
"min.",
"mnd.",
"moh.",
"mrd.",
"muh.",
"mv.",
"mva.",
"n.å.",
"ndf.",
"nr.",
"nto.",
"nyno.",
"o.a.",
"o.l.",
"obl.",
"off.",
"ofl.",
"on.",
"op.",
"org.",
"osv.",
"ovf.",
"p.",
"p.a.",
"p.g.a.",
"p.m.",
"p.t.",
"pga.",
"ph.d.",
"pkt.",
"pr.",
"pst.",
"pt.",
"red.anm.",
"ref.",
"res.",
"res.kap.",
"resp.",
"rv.",
"s.",
"s.d.",
"s.k.",
"s.u.",
"s.å.",
"sen.",
"sep.",
"siviling.",
"sms.",
"snr.",
"spm.",
"sr.",
"sst.",
"st.",
"st.meld.",
"st.prp.",
"stip.",
"stk.",
"stud.",
"sv.",
"såk.",
"sø.",
"t.d.",
"t.h.",
"t.o.m.",
"t.v.",
"temp.",
"ti.",
"tils.",
"tilsv.",
"tl;dr",
"tlf.",
"to.",
"ult.",
"utg.",
"v.",
"vedk.",
"vedr.",
"vg.",
"vgs.",
"vha.",
"vit.ass.",
"vn.",
"vol.",
"vs.",
"vsa.",
"§§",
"©NTB",
"årg.",
"årh.",
]:
_exc[orth] = [{ORTH: orth}]
# Dates
for h in range(1, 31 + 1):
for period in ["."]:
_exc[f"{h}{period}"] = [{ORTH: f"{h}."}]
_custom_base_exc = {"i.": [{ORTH: "i", NORM: "i"}, {ORTH: "."}]}
_exc.update(_custom_base_exc)
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
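
A quick illustrative check of the exception table above (uses only names defined in this module):

# Month abbreviations carry a NORM, date-like tokens such as "31." stay whole,
# and the custom "i." exception splits into "i" + ".".
assert TOKENIZER_EXCEPTIONS["jan."] == [{ORTH: "jan.", NORM: "januar"}]
assert TOKENIZER_EXCEPTIONS["31."] == [{ORTH: "31."}]
assert TOKENIZER_EXCEPTIONS["i."] == [{ORTH: "i", NORM: "i"}, {ORTH: "."}]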

View File

@ -13,6 +13,7 @@ DEFAULT_CONFIG = """
"""
@registry.tokenizers("spacy.th.ThaiTokenizer")
def create_thai_tokenizer():
def thai_tokenizer_factory(nlp):
return ThaiTokenizer(nlp.vocab)

View File

@ -22,6 +22,7 @@ use_pyvi = true
"""
@registry.tokenizers("spacy.vi.VietnameseTokenizer")
def create_vietnamese_tokenizer(use_pyvi: bool = True):
def vietnamese_tokenizer_factory(nlp):
return VietnameseTokenizer(nlp.vocab, use_pyvi=use_pyvi)

View File

@ -46,6 +46,7 @@ class Segmenter(str, Enum):
return list(cls.__members__.keys())
@registry.tokenizers("spacy.zh.ChineseTokenizer")
def create_chinese_tokenizer(segmenter: Segmenter = Segmenter.char):
def chinese_tokenizer_factory(nlp):
return ChineseTokenizer(nlp.vocab, segmenter=segmenter)

View File

@ -5,7 +5,7 @@ import multiprocessing as mp
import random
import traceback
import warnings
from contextlib import ExitStack, contextmanager
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
from itertools import chain, cycle
@ -30,11 +30,8 @@ from typing import (
overload,
)
import numpy
import srsly
from cymem.cymem import Pool
from thinc.api import Config, CupyOps, Optimizer, get_current_ops
from thinc.util import convert_recursive
from . import about, ty, util
from .compat import Literal
@ -104,6 +101,7 @@ class BaseDefaults:
writing_system = {"direction": "ltr", "has_case": True, "has_letters": True}
@registry.tokenizers("spacy.Tokenizer.v1")
def create_tokenizer() -> Callable[["Language"], Tokenizer]:
"""Registered function to create a tokenizer. Returns a factory that takes
the nlp object and returns a Tokenizer instance using the language defaults.
@ -129,6 +127,7 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
return tokenizer_factory
@registry.misc("spacy.LookupsDataLoader.v1")
def load_lookups_data(lang, tables):
util.logger.debug("Loading lookups from spacy-lookups-data: %s", tables)
lookups = load_lookups(lang=lang, tables=tables)
@ -141,7 +140,7 @@ class Language:
Defaults (class): Settings, data and factory methods for creating the `nlp`
object and processing pipeline.
lang (str): Two-letter ISO 639-1 or three-letter ISO 639-3 language codes, such as 'en' and 'eng'.
lang (str): IETF language code, such as 'en'.
DOCS: https://spacy.io/api/language
"""
@ -183,9 +182,6 @@ class Language:
DOCS: https://spacy.io/api/language#init
"""
from .pipeline.factories import register_factories
register_factories()
# We're only calling this to import all factories provided via entry
# points. The factory decorator applied to these functions takes care
# of the rest.
@ -1215,7 +1211,7 @@ class Language:
examples,
):
eg.predicted = doc
return _replace_numpy_floats(losses)
return losses
def rehearse(
self,
@ -1466,7 +1462,7 @@ class Language:
results = scorer.score(examples, per_component=per_component)
n_words = sum(len(eg.predicted) for eg in examples)
results["speed"] = n_words / (end_time - start_time)
return _replace_numpy_floats(results)
return results
def create_optimizer(self):
"""Create an optimizer, usually using the [training.optimizer] config."""
@ -1687,12 +1683,6 @@ class Language:
for proc in procs:
proc.start()
# Close the writing end of the channels. This is needed so that reading
# from the channel does not block indefinitely when the worker closes
# the channel.
for tx in bytedocs_send_ch:
tx.close()
# Cycle through the channels so that the order of the docs is preserved.
# The received object is a batch of byte-encoded docs, so flatten them with chain.from_iterable.
byte_tuples = chain.from_iterable(
@ -1715,27 +1705,8 @@ class Language:
# tell `sender` that one batch was consumed.
sender.step()
finally:
# If we are stopping in an orderly fashion, the workers' queues
# are empty. Put the sentinel in their queues to signal that work
# is done, so that they can exit gracefully.
for q in texts_q:
q.put(_WORK_DONE_SENTINEL)
q.close()
# Otherwise, we are stopping because the error handler raised an
# exception. The sentinel will be last to go out of the queue.
# To avoid doing unnecessary work or hanging on platforms that
# block on sending (Windows), we'll close our end of the channel.
# This signals to the worker that it can exit the next time it
# attempts to send data down the channel.
for r in bytedocs_recv_ch:
r.close()
for proc in procs:
proc.join()
if not all(proc.exitcode == 0 for proc in procs):
warnings.warn(Warnings.W127)
proc.terminate()
def _link_components(self) -> None:
"""Register 'listeners' within pipeline components, to allow them to
@ -2095,38 +2066,6 @@ class Language:
util.replace_model_node(pipe.model, listener, new_model) # type: ignore[attr-defined]
tok2vec.remove_listener(listener, pipe_name)
@contextmanager
def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]:
"""Begin a block where all resources allocated during the block will
be freed at the end of it. If a resource was created within the
memory zone block, accessing it outside the block is invalid.
Behaviour of this invalid access is undefined. Memory zones should
not be nested.
The memory zone is helpful for services that need to process large
volumes of text with a defined memory budget.
Example
-------
>>> with nlp.memory_zone():
... for doc in nlp.pipe(texts):
... process_my_doc(doc)
>>> # use_doc(doc) <-- Invalid: doc was allocated in the memory zone
"""
if mem is None:
mem = Pool()
# The ExitStack allows programmatic nested context managers.
# We don't know how many we need, so it would be awkward to have
# them as nested blocks.
with ExitStack() as stack:
contexts = [stack.enter_context(self.vocab.memory_zone(mem))]
if hasattr(self.tokenizer, "memory_zone"):
contexts.append(stack.enter_context(self.tokenizer.memory_zone(mem)))
for _, pipe in self.pipeline:
if hasattr(pipe, "memory_zone"):
contexts.append(stack.enter_context(pipe.memory_zone(mem)))
yield mem
def to_disk(
self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList()
) -> None:
@ -2144,9 +2083,7 @@ class Language:
serializers["tokenizer"] = lambda p: self.tokenizer.to_disk( # type: ignore[union-attr]
p, exclude=["vocab"]
)
serializers["meta.json"] = lambda p: srsly.write_json(
p, _replace_numpy_floats(self.meta)
)
serializers["meta.json"] = lambda p: srsly.write_json(p, self.meta)
serializers["config.cfg"] = lambda p: self.config.to_disk(p)
for name, proc in self._components:
if name in exclude:
@ -2260,9 +2197,7 @@ class Language:
serializers: Dict[str, Callable[[], bytes]] = {}
serializers["vocab"] = lambda: self.vocab.to_bytes(exclude=exclude)
serializers["tokenizer"] = lambda: self.tokenizer.to_bytes(exclude=["vocab"]) # type: ignore[union-attr]
serializers["meta.json"] = lambda: srsly.json_dumps(
_replace_numpy_floats(self.meta)
)
serializers["meta.json"] = lambda: srsly.json_dumps(self.meta)
serializers["config.cfg"] = lambda: self.config.to_bytes()
for name, proc in self._components:
if name in exclude:
@ -2313,12 +2248,6 @@ class Language:
return self
def _replace_numpy_floats(meta_dict: dict) -> dict:
return convert_recursive(
lambda v: isinstance(v, numpy.floating), lambda v: float(v), dict(meta_dict)
)
@dataclass
class FactoryMeta:
"""Dataclass containing information about a component and its defaults
@ -2394,13 +2323,6 @@ def _apply_pipes(
while True:
try:
texts_with_ctx = receiver.get()
# Stop working if we encounter the end-of-work sentinel.
if isinstance(texts_with_ctx, _WorkDoneSentinel):
sender.close()
receiver.close()
return
docs = (
ensure_doc(doc_like, context) for doc_like, context in texts_with_ctx
)
@ -2409,23 +2331,11 @@ def _apply_pipes(
# Connection does not accept unpickable objects, so send list.
byte_docs = [(doc.to_bytes(), doc._context, None) for doc in docs]
padding = [(None, None, None)] * (len(texts_with_ctx) - len(byte_docs))
data: Sequence[Tuple[Optional[bytes], Optional[Any], Optional[bytes]]] = (
byte_docs + padding # type: ignore[operator]
)
sender.send(byte_docs + padding) # type: ignore[operator]
except Exception:
error_msg = [(None, None, srsly.msgpack_dumps(traceback.format_exc()))]
padding = [(None, None, None)] * (len(texts_with_ctx) - 1)
data = error_msg + padding
try:
sender.send(data)
except BrokenPipeError:
# Parent has closed the pipe prematurely. This happens when a
# worker encounters an error and the error handler is set to
# stop processing.
sender.close()
receiver.close()
return
sender.send(error_msg + padding)
class _Sender:
@ -2455,10 +2365,3 @@ class _Sender:
if self.count >= self.chunk_size:
self.count = 0
self.send()
class _WorkDoneSentinel:
pass
_WORK_DONE_SENTINEL = _WorkDoneSentinel()

View File

@ -35,7 +35,7 @@ cdef class Lexeme:
return self
@staticmethod
cdef inline void set_struct_attr(LexemeC* lex, attr_id_t name, attr_t value) noexcept nogil:
cdef inline void set_struct_attr(LexemeC* lex, attr_id_t name, attr_t value) nogil:
if name < (sizeof(flags_t) * 8):
Lexeme.c_set_flag(lex, name, value)
elif name == ID:
@ -54,7 +54,7 @@ cdef class Lexeme:
lex.lang = value
@staticmethod
cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) noexcept nogil:
cdef inline attr_t get_struct_attr(const LexemeC* lex, attr_id_t feat_name) nogil:
if feat_name < (sizeof(flags_t) * 8):
if Lexeme.c_check_flag(lex, feat_name):
return 1
@ -82,7 +82,7 @@ cdef class Lexeme:
return 0
@staticmethod
cdef inline bint c_check_flag(const LexemeC* lexeme, attr_id_t flag_id) noexcept nogil:
cdef inline bint c_check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
cdef flags_t one = 1
if lexeme.flags & (one << flag_id):
return True
@ -90,7 +90,7 @@ cdef class Lexeme:
return False
@staticmethod
cdef inline bint c_set_flag(LexemeC* lex, attr_id_t flag_id, bint value) noexcept nogil:
cdef inline bint c_set_flag(LexemeC* lex, attr_id_t flag_id, bint value) nogil:
cdef flags_t one = 1
if value:
lex.flags |= one << flag_id

View File

@ -70,7 +70,7 @@ cdef class Lexeme:
if isinstance(other, Lexeme):
a = self.orth
b = other.orth
elif isinstance(other, int):
elif isinstance(other, long):
a = self.orth
b = other
elif isinstance(other, str):
@ -104,7 +104,7 @@ cdef class Lexeme:
# skip PROB, e.g. from lexemes.jsonl
if isinstance(value, float):
continue
elif isinstance(value, int):
elif isinstance(value, (int, long)):
Lexeme.set_struct_attr(self.c, attr, value)
else:
Lexeme.set_struct_attr(self.c, attr, self.vocab.strings.add(value))
@ -164,48 +164,45 @@ cdef class Lexeme:
vector = self.vector
return numpy.sqrt((vector**2).sum())
@property
def vector(self):
property vector:
"""A real-valued meaning representation.
RETURNS (numpy.ndarray[ndim=1, dtype='float32']): A 1D numpy array
representing the lexeme's semantics.
"""
cdef int length = self.vocab.vectors_length
if length == 0:
raise ValueError(Errors.E010)
return self.vocab.get_vector(self.c.orth)
def __get__(self):
cdef int length = self.vocab.vectors_length
if length == 0:
raise ValueError(Errors.E010)
return self.vocab.get_vector(self.c.orth)
@vector.setter
def vector(self, vector):
if len(vector) != self.vocab.vectors_length:
raise ValueError(Errors.E073.format(new_length=len(vector),
length=self.vocab.vectors_length))
self.vocab.set_vector(self.c.orth, vector)
def __set__(self, vector):
if len(vector) != self.vocab.vectors_length:
raise ValueError(Errors.E073.format(new_length=len(vector),
length=self.vocab.vectors_length))
self.vocab.set_vector(self.c.orth, vector)
@property
def rank(self):
property rank:
"""RETURNS (str): Sequential ID of the lexeme's lexical type, used
to index into tables, e.g. for word vectors."""
return self.c.id
def __get__(self):
return self.c.id
@rank.setter
def rank(self, value):
self.c.id = value
def __set__(self, value):
self.c.id = value
@property
def sentiment(self):
property sentiment:
"""RETURNS (float): A scalar value indicating the positivity or
negativity of the lexeme."""
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
return sentiment_table.get(self.c.orth, 0.0)
def __get__(self):
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment", {})
return sentiment_table.get(self.c.orth, 0.0)
@sentiment.setter
def sentiment(self, float x):
if "lexeme_sentiment" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_sentiment")
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
sentiment_table[self.c.orth] = x
def __set__(self, float x):
if "lexeme_sentiment" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_sentiment")
sentiment_table = self.vocab.lookups.get_table("lexeme_sentiment")
sentiment_table[self.c.orth] = x
@property
def orth_(self):
@ -219,338 +216,306 @@ cdef class Lexeme:
"""RETURNS (str): The original verbatim text of the lexeme."""
return self.orth_
@property
def lower(self):
property lower:
"""RETURNS (uint64): Lowercase form of the lexeme."""
return self.c.lower
def __get__(self):
return self.c.lower
@lower.setter
def lower(self, attr_t x):
self.c.lower = x
def __set__(self, attr_t x):
self.c.lower = x
@property
def norm(self):
property norm:
"""RETURNS (uint64): The lexeme's norm, i.e. a normalised form of the
lexeme text.
"""
return self.c.norm
def __get__(self):
return self.c.norm
@norm.setter
def norm(self, attr_t x):
if "lexeme_norm" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_norm")
norm_table = self.vocab.lookups.get_table("lexeme_norm")
norm_table[self.c.orth] = self.vocab.strings[x]
self.c.norm = x
def __set__(self, attr_t x):
if "lexeme_norm" not in self.vocab.lookups:
self.vocab.lookups.add_table("lexeme_norm")
norm_table = self.vocab.lookups.get_table("lexeme_norm")
norm_table[self.c.orth] = self.vocab.strings[x]
self.c.norm = x
@property
def shape(self):
property shape:
"""RETURNS (uint64): Transform of the word's string, to show
orthographic features.
"""
return self.c.shape
def __get__(self):
return self.c.shape
@shape.setter
def shape(self, attr_t x):
self.c.shape = x
def __set__(self, attr_t x):
self.c.shape = x
@property
def prefix(self):
property prefix:
"""RETURNS (uint64): Length-N substring from the start of the word.
Defaults to `N=1`.
"""
return self.c.prefix
def __get__(self):
return self.c.prefix
@prefix.setter
def prefix(self, attr_t x):
self.c.prefix = x
def __set__(self, attr_t x):
self.c.prefix = x
@property
def suffix(self):
property suffix:
"""RETURNS (uint64): Length-N substring from the end of the word.
Defaults to `N=3`.
"""
return self.c.suffix
def __get__(self):
return self.c.suffix
@suffix.setter
def suffix(self, attr_t x):
self.c.suffix = x
def __set__(self, attr_t x):
self.c.suffix = x
@property
def cluster(self):
property cluster:
"""RETURNS (int): Brown cluster ID."""
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
return cluster_table.get(self.c.orth, 0)
def __get__(self):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
return cluster_table.get(self.c.orth, 0)
@cluster.setter
def cluster(self, int x):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
cluster_table[self.c.orth] = x
def __set__(self, int x):
cluster_table = self.vocab.lookups.get_table("lexeme_cluster", {})
cluster_table[self.c.orth] = x
@property
def lang(self):
property lang:
"""RETURNS (uint64): Language of the parent vocabulary."""
return self.c.lang
def __get__(self):
return self.c.lang
@lang.setter
def lang(self, attr_t x):
self.c.lang = x
def __set__(self, attr_t x):
self.c.lang = x
@property
def prob(self):
property prob:
"""RETURNS (float): Smoothed log probability estimate of the lexeme's
type."""
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
default_oov_prob = settings_table.get("oov_prob", -20.0)
return prob_table.get(self.c.orth, default_oov_prob)
def __get__(self):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
settings_table = self.vocab.lookups.get_table("lexeme_settings", {})
default_oov_prob = settings_table.get("oov_prob", -20.0)
return prob_table.get(self.c.orth, default_oov_prob)
@prob.setter
def prob(self, float x):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
prob_table[self.c.orth] = x
def __set__(self, float x):
prob_table = self.vocab.lookups.get_table("lexeme_prob", {})
prob_table[self.c.orth] = x
@property
def lower_(self):
property lower_:
"""RETURNS (str): Lowercase form of the word."""
return self.vocab.strings[self.c.lower]
def __get__(self):
return self.vocab.strings[self.c.lower]
@lower_.setter
def lower_(self, str x):
self.c.lower = self.vocab.strings.add(x)
def __set__(self, str x):
self.c.lower = self.vocab.strings.add(x)
@property
def norm_(self):
property norm_:
"""RETURNS (str): The lexeme's norm, i.e. a normalised form of the
lexeme text.
"""
return self.vocab.strings[self.c.norm]
def __get__(self):
return self.vocab.strings[self.c.norm]
@norm_.setter
def norm_(self, str x):
self.norm = self.vocab.strings.add(x)
def __set__(self, str x):
self.norm = self.vocab.strings.add(x)
@property
def shape_(self):
property shape_:
"""RETURNS (str): Transform of the word's string, to show
orthographic features.
"""
return self.vocab.strings[self.c.shape]
def __get__(self):
return self.vocab.strings[self.c.shape]
@shape_.setter
def shape_(self, str x):
self.c.shape = self.vocab.strings.add(x)
def __set__(self, str x):
self.c.shape = self.vocab.strings.add(x)
@property
def prefix_(self):
property prefix_:
"""RETURNS (str): Length-N substring from the start of the word.
Defaults to `N=1`.
"""
return self.vocab.strings[self.c.prefix]
def __get__(self):
return self.vocab.strings[self.c.prefix]
@prefix_.setter
def prefix_(self, str x):
self.c.prefix = self.vocab.strings.add(x)
def __set__(self, str x):
self.c.prefix = self.vocab.strings.add(x)
@property
def suffix_(self):
property suffix_:
"""RETURNS (str): Length-N substring from the end of the word.
Defaults to `N=3`.
"""
return self.vocab.strings[self.c.suffix]
def __get__(self):
return self.vocab.strings[self.c.suffix]
@suffix_.setter
def suffix_(self, str x):
self.c.suffix = self.vocab.strings.add(x)
def __set__(self, str x):
self.c.suffix = self.vocab.strings.add(x)
@property
def lang_(self):
property lang_:
"""RETURNS (str): Language of the parent vocabulary."""
return self.vocab.strings[self.c.lang]
def __get__(self):
return self.vocab.strings[self.c.lang]
@lang_.setter
def lang_(self, str x):
self.c.lang = self.vocab.strings.add(x)
def __set__(self, str x):
self.c.lang = self.vocab.strings.add(x)
@property
def flags(self):
property flags:
"""RETURNS (uint64): Container of the lexeme's binary flags."""
return self.c.flags
def __get__(self):
return self.c.flags
@flags.setter
def flags(self, flags_t x):
self.c.flags = x
def __set__(self, flags_t x):
self.c.flags = x
@property
def is_oov(self):
"""RETURNS (bool): Whether the lexeme is out-of-vocabulary."""
return self.orth not in self.vocab.vectors
@property
def is_stop(self):
property is_stop:
"""RETURNS (bool): Whether the lexeme is a stop word."""
return Lexeme.c_check_flag(self.c, IS_STOP)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_STOP)
@is_stop.setter
def is_stop(self, bint x):
Lexeme.c_set_flag(self.c, IS_STOP, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_STOP, x)
@property
def is_alpha(self):
property is_alpha:
"""RETURNS (bool): Whether the lexeme consists of alphabetic
characters. Equivalent to `lexeme.text.isalpha()`.
"""
return Lexeme.c_check_flag(self.c, IS_ALPHA)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_ALPHA)
@is_alpha.setter
def is_alpha(self, bint x):
Lexeme.c_set_flag(self.c, IS_ALPHA, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_ALPHA, x)
@property
def is_ascii(self):
property is_ascii:
"""RETURNS (bool): Whether the lexeme consists of ASCII characters.
Equivalent to `all(ord(c) < 128 for c in lexeme.text)`.
"""
return Lexeme.c_check_flag(self.c, IS_ASCII)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_ASCII)
@is_ascii.setter
def is_ascii(self, bint x):
Lexeme.c_set_flag(self.c, IS_ASCII, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_ASCII, x)
@property
def is_digit(self):
property is_digit:
"""RETURNS (bool): Whether the lexeme consists of digits. Equivalent
to `lexeme.text.isdigit()`.
"""
return Lexeme.c_check_flag(self.c, IS_DIGIT)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_DIGIT)
@is_digit.setter
def is_digit(self, bint x):
Lexeme.c_set_flag(self.c, IS_DIGIT, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_DIGIT, x)
@property
def is_lower(self):
property is_lower:
"""RETURNS (bool): Whether the lexeme is in lowercase. Equivalent to
`lexeme.text.islower()`.
"""
return Lexeme.c_check_flag(self.c, IS_LOWER)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_LOWER)
@is_lower.setter
def is_lower(self, bint x):
Lexeme.c_set_flag(self.c, IS_LOWER, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_LOWER, x)
@property
def is_upper(self):
property is_upper:
"""RETURNS (bool): Whether the lexeme is in uppercase. Equivalent to
`lexeme.text.isupper()`.
"""
return Lexeme.c_check_flag(self.c, IS_UPPER)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_UPPER)
@is_upper.setter
def is_upper(self, bint x):
Lexeme.c_set_flag(self.c, IS_UPPER, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_UPPER, x)
@property
def is_title(self):
property is_title:
"""RETURNS (bool): Whether the lexeme is in titlecase. Equivalent to
`lexeme.text.istitle()`.
"""
return Lexeme.c_check_flag(self.c, IS_TITLE)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_TITLE)
@is_title.setter
def is_title(self, bint x):
Lexeme.c_set_flag(self.c, IS_TITLE, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_TITLE, x)
@property
def is_punct(self):
property is_punct:
"""RETURNS (bool): Whether the lexeme is punctuation."""
return Lexeme.c_check_flag(self.c, IS_PUNCT)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_PUNCT)
@is_punct.setter
def is_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_PUNCT, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_PUNCT, x)
@property
def is_space(self):
property is_space:
"""RETURNS (bool): Whether the lexeme consist of whitespace characters.
Equivalent to `lexeme.text.isspace()`.
"""
return Lexeme.c_check_flag(self.c, IS_SPACE)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_SPACE)
@is_space.setter
def is_space(self, bint x):
Lexeme.c_set_flag(self.c, IS_SPACE, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_SPACE, x)
@property
def is_bracket(self):
property is_bracket:
"""RETURNS (bool): Whether the lexeme is a bracket."""
return Lexeme.c_check_flag(self.c, IS_BRACKET)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_BRACKET)
@is_bracket.setter
def is_bracket(self, bint x):
Lexeme.c_set_flag(self.c, IS_BRACKET, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_BRACKET, x)
@property
def is_quote(self):
property is_quote:
"""RETURNS (bool): Whether the lexeme is a quotation mark."""
return Lexeme.c_check_flag(self.c, IS_QUOTE)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_QUOTE)
@is_quote.setter
def is_quote(self, bint x):
Lexeme.c_set_flag(self.c, IS_QUOTE, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_QUOTE, x)
@property
def is_left_punct(self):
property is_left_punct:
"""RETURNS (bool): Whether the lexeme is left punctuation, e.g. (."""
return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT)
@is_left_punct.setter
def is_left_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x)
@property
def is_right_punct(self):
property is_right_punct:
"""RETURNS (bool): Whether the lexeme is right punctuation, e.g. )."""
return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT)
@is_right_punct.setter
def is_right_punct(self, bint x):
Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x)
@property
def is_currency(self):
property is_currency:
"""RETURNS (bool): Whether the lexeme is a currency symbol, e.g. $, €."""
return Lexeme.c_check_flag(self.c, IS_CURRENCY)
def __get__(self):
return Lexeme.c_check_flag(self.c, IS_CURRENCY)
@is_currency.setter
def is_currency(self, bint x):
Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, IS_CURRENCY, x)
@property
def like_url(self):
property like_url:
"""RETURNS (bool): Whether the lexeme resembles a URL."""
return Lexeme.c_check_flag(self.c, LIKE_URL)
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_URL)
@like_url.setter
def like_url(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_URL, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_URL, x)
@property
def like_num(self):
property like_num:
"""RETURNS (bool): Whether the lexeme represents a number, e.g. "10.9",
"10", "ten", etc.
"""
return Lexeme.c_check_flag(self.c, LIKE_NUM)
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_NUM)
@like_num.setter
def like_num(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_NUM, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_NUM, x)
@property
def like_email(self):
property like_email:
"""RETURNS (bool): Whether the lexeme resembles an email address."""
return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
def __get__(self):
return Lexeme.c_check_flag(self.c, LIKE_EMAIL)
@like_email.setter
def like_email(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)
def __set__(self, bint x):
Lexeme.c_set_flag(self.c, LIKE_EMAIL, x)
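
An illustrative sketch of the lexeme attributes defined above; the blank English pipeline is arbitrary and only used to obtain a vocab:

import spacy

nlp = spacy.blank("en")
lex = nlp.vocab["Oslo"]
# Flag getters are computed from the language's lexical attribute getters...
assert lex.is_title and lex.is_alpha and not lex.like_num
# ...and the setters write back to the underlying LexemeC struct.
lex.is_stop = True
assert nlp.vocab["Oslo"].is_stop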

View File

@ -3,4 +3,4 @@ from .levenshtein import levenshtein
from .matcher import Matcher
from .phrasematcher import PhraseMatcher
__all__ = ["DependencyMatcher", "Matcher", "PhraseMatcher", "levenshtein"]
__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein"]

View File

@ -1,4 +1,4 @@
# cython: binding=True, infer_types=True, language_level=3
# cython: binding=True, infer_types=True
from cpython.object cimport PyObject
from libc.stdint cimport int64_t
@ -27,5 +27,6 @@ cpdef bint levenshtein_compare(input_text: str, pattern_text: str, fuzzy: int =
return levenshtein(input_text, pattern_text, max_edits) <= max_edits
@registry.misc("spacy.levenshtein_compare.v1")
def make_levenshtein_compare():
return levenshtein_compare
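
An illustrative check of the comparison helper above: "kitten" to "sitting" needs three edits, so it only matches when the explicit fuzzy budget allows it.

assert levenshtein_compare("kitten", "sitting", 3)       # three edits allowed
assert not levenshtein_compare("kitten", "sitting", 1)   # budget too small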

View File

@ -625,7 +625,7 @@ cdef action_t get_action(
const TokenC * token,
const attr_t * extra_attrs,
const int8_t * predicate_matches
) noexcept nogil:
) nogil:
"""We need to consider:
a) Does the token match the specification? [Yes, No]
b) What's the quantifier? [1, 0+, ?]
@ -740,7 +740,7 @@ cdef int8_t get_is_match(
const TokenC* token,
const attr_t* extra_attrs,
const int8_t* predicate_matches
) noexcept nogil:
) nogil:
for i in range(state.pattern.nr_py):
if predicate_matches[state.pattern.py_predicates[i]] == -1:
return 0
@ -755,14 +755,14 @@ cdef int8_t get_is_match(
return True
cdef inline int8_t get_is_final(PatternStateC state) noexcept nogil:
cdef inline int8_t get_is_final(PatternStateC state) nogil:
if state.pattern[1].quantifier == FINAL_ID:
return 1
else:
return 0
cdef inline int8_t get_quantifier(PatternStateC state) noexcept nogil:
cdef inline int8_t get_quantifier(PatternStateC state) nogil:
return state.pattern.quantifier
@ -805,7 +805,7 @@ cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id, object token_specs)
return pattern
cdef attr_t get_ent_id(const TokenPatternC* pattern) noexcept nogil:
cdef attr_t get_ent_id(const TokenPatternC* pattern) nogil:
while pattern.quantifier != FINAL_ID:
pattern += 1
id_attr = pattern[0].attrs[0]

View File

@ -47,7 +47,7 @@ cdef class PhraseMatcher:
self._terminal_hash = 826361138722620965
map_init(self.mem, self.c_map, 8)
if isinstance(attr, int):
if isinstance(attr, (int, long)):
self.attr = attr
else:
if attr is None:

View File

@ -7,6 +7,7 @@ from ..tokens import Doc
from ..util import registry
@registry.layers("spacy.CharEmbed.v1")
def CharacterEmbed(nM: int, nC: int) -> Model[List[Doc], List[Floats2d]]:
# nM: Number of dimensions per character. nC: Number of characters.
return Model(

View File

@ -3,6 +3,7 @@ from thinc.api import Model, normal_init
from ..util import registry
@registry.layers("spacy.PrecomputableAffine.v1")
def PrecomputableAffine(nO, nI, nF, nP, dropout=0.1):
model = Model(
"precomputable_affine",

View File

@ -50,6 +50,7 @@ def models_with_nvtx_range(nlp, forward_color: int, backprop_color: int):
return nlp
@registry.callbacks("spacy.models_with_nvtx_range.v1")
def create_models_with_nvtx_range(
forward_color: int = -1, backprop_color: int = -1
) -> Callable[["Language"], "Language"]:
@ -109,6 +110,7 @@ def pipes_with_nvtx_range(
return nlp
@registry.callbacks("spacy.models_and_pipes_with_nvtx_range.v1")
def create_models_and_pipes_with_nvtx_range(
forward_color: int = -1,
backprop_color: int = -1,

View File

@ -4,6 +4,7 @@ from ..attrs import LOWER
from ..util import registry
@registry.layers("spacy.extract_ngrams.v1")
def extract_ngrams(ngram_size: int, attr: int = LOWER) -> Model:
model: Model = Model("extract_ngrams", forward)
model.attrs["ngram_size"] = ngram_size

View File

@ -6,6 +6,7 @@ from thinc.types import Ints1d, Ragged
from ..util import registry
@registry.layers("spacy.extract_spans.v1")
def extract_spans() -> Model[Tuple[Ragged, Ragged], Ragged]:
"""Extract spans from a sequence of source arrays, as specified by an array
of (start, end) indices. The output is a ragged array of the

View File

@ -6,9 +6,8 @@ from thinc.types import Ints2d
from ..tokens import Doc
def FeatureExtractor(
columns: Union[List[str], List[int], List[Union[int, str]]]
) -> Model[List[Doc], List[Ints2d]]:
@registry.layers("spacy.FeatureExtractor.v1")
def FeatureExtractor(columns: List[Union[int, str]]) -> Model[List[Doc], List[Ints2d]]:
return Model("extract_features", forward, attrs={"columns": columns})

View File

@ -28,6 +28,7 @@ from ...vocab import Vocab
from ..extract_spans import extract_spans
@registry.architectures("spacy.EntityLinker.v2")
def build_nel_encoder(
tok2vec: Model, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
@ -91,6 +92,7 @@ def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callab
return out, lambda x: []
@registry.misc("spacy.KBFromFile.v1")
def load_kb(
kb_path: Path,
) -> Callable[[Vocab], KnowledgeBase]:
@ -102,6 +104,7 @@ def load_kb(
return kb_from_file
@registry.misc("spacy.EmptyKB.v2")
def empty_kb_for_config() -> Callable[[Vocab, int], KnowledgeBase]:
def empty_kb_factory(vocab: Vocab, entity_vector_length: int):
return InMemoryLookupKB(vocab=vocab, entity_vector_length=entity_vector_length)
@ -109,6 +112,7 @@ def empty_kb_for_config() -> Callable[[Vocab, int], KnowledgeBase]:
return empty_kb_factory
@registry.misc("spacy.EmptyKB.v1")
def empty_kb(
entity_vector_length: int,
) -> Callable[[Vocab], KnowledgeBase]:
@ -118,10 +122,12 @@ def empty_kb(
return empty_kb_factory
@registry.misc("spacy.CandidateGenerator.v1")
def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]:
return get_candidates
@registry.misc("spacy.CandidateBatchGenerator.v1")
def create_candidates_batch() -> Callable[
[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
]:

View File

@ -30,6 +30,7 @@ if TYPE_CHECKING:
from ...vocab import Vocab # noqa: F401
@registry.architectures("spacy.PretrainVectors.v1")
def create_pretrain_vectors(
maxout_pieces: int, hidden_size: int, loss: str
) -> Callable[["Vocab", Model], Model]:
@ -56,6 +57,7 @@ def create_pretrain_vectors(
return create_vectors_objective
@registry.architectures("spacy.PretrainCharacters.v1")
def create_pretrain_characters(
maxout_pieces: int, hidden_size: int, n_characters: int
) -> Callable[["Vocab", Model], Model]:

View File

@ -11,6 +11,7 @@ from .._precomputable_affine import PrecomputableAffine
from ..tb_framework import TransitionModel
@registry.architectures("spacy.TransitionBasedParser.v2")
def build_tb_parser_model(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],

View File

@ -10,6 +10,7 @@ InT = List[Doc]
OutT = Floats2d
@registry.architectures("spacy.SpanFinder.v1")
def build_finder_model(
tok2vec: Model[InT, List[Floats2d]], scorer: Model[OutT, OutT]
) -> Model[InT, OutT]:

View File

@ -22,6 +22,7 @@ from ...util import registry
from ..extract_spans import extract_spans
@registry.layers("spacy.LinearLogistic.v1")
def build_linear_logistic(nO=None, nI=None) -> Model[Floats2d, Floats2d]:
"""An output layer for multi-label classification. It uses a linear layer
followed by a logistic activation.
@ -29,6 +30,7 @@ def build_linear_logistic(nO=None, nI=None) -> Model[Floats2d, Floats2d]:
return chain(Linear(nO=nO, nI=nI, init_W=glorot_uniform_init), Logistic())
@registry.layers("spacy.mean_max_reducer.v1")
def build_mean_max_reducer(hidden_size: int) -> Model[Ragged, Floats2d]:
"""Reduce sequences by concatenating their mean and max pooled vectors,
and then combine the concatenated vectors with a hidden layer.
@ -44,6 +46,7 @@ def build_mean_max_reducer(hidden_size: int) -> Model[Ragged, Floats2d]:
)
@registry.architectures("spacy.SpanCategorizer.v1")
def build_spancat_model(
tok2vec: Model[List[Doc], List[Floats2d]],
reducer: Model[Ragged, Floats2d],

View File

@ -7,6 +7,7 @@ from ...tokens import Doc
from ...util import registry
@registry.architectures("spacy.Tagger.v2")
def build_tagger_model(
tok2vec: Model[List[Doc], List[Floats2d]], nO: Optional[int] = None, normalize=False
) -> Model[List[Doc], List[Floats2d]]:

View File

@ -1,27 +1,21 @@
from functools import partial
from typing import List, Optional, Tuple, cast
from typing import List, Optional, cast
from thinc.api import (
Dropout,
Gelu,
LayerNorm,
Linear,
Logistic,
Maxout,
Model,
ParametricAttention,
ParametricAttention_v2,
Relu,
Softmax,
SparseLinear,
SparseLinear_v2,
chain,
clone,
concatenate,
list2ragged,
reduce_first,
reduce_last,
reduce_max,
reduce_mean,
reduce_sum,
residual,
@ -31,10 +25,9 @@ from thinc.api import (
)
from thinc.layers.chain import init as init_chain
from thinc.layers.resizable import resize_linear_weighted, resize_model
from thinc.types import ArrayXd, Floats2d
from thinc.types import Floats2d
from ...attrs import ORTH
from ...errors import Errors
from ...tokens import Doc
from ...util import registry
from ..extract_ngrams import extract_ngrams
@ -44,6 +37,7 @@ from .tok2vec import get_tok2vec_width
NEG_VALUE = -5000
@registry.architectures("spacy.TextCatCNN.v2")
def build_simple_cnn_text_classifier(
tok2vec: Model, exclusive_classes: bool, nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
@ -53,15 +47,39 @@ def build_simple_cnn_text_classifier(
outputs sum to 1. If exclusive_classes=False, a logistic non-linearity
is applied instead, so that outputs are in the range [0, 1].
"""
return build_reduce_text_classifier(
tok2vec=tok2vec,
exclusive_classes=exclusive_classes,
use_reduce_first=False,
use_reduce_last=False,
use_reduce_max=False,
use_reduce_mean=True,
nO=nO,
)
fill_defaults = {"b": 0, "W": 0}
with Model.define_operators({">>": chain}):
cnn = tok2vec >> list2ragged() >> reduce_mean()
nI = tok2vec.maybe_get_dim("nO")
if exclusive_classes:
output_layer = Softmax(nO=nO, nI=nI)
fill_defaults["b"] = NEG_VALUE
resizable_layer: Model = resizable(
output_layer,
resize_layer=partial(
resize_linear_weighted, fill_defaults=fill_defaults
),
)
model = cnn >> resizable_layer
else:
output_layer = Linear(nO=nO, nI=nI)
resizable_layer = resizable(
output_layer,
resize_layer=partial(
resize_linear_weighted, fill_defaults=fill_defaults
),
)
model = cnn >> resizable_layer >> Logistic()
model.set_ref("output_layer", output_layer)
model.attrs["resize_output"] = partial(
resize_and_set_ref,
resizable_layer=resizable_layer,
)
model.set_ref("tok2vec", tok2vec)
if nO is not None:
model.set_dim("nO", cast(int, nO))
model.attrs["multi_label"] = not exclusive_classes
return model
def resize_and_set_ref(model, new_nO, resizable_layer):
@ -71,52 +89,16 @@ def resize_and_set_ref(model, new_nO, resizable_layer):
return model
@registry.architectures("spacy.TextCatBOW.v2")
def build_bow_text_classifier(
exclusive_classes: bool,
ngram_size: int,
no_output_layer: bool,
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
return _build_bow_text_classifier(
exclusive_classes=exclusive_classes,
ngram_size=ngram_size,
no_output_layer=no_output_layer,
nO=nO,
sparse_linear=SparseLinear(nO=nO),
)
def build_bow_text_classifier_v3(
exclusive_classes: bool,
ngram_size: int,
no_output_layer: bool,
length: int = 262144,
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
if length < 1:
raise ValueError(Errors.E1056.format(length=length))
# Find k such that 2**(k-1) < length <= 2**k.
length = 2 ** (length - 1).bit_length()
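    # For example, length=100000 is rounded up to 131072 (2**17), while a value
    # that is already a power of two, such as the default 262144 (2**18), is
    # left unchanged.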
return _build_bow_text_classifier(
exclusive_classes=exclusive_classes,
ngram_size=ngram_size,
no_output_layer=no_output_layer,
nO=nO,
sparse_linear=SparseLinear_v2(nO=nO, length=length),
)
def _build_bow_text_classifier(
exclusive_classes: bool,
ngram_size: int,
no_output_layer: bool,
sparse_linear: Model[Tuple[ArrayXd, ArrayXd, ArrayXd], ArrayXd],
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
fill_defaults = {"b": 0, "W": 0}
with Model.define_operators({">>": chain}):
sparse_linear = SparseLinear(nO=nO)
output_layer = None
if not no_output_layer:
fill_defaults["b"] = NEG_VALUE
@ -139,14 +121,12 @@ def _build_bow_text_classifier(
return model
@registry.architectures("spacy.TextCatEnsemble.v2")
def build_text_classifier_v2(
tok2vec: Model[List[Doc], List[Floats2d]],
linear_model: Model[List[Doc], Floats2d],
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
# TODO: build the model with _build_parametric_attention_with_residual_nonlinear
# in spaCy v4. We don't do this in spaCy v3 to preserve model
# compatibility.
exclusive_classes = not linear_model.attrs["multi_label"]
with Model.define_operators({">>": chain, "|": concatenate}):
width = tok2vec.maybe_get_dim("nO")
@ -181,11 +161,6 @@ def build_text_classifier_v2(
def init_ensemble_textcat(model, X, Y) -> Model:
# When tok2vec is lazily initialized, we need to initialize it before
# the rest of the chain to ensure that we can get its width.
tok2vec = model.get_ref("tok2vec")
tok2vec.initialize(X)
tok2vec_width = get_tok2vec_width(model)
model.get_ref("attention_layer").set_dim("nO", tok2vec_width)
model.get_ref("maxout_layer").set_dim("nO", tok2vec_width)
@ -196,6 +171,7 @@ def init_ensemble_textcat(model, X, Y) -> Model:
return model
@registry.architectures("spacy.TextCatLowData.v1")
def build_text_classifier_lowdata(
width: int, dropout: Optional[float], nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
@ -214,151 +190,3 @@ def build_text_classifier_lowdata(
model = model >> Dropout(dropout)
model = model >> Logistic()
return model
def build_textcat_parametric_attention_v1(
tok2vec: Model[List[Doc], List[Floats2d]],
exclusive_classes: bool,
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
width = tok2vec.maybe_get_dim("nO")
parametric_attention = _build_parametric_attention_with_residual_nonlinear(
tok2vec=tok2vec,
nonlinear_layer=Maxout(nI=width, nO=width),
key_transform=Gelu(nI=width, nO=width),
)
with Model.define_operators({">>": chain}):
if exclusive_classes:
output_layer = Softmax(nO=nO)
else:
output_layer = Linear(nO=nO) >> Logistic()
model = parametric_attention >> output_layer
if model.has_dim("nO") is not False and nO is not None:
model.set_dim("nO", cast(int, nO))
model.set_ref("output_layer", output_layer)
model.attrs["multi_label"] = not exclusive_classes
return model
def _build_parametric_attention_with_residual_nonlinear(
*,
tok2vec: Model[List[Doc], List[Floats2d]],
nonlinear_layer: Model[Floats2d, Floats2d],
key_transform: Optional[Model[Floats2d, Floats2d]] = None,
) -> Model[List[Doc], Floats2d]:
with Model.define_operators({">>": chain, "|": concatenate}):
width = tok2vec.maybe_get_dim("nO")
attention_layer = ParametricAttention_v2(nO=width, key_transform=key_transform)
norm_layer = LayerNorm(nI=width)
parametric_attention = (
tok2vec
>> list2ragged()
>> attention_layer
>> reduce_sum()
>> residual(nonlinear_layer >> norm_layer >> Dropout(0.0))
)
parametric_attention.init = _init_parametric_attention_with_residual_nonlinear
parametric_attention.set_ref("tok2vec", tok2vec)
parametric_attention.set_ref("attention_layer", attention_layer)
parametric_attention.set_ref("key_transform", key_transform)
parametric_attention.set_ref("nonlinear_layer", nonlinear_layer)
parametric_attention.set_ref("norm_layer", norm_layer)
return parametric_attention
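A conceptual numpy sketch (not the Thinc implementation; the key transform and residual block are omitted) of what ParametricAttention_v2 followed by reduce_sum computes per document:

import numpy

X = numpy.random.rand(5, 16)                 # 5 tokens, width 16
query = numpy.random.rand(16)                # learned query vector (random here)
scores = X @ query
weights = numpy.exp(scores) / numpy.exp(scores).sum()
doc_vector = (weights[:, None] * X).sum(axis=0)
assert doc_vector.shape == (16,)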
def _init_parametric_attention_with_residual_nonlinear(model, X, Y) -> Model:
# When tok2vec is lazily initialized, we need to initialize it before
# the rest of the chain to ensure that we can get its width.
tok2vec = model.get_ref("tok2vec")
tok2vec.initialize(X)
tok2vec_width = get_tok2vec_width(model)
model.get_ref("attention_layer").set_dim("nO", tok2vec_width)
model.get_ref("key_transform").set_dim("nI", tok2vec_width)
model.get_ref("key_transform").set_dim("nO", tok2vec_width)
model.get_ref("nonlinear_layer").set_dim("nI", tok2vec_width)
model.get_ref("nonlinear_layer").set_dim("nO", tok2vec_width)
model.get_ref("norm_layer").set_dim("nI", tok2vec_width)
model.get_ref("norm_layer").set_dim("nO", tok2vec_width)
init_chain(model, X, Y)
return model
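The init callback above relies on Thinc's named references; a small illustration with a toy composed model (layer names chosen for the example) of how a reference lets init code pin inner dimensions once the upstream width is known:

from thinc.api import Linear, chain

inner = Linear()                              # dims left unset
outer = chain(inner, Linear(nO=3))
outer.set_ref("nonlinear_layer", inner)
outer.get_ref("nonlinear_layer").set_dim("nI", 96)
outer.get_ref("nonlinear_layer").set_dim("nO", 96)
assert inner.get_dim("nI") == 96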
def build_reduce_text_classifier(
tok2vec: Model,
exclusive_classes: bool,
use_reduce_first: bool,
use_reduce_last: bool,
use_reduce_max: bool,
use_reduce_mean: bool,
nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
"""Build a model that classifies pooled `Doc` representations.
Pooling is performed using reductions. Reductions are concatenated when
multiple reductions are used.
tok2vec (Model): the tok2vec layer to pool over.
exclusive_classes (bool): Whether or not classes are mutually exclusive.
use_reduce_first (bool): Pool by using the hidden representation of the
first token of a `Doc`.
use_reduce_last (bool): Pool by using the hidden representation of the
last token of a `Doc`.
use_reduce_max (bool): Pool by taking the maximum values of the hidden
representations of a `Doc`.
use_reduce_mean (bool): Pool by taking the mean of all hidden
representations of a `Doc`.
nO (Optional[int]): Number of classes.
"""
fill_defaults = {"b": 0, "W": 0}
reductions = []
if use_reduce_first:
reductions.append(reduce_first())
if use_reduce_last:
reductions.append(reduce_last())
if use_reduce_max:
reductions.append(reduce_max())
if use_reduce_mean:
reductions.append(reduce_mean())
if not len(reductions):
raise ValueError(Errors.E1057)
with Model.define_operators({">>": chain}):
cnn = tok2vec >> list2ragged() >> concatenate(*reductions)
nO_tok2vec = tok2vec.maybe_get_dim("nO")
nI = nO_tok2vec * len(reductions) if nO_tok2vec is not None else None
if exclusive_classes:
output_layer = Softmax(nO=nO, nI=nI)
fill_defaults["b"] = NEG_VALUE
resizable_layer: Model = resizable(
output_layer,
resize_layer=partial(
resize_linear_weighted, fill_defaults=fill_defaults
),
)
model = cnn >> resizable_layer
else:
output_layer = Linear(nO=nO, nI=nI)
resizable_layer = resizable(
output_layer,
resize_layer=partial(
resize_linear_weighted, fill_defaults=fill_defaults
),
)
model = cnn >> resizable_layer >> Logistic()
model.set_ref("output_layer", output_layer)
model.attrs["resize_output"] = partial(
resize_and_set_ref,
resizable_layer=resizable_layer,
)
model.set_ref("tok2vec", tok2vec)
if nO is not None:
model.set_dim("nO", cast(int, nO))
model.attrs["multi_label"] = not exclusive_classes
return model
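A minimal Thinc sketch (assumes thinc v8 and numpy) of the pooling pattern used above; with two reductions enabled the pooled width is twice the token width, which is exactly the nI arithmetic in the builder.

import numpy
from thinc.api import chain, concatenate, list2ragged, reduce_max, reduce_mean

pooler = chain(list2ragged(), concatenate(reduce_mean(), reduce_max()))
docs = [numpy.random.rand(5, 8).astype("f"), numpy.random.rand(3, 8).astype("f")]
pooler.initialize(X=docs)
pooled = pooler.predict(docs)
assert pooled.shape == (2, 16)               # 2 docs, 8 dims per reduction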

View File

@@ -29,6 +29,7 @@ from ..featureextractor import FeatureExtractor
from ..staticvectors import StaticVectors
@registry.architectures("spacy.Tok2VecListener.v1")
def tok2vec_listener_v1(width: int, upstream: str = "*"):
tok2vec = Tok2VecListener(upstream_name=upstream, width=width)
return tok2vec
@@ -45,6 +46,7 @@ def get_tok2vec_width(model: Model):
return nO
@registry.architectures("spacy.HashEmbedCNN.v2")
def build_hash_embed_cnn_tok2vec(
*,
width: int,
@@ -100,6 +102,7 @@ def build_hash_embed_cnn_tok2vec(
)
@registry.architectures("spacy.Tok2Vec.v2")
def build_Tok2Vec_model(
embed: Model[List[Doc], List[Floats2d]],
encode: Model[List[Floats2d], List[Floats2d]],
@@ -120,9 +123,10 @@ def build_Tok2Vec_model(
return tok2vec
@registry.architectures("spacy.MultiHashEmbed.v2")
def MultiHashEmbed(
width: int,
attrs: Union[List[str], List[int], List[Union[str, int]]],
attrs: List[Union[str, int]],
rows: List[int],
include_static_vectors: bool,
) -> Model[List[Doc], List[Floats2d]]:
@@ -188,7 +192,7 @@ def MultiHashEmbed(
)
else:
model = chain(
FeatureExtractor(attrs),
FeatureExtractor(list(attrs)),
cast(Model[List[Ints2d], Ragged], list2ragged()),
with_array(concatenate(*embeddings)),
max_out,
@@ -197,6 +201,7 @@ def MultiHashEmbed(
return model
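An illustrative call to the architecture above; the attribute names and row counts are example values, not defaults taken from this diff.

embed = MultiHashEmbed(
    width=96,
    attrs=["NORM", "PREFIX", "SUFFIX", "SHAPE"],
    rows=[5000, 1000, 2500, 2500],
    include_static_vectors=False,
)
# `embed` maps a batch of Docs to one float32 matrix of width 96 per Doc.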
@registry.architectures("spacy.CharacterEmbed.v2")
def CharacterEmbed(
width: int,
rows: int,
@@ -273,6 +278,7 @@ def CharacterEmbed(
return model
@registry.architectures("spacy.MaxoutWindowEncoder.v2")
def MaxoutWindowEncoder(
width: int, window_size: int, maxout_pieces: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
@@ -304,6 +310,7 @@ def MaxoutWindowEncoder(
return with_array(model, pad=receptive_field)
@registry.architectures("spacy.MishWindowEncoder.v2")
def MishWindowEncoder(
width: int, window_size: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
@@ -326,6 +333,7 @@ def MishWindowEncoder(
return with_array(model)
@registry.architectures("spacy.TorchBiLSTMEncoder.v1")
def BiLSTMEncoder(
width: int, depth: int, dropout: float
) -> Model[List[Floats2d], List[Floats2d]]:

View File

@@ -52,14 +52,14 @@ cdef SizesC get_c_sizes(model, int batch_size) except *:
return output
cdef ActivationsC alloc_activations(SizesC n) noexcept nogil:
cdef ActivationsC alloc_activations(SizesC n) nogil:
cdef ActivationsC A
memset(&A, 0, sizeof(A))
resize_activations(&A, n)
return A
cdef void free_activations(const ActivationsC* A) noexcept nogil:
cdef void free_activations(const ActivationsC* A) nogil:
free(A.token_ids)
free(A.scores)
free(A.unmaxed)
@@ -67,7 +67,7 @@ cdef void free_activations(const ActivationsC* A) noexcept nogil:
free(A.is_valid)
cdef void resize_activations(ActivationsC* A, SizesC n) noexcept nogil:
cdef void resize_activations(ActivationsC* A, SizesC n) nogil:
if n.states <= A._max_size:
A._curr_size = n.states
return
@@ -100,7 +100,7 @@ cdef void resize_activations(ActivationsC* A, SizesC n) noexcept nogil:
cdef void predict_states(
CBlas cblas, ActivationsC* A, StateC** states, const WeightsC* W, SizesC n
) noexcept nogil:
) nogil:
resize_activations(A, n)
for i in range(n.states):
states[i].set_context_tokens(&A.token_ids[i*n.feats], n.feats)
@@ -159,7 +159,7 @@ cdef void sum_state_features(
int B,
int F,
int O
) noexcept nogil:
) nogil:
cdef int idx, b, f
cdef const float* feature
padding = cached
@@ -183,7 +183,7 @@ cdef void cpu_log_loss(
const int* is_valid,
const float* scores,
int O
) noexcept nogil:
) nogil:
"""Do multi-label log loss"""
cdef double max_, gmax, Z, gZ
best = arg_max_if_gold(scores, costs, is_valid, O)
@@ -209,7 +209,7 @@ cdef void cpu_log_loss(
cdef int arg_max_if_gold(
const weight_t* scores, const weight_t* costs, const int* is_valid, int n
) noexcept nogil:
) nogil:
# Find minimum cost
cdef float cost = 1
for i in range(n):
@@ -224,7 +224,7 @@ cdef int arg_max_if_gold(
return best
cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) noexcept nogil:
cdef int arg_max_if_valid(const weight_t* scores, const int* is_valid, int n) nogil:
cdef int best = -1
for i in range(n):
if is_valid[i] >= 1:
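A pure-Python rendering (illustrative only) of the argmax-over-valid-entries pattern that arg_max_if_valid implements in C above:

def arg_max_if_valid(scores, is_valid):
    # Return the index of the best-scoring entry whose validity flag is set,
    # or -1 when no entry is valid.
    best = -1
    for i, (score, ok) in enumerate(zip(scores, is_valid)):
        if ok >= 1 and (best == -1 or score > scores[best]):
            best = i
    return best

assert arg_max_if_valid([0.1, 0.9, 0.4], [1, 0, 1]) == 2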

View File

@@ -13,6 +13,7 @@ from ..vectors import Mode, Vectors
from ..vocab import Vocab
@registry.layers("spacy.StaticVectors.v2")
def StaticVectors(
nO: Optional[int] = None,
nM: Optional[int] = None,

View File

@@ -4,6 +4,7 @@ from ..util import registry
from .parser_model import ParserStepModel
@registry.layers("spacy.TransitionModel.v1")
def TransitionModel(
tok2vec, lower, upper, resize_output, dropout=0.2, unseen_classes=set()
):

Some files were not shown because too many files have changed in this diff.