From f40ceb53789f52e986e367e7c9dbbc6394b60d2b Mon Sep 17 00:00:00 2001 From: OMOTAYO OMOYEMI <58476114+tayo4christ@users.noreply.github.com> Date: Tue, 28 Oct 2025 08:41:50 +0000 Subject: [PATCH 01/28] docs(website): remove spaCy Quickstart from Universe/Courses due to spam redirect (fixes #13853) (#13877) --- website/meta/universe.json | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index b7842bddc..1f55d9616 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -2739,20 +2739,7 @@ "courses" ] }, - { - "type": "education", - "id": "spacy-quickstart", - "title": "spaCy Quickstart", - "slogan": "Learn spaCy basics quickly by visualizing various Doc objects", - "description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.", - "url": "https://learnspacy.com/courses/spacy-quickstart/", - "image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png", - "thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png", - "author": "Aravind Mohanoor", - "category": [ - "courses" - ] - }, + { "type": "education", "id": "video-spacys-ner-model", From f5d04868e1e66d0acd6417b1c8099bcd4068fff7 Mon Sep 17 00:00:00 2001 From: Matthew Hernandez Date: Tue, 28 Oct 2025 02:42:23 -0600 Subject: [PATCH 02/28] Update _util.py to fix Deprecation Warning (#13844) Fixes issue 13843 involving a Deprecation Warning with the python package Click. --- spacy/cli/_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index fa41e6a08..1b42b5254 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -20,7 +20,7 @@ from typing import ( import srsly import typer from click import NoSuchOption -from click.parser import split_arg_string +from click.shell_completion import split_arg_string from thinc.api import Config, ConfigValidationError, require_gpu from thinc.util import gpu_is_available from typer.main import get_command From 94d6be8a9b1dbecc92820bd7996cd8c75e186320 Mon Sep 17 00:00:00 2001 From: Marwan Mohammed Sayed <155132957+MarwanMohammed2500@users.noreply.github.com> Date: Tue, 28 Oct 2025 11:43:08 +0300 Subject: [PATCH 03/28] Fixed the import issue in displacy/__init__.py (#13876) IPython deprecated IPython.core.display.display. The new one became IPython.display.display. 
So, I just fixed the import issue based on the new changes in IPython --- spacy/displacy/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index bde2d04fe..4651e5212 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -66,7 +66,8 @@ def render( if jupyter or (jupyter is None and is_in_jupyter()): # return HTML rendered by IPython display() # See #4840 for details on span wrapper to disable mathjax - from IPython.core.display import HTML, display + from IPython.core.display import HTML + from IPython.display import display return display(HTML('{}'.format(html))) return html From 68679d6f853d4ee37aaa3f572df9acbbc95180d4 Mon Sep 17 00:00:00 2001 From: "Etienne.bfx" Date: Tue, 28 Oct 2025 09:43:47 +0100 Subject: [PATCH 04/28] Add custom download URL (#13848) * add download url * Update .pre-commit-config.yaml * Update cli.mdx --------- Co-authored-by: ebonnafoux --- spacy/cli/download.py | 12 ++++++++---- website/docs/api/cli.mdx | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 4261fb830..8ab6a2997 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -29,6 +29,7 @@ def download_cli( model: str = Arg(..., help="Name of pipeline package to download"), direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"), sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"), + url: str = Opt(None, "--url", "-U", help="Download from given url") # fmt: on ): """ @@ -41,13 +42,14 @@ def download_cli( DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models """ - download(model, direct, sdist, *ctx.args) + download(model, direct, sdist, custom_url=url, *ctx.args) def download( model: str, direct: bool = False, sdist: bool = False, + custom_url: Optional[str] = None, *pip_args, ) -> None: if ( @@ -87,7 +89,7 @@ def download( filename = get_model_filename(model_name, version, sdist) - download_model(filename, pip_args) + download_model(filename, pip_args, custom_url) msg.good( "Download and installation successful", f"You can now load the package via spacy.load('{model_name}')", @@ -159,12 +161,14 @@ def get_latest_version(model: str) -> str: def download_model( - filename: str, user_pip_args: Optional[Sequence[str]] = None + filename: str, + user_pip_args: Optional[Sequence[str]] = None, + custom_url: Optional[str] = None, ) -> None: # Construct the download URL carefully. We need to make sure we don't # allow relative paths or other shenanigans to trick us into download # from outside our own repo. - base_url = about.__download_url__ + base_url = custom_url if custom_url else about.__download_url__ # urljoin requires that the path ends with /, or the last path part will be dropped if not base_url.endswith("/"): base_url = about.__download_url__ + "/" diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx index 09a978259..add6b1446 100644 --- a/website/docs/api/cli.mdx +++ b/website/docs/api/cli.mdx @@ -47,7 +47,7 @@ pipeline name to be specified with its version (e.g. `en_core_web_sm-3.0.0`). > project. 
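As a usage sketch for the new mirror option (hypothetical mirror URL; per the `download.py` diff below, the mirror must serve the same wheel/sdist filenames as `about.__download_url__`, since the filename is joined onto the base URL):

```bash
# Download a pipeline from a self-hosted mirror instead of the default repo.
# Keep the trailing slash: the slash-normalization fallback in download_model
# rebuilds the base from about.__download_url__ otherwise.
$ python -m spacy download en_core_web_sm --url https://mirror.example.com/spacy-models/
```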
```bash -$ python -m spacy download [model] [--direct] [--sdist] [pip_args] +$ python -m spacy download [model] [--direct] [--sdist] [pip_args] [--url url] ``` | Name | Description | @@ -58,6 +58,7 @@ $ python -m spacy download [model] [--direct] [--sdist] [pip_args] | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | | pip args | Additional installation options to be passed to `pip install` when installing the pipeline package. For example, `--user` to install to the user home directory or `--no-deps` to not install package dependencies. ~~Any (option/flag)~~ | | **CREATES** | The installed pipeline package in your `site-packages` directory. | +| `--url`, `-U` | Download from a mirror repository at the given url | ## info {id="info",tag="command"} From 54f54fc4cc23edfd143d5895664f7ce0b879255f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 4 Nov 2025 15:19:09 +0100 Subject: [PATCH 05/28] Reformat with black 25 --- spacy/cli/_util.py | 6 +- spacy/cli/debug_data.py | 6 +- spacy/cli/find_threshold.py | 22 +- spacy/cli/init_config.py | 8 +- spacy/compat.py | 1 + spacy/displacy/__init__.py | 1 + spacy/lang/am/examples.py | 1 - spacy/lang/az/examples.py | 1 - spacy/lang/bg/stop_words.py | 1 + spacy/lang/bn/examples.py | 1 - spacy/lang/bo/examples.py | 1 - spacy/lang/ca/examples.py | 1 - spacy/lang/char_classes.py | 6 +- spacy/lang/cs/examples.py | 1 - spacy/lang/da/tokenizer_exceptions.py | 1 + spacy/lang/de/examples.py | 1 - spacy/lang/dsb/examples.py | 1 - spacy/lang/en/examples.py | 1 - spacy/lang/es/examples.py | 1 - spacy/lang/fa/examples.py | 1 - spacy/lang/fi/tokenizer_exceptions.py | 4 +- spacy/lang/fr/examples.py | 1 - spacy/lang/grc/examples.py | 1 - spacy/lang/gu/examples.py | 1 - spacy/lang/he/examples.py | 1 - spacy/lang/hi/examples.py | 1 - spacy/lang/hsb/examples.py | 1 - spacy/lang/ht/__init__.py | 3 + spacy/lang/ht/examples.py | 1 - spacy/lang/ht/lex_attrs.py | 3 + spacy/lang/ht/punctuation.py | 61 +++--- spacy/lang/ht/stop_words.py | 3 +- spacy/lang/ht/tag_map.py | 20 +- spacy/lang/ht/tokenizer_exceptions.py | 195 +++++++++--------- spacy/lang/hu/examples.py | 1 - spacy/lang/hu/punctuation.py | 2 +- spacy/lang/hy/examples.py | 1 - spacy/lang/id/examples.py | 1 - spacy/lang/it/examples.py | 1 - spacy/lang/ja/__init__.py | 6 +- spacy/lang/ja/examples.py | 1 - spacy/lang/ja/tag_map.py | 8 +- spacy/lang/kn/examples.py | 1 - spacy/lang/lij/examples.py | 1 - spacy/lang/lt/examples.py | 1 - spacy/lang/ml/examples.py | 1 - spacy/lang/ms/examples.py | 1 - spacy/lang/nb/examples.py | 1 - spacy/lang/ne/examples.py | 1 - spacy/lang/nl/examples.py | 1 - spacy/lang/nn/examples.py | 1 - spacy/lang/pl/examples.py | 1 - spacy/lang/pt/examples.py | 1 - spacy/lang/ro/examples.py | 1 - spacy/lang/ru/examples.py | 1 - spacy/lang/sa/examples.py | 1 - spacy/lang/si/examples.py | 1 - spacy/lang/sk/examples.py | 1 - spacy/lang/sl/examples.py | 1 - spacy/lang/sq/examples.py | 1 - spacy/lang/sr/examples.py | 1 - spacy/lang/sv/examples.py | 1 - spacy/lang/ta/examples.py | 1 - spacy/lang/te/examples.py | 1 - spacy/lang/ti/examples.py | 1 - spacy/lang/tn/examples.py | 1 - spacy/lang/tr/examples.py | 1 - spacy/lang/uk/examples.py | 1 - spacy/lang/ur/examples.py | 1 - spacy/lang/vi/examples.py | 1 - spacy/language.py | 14 +- spacy/matcher/dependencymatcher.pyi | 4 +- spacy/ml/featureextractor.py | 2 +- spacy/ml/models/entity_linker.py | 6 +- spacy/pipeline/edit_tree_lemmatizer.py | 2 +- spacy/pipeline/spancat.py | 3 +- spacy/registrations.py | 1 + 
spacy/tests/doc/test_doc_api.py | 6 +- spacy/tests/lang/ht/test_exceptions.py | 14 +- .../tests/lang/ht/test_prefix_suffix_infix.py | 4 +- spacy/tests/lang/ht/test_text.py | 7 +- spacy/tests/lang/hu/test_tokenizer.py | 8 +- spacy/tests/matcher/test_matcher_logic.py | 2 +- spacy/tests/pipeline/test_entity_linker.py | 12 +- spacy/tests/pipeline/test_pipe_factories.py | 18 +- spacy/tests/pipeline/test_textcat.py | 3 +- spacy/tests/test_cli.py | 2 +- spacy/tokens/doc.pyi | 8 +- spacy/tokens/span.pyi | 8 +- spacy/tokens/token.pyi | 8 +- spacy/training/loop.py | 2 +- spacy/ty.py | 18 +- spacy/util.py | 2 +- 93 files changed, 277 insertions(+), 285 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 1b42b5254..309b6b1e7 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -225,13 +225,11 @@ def get_git_version( @overload -def string_to_list(value: str, intify: Literal[False] = ...) -> List[str]: - ... +def string_to_list(value: str, intify: Literal[False] = ...) -> List[str]: ... @overload -def string_to_list(value: str, intify: Literal[True]) -> List[int]: - ... +def string_to_list(value: str, intify: Literal[True]) -> List[int]: ... def string_to_list(value: str, intify: bool = False) -> Union[List[str], List[int]]: diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index af3c24f3b..1c9c0e0ea 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -968,16 +968,14 @@ def _compile_gold( @overload -def _format_labels(labels: Iterable[str], counts: Literal[False] = False) -> str: - ... +def _format_labels(labels: Iterable[str], counts: Literal[False] = False) -> str: ... @overload def _format_labels( labels: Iterable[Tuple[str, int]], counts: Literal[True], -) -> str: - ... +) -> str: ... def _format_labels( diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py index 3e86495e7..ff7af32e6 100644 --- a/spacy/cli/find_threshold.py +++ b/spacy/cli/find_threshold.py @@ -157,9 +157,11 @@ def find_threshold( exits=1, ) return { - keys[0]: filter_config(config[keys[0]], keys[1:], full_key) - if len(keys) > 1 - else config[keys[0]] + keys[0]: ( + filter_config(config[keys[0]], keys[1:], full_key) + if len(keys) > 1 + else config[keys[0]] + ) } # Evaluate with varying threshold values. @@ -216,12 +218,14 @@ def find_threshold( if len(set(scores.values())) == 1: wasabi.msg.warn( title="All scores are identical. Verify that all settings are correct.", - text="" - if ( - not isinstance(pipe, MultiLabel_TextCategorizer) - or scores_key in ("cats_macro_f", "cats_micro_f") - ) - else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`.", + text=( + "" + if ( + not isinstance(pipe, MultiLabel_TextCategorizer) + or scores_key in ("cats_macro_f", "cats_micro_f") + ) + else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`." 
+ ), ) else: diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index a7c03d00f..a7fb2b5b8 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -195,9 +195,11 @@ def init_config( "Pipeline": ", ".join(pipeline), "Optimize for": optimize, "Hardware": variables["hardware"].upper(), - "Transformer": template_vars.transformer.get("name") # type: ignore[attr-defined] - if template_vars.use_transformer # type: ignore[attr-defined] - else None, + "Transformer": ( + template_vars.transformer.get("name") # type: ignore[attr-defined] + if template_vars.use_transformer # type: ignore[attr-defined] + else None + ), } msg.info("Generated config template specific for your use case") for label, value in use_case.items(): diff --git a/spacy/compat.py b/spacy/compat.py index 522fa30dd..a9e7d5a20 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -1,4 +1,5 @@ """Helpers for Python and platform compatibility.""" + import sys from thinc.util import copy_array diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 4651e5212..55474734a 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -4,6 +4,7 @@ spaCy's built in visualization suite for dependencies and named entities. DOCS: https://spacy.io/api/top-level#displacy USAGE: https://spacy.io/usage/visualizers """ + import warnings from typing import Any, Callable, Dict, Iterable, Optional, Union diff --git a/spacy/lang/am/examples.py b/spacy/lang/am/examples.py index 253d32d1d..b156cb84f 100644 --- a/spacy/lang/am/examples.py +++ b/spacy/lang/am/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "አፕል የዩኬን ጅምር ድርጅት በ 1 ቢሊዮን ዶላር ለመግዛት አስቧል።", "የራስ ገዝ መኪኖች የኢንሹራንስ ኃላፊነትን ወደ አምራቾች ያዛውራሉ", diff --git a/spacy/lang/az/examples.py b/spacy/lang/az/examples.py index f3331a8cb..df5e3521d 100644 --- a/spacy/lang/az/examples.py +++ b/spacy/lang/az/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Bu bir cümlədir.", "Necəsən?", diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py index df708b65e..061850da5 100644 --- a/spacy/lang/bg/stop_words.py +++ b/spacy/lang/bg/stop_words.py @@ -3,6 +3,7 @@ References: https://github.com/Alir3z4/stop-words - Original list, serves as a base. https://postvai.com/books/stop-dumi.pdf - Additions to the original list in order to improve it. """ + STOP_WORDS = set( """ а автентичен аз ако ала diff --git a/spacy/lang/bn/examples.py b/spacy/lang/bn/examples.py index c3be4c556..11a65acb1 100644 --- a/spacy/lang/bn/examples.py +++ b/spacy/lang/bn/examples.py @@ -5,5 +5,4 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = ["তুই খুব ভালো", "আজ আমরা ডাক্তার দেখতে যাবো", "আমি জানি না "] diff --git a/spacy/lang/bo/examples.py b/spacy/lang/bo/examples.py index 8ed9372ec..8655f2d9d 100644 --- a/spacy/lang/bo/examples.py +++ b/spacy/lang/bo/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "དོན་དུ་རྒྱ་མཚོ་བླ་མ་ཞེས་བྱ་ཞིང༌།", "ཏཱ་ལའི་ཞེས་པ་ནི་སོག་སྐད་ཡིན་པ་དེ་བོད་སྐད་དུ་རྒྱ་མཚོའི་དོན་དུ་འཇུག", diff --git a/spacy/lang/ca/examples.py b/spacy/lang/ca/examples.py index ae6aa3e24..de54c05ce 100644 --- a/spacy/lang/ca/examples.py +++ b/spacy/lang/ca/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple està buscant comprar una startup del Regne Unit per mil milions de dòlars", "Els cotxes autònoms deleguen la responsabilitat de l'assegurança als seus fabricants", diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py index 37c58c85f..69e752c91 100644 --- a/spacy/lang/char_classes.py +++ b/spacy/lang/char_classes.py @@ -277,10 +277,10 @@ _currency = ( # These expressions contain various unicode variations, including characters # used in Chinese (see #1333, #1340, #1351) – unless there are cross-language # conflicts, spaCy's base tokenizer should handle all of those by default -_punct = ( - r"… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · । ، ۔ ؛ ٪" +_punct = r"… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · । ، ۔ ؛ ٪" +_quotes = ( + r'\' " ” “ ` ‘ ´ ’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉 〈 〉 ⟦ ⟧' ) -_quotes = r'\' " ” “ ` ‘ ´ ’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉 〈 〉 ⟦ ⟧' _hyphens = "- – — -- --- —— ~" # Various symbols like dingbats, but also emoji diff --git a/spacy/lang/cs/examples.py b/spacy/lang/cs/examples.py index a30b5ac14..35d86dde7 100644 --- a/spacy/lang/cs/examples.py +++ b/spacy/lang/cs/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Máma mele maso.", "Příliš žluťoučký kůň úpěl ďábelské ódy.", diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py index 649d12022..15a943ad6 100644 --- a/spacy/lang/da/tokenizer_exceptions.py +++ b/spacy/lang/da/tokenizer_exceptions.py @@ -2,6 +2,7 @@ Tokenizer Exceptions. Source: https://forkortelse.dk/ and various others. """ + from ...symbols import NORM, ORTH from ...util import update_exc from ..tokenizer_exceptions import BASE_EXCEPTIONS diff --git a/spacy/lang/de/examples.py b/spacy/lang/de/examples.py index 735d1c316..30b8f195b 100644 --- a/spacy/lang/de/examples.py +++ b/spacy/lang/de/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Die ganze Stadt ist ein Startup: Shenzhen ist das Silicon Valley für Hardware-Firmen", "Wie deutsche Startups die Technologie vorantreiben wollen: Künstliche Intelligenz", diff --git a/spacy/lang/dsb/examples.py b/spacy/lang/dsb/examples.py index 6e9143826..11ecbddb2 100644 --- a/spacy/lang/dsb/examples.py +++ b/spacy/lang/dsb/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Z tym stwori so wuměnjenje a zakład za dalše wobdźěłanje přez analyzu tekstoweje struktury a semantisku anotaciju a z tym tež za tu předstajenu digitalnu online-wersiju.", "Mi so tu jara derje spodoba.", diff --git a/spacy/lang/en/examples.py b/spacy/lang/en/examples.py index 2cca9e05f..7ed0ba0c1 100644 --- a/spacy/lang/en/examples.py +++ b/spacy/lang/en/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple is looking at buying U.K. startup for $1 billion", "Autonomous cars shift insurance liability toward manufacturers", diff --git a/spacy/lang/es/examples.py b/spacy/lang/es/examples.py index e4dfbcb6d..653a38bfd 100644 --- a/spacy/lang/es/examples.py +++ b/spacy/lang/es/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple está buscando comprar una startup del Reino Unido por mil millones de dólares.", "Los coches autónomos delegan la responsabilidad del seguro en sus fabricantes.", diff --git a/spacy/lang/fa/examples.py b/spacy/lang/fa/examples.py index 9c6fb0345..6810e48d5 100644 --- a/spacy/lang/fa/examples.py +++ b/spacy/lang/fa/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "این یک جمله نمونه می باشد.", "قرار ما، امروز ساعت ۲:۳۰ بعدازظهر هست!", diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py index 881d5b91d..0bbd7bd91 100644 --- a/spacy/lang/fi/tokenizer_exceptions.py +++ b/spacy/lang/fi/tokenizer_exceptions.py @@ -100,9 +100,9 @@ conj_contraction_negations = [ ("eivat", "eivät"), ("eivät", "eivät"), ] -for (base_lower, base_norm) in conj_contraction_bases: +for base_lower, base_norm in conj_contraction_bases: for base in [base_lower, base_lower.title()]: - for (suffix, suffix_norm) in conj_contraction_negations: + for suffix, suffix_norm in conj_contraction_negations: _exc[base + suffix] = [ {ORTH: base, NORM: base_norm}, {ORTH: suffix, NORM: suffix_norm}, diff --git a/spacy/lang/fr/examples.py b/spacy/lang/fr/examples.py index a74a62204..759de5615 100644 --- a/spacy/lang/fr/examples.py +++ b/spacy/lang/fr/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple cherche à acheter une start-up anglaise pour 1 milliard de dollars", "Les voitures autonomes déplacent la responsabilité de l'assurance vers les constructeurs", diff --git a/spacy/lang/grc/examples.py b/spacy/lang/grc/examples.py index 9c0bcb265..51ec8f8cc 100644 --- a/spacy/lang/grc/examples.py +++ b/spacy/lang/grc/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "ἐρᾷ μὲν ἁγνὸς οὐρανὸς τρῶσαι χθόνα, ἔρως δὲ γαῖαν λαμβάνει γάμου τυχεῖν·", "εὐδαίμων Χαρίτων καὶ Μελάνιππος ἔφυ, θείας ἁγητῆρες ἐφαμερίοις φιλότατος.", diff --git a/spacy/lang/gu/examples.py b/spacy/lang/gu/examples.py index 1cf75fd32..e67b7ba9d 100644 --- a/spacy/lang/gu/examples.py +++ b/spacy/lang/gu/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "લોકશાહી એ સરકારનું એક એવું તંત્ર છે જ્યાં નાગરિકો મત દ્વારા સત્તાનો ઉપયોગ કરે છે.", "તે ગુજરાત રાજ્યના ધરમપુર શહેરમાં આવેલું હતું", diff --git a/spacy/lang/he/examples.py b/spacy/lang/he/examples.py index d54d2a145..ee484e07b 100644 --- a/spacy/lang/he/examples.py +++ b/spacy/lang/he/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "סין מקימה קרן של 440 מיליון דולר להשקעה בהייטק בישראל", 'רה"מ הודיע כי יחרים טקס בחסותו', diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py index 1443b4908..f3196c58f 100644 --- a/spacy/lang/hi/examples.py +++ b/spacy/lang/hi/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "एप्पल 1 अरब डॉलर के लिए यू.के. 
स्टार्टअप खरीदने पर विचार कर रहा है।", "स्वायत्त कारें निर्माताओं की ओर बीमा दायित्व रखतीं हैं।", diff --git a/spacy/lang/hsb/examples.py b/spacy/lang/hsb/examples.py index 21f6f7584..754011c6f 100644 --- a/spacy/lang/hsb/examples.py +++ b/spacy/lang/hsb/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "To běšo wjelgin raźone a jo se wót luźi derje pśiwzeło. Tak som dožywiła wjelgin", "Jogo pśewóźowarce stej groniłej, až how w serbskich stronach njama Santa Claus nic pytaś.", diff --git a/spacy/lang/ht/__init__.py b/spacy/lang/ht/__init__.py index e5c1c2770..9fc2df40c 100644 --- a/spacy/lang/ht/__init__.py +++ b/spacy/lang/ht/__init__.py @@ -22,10 +22,12 @@ class HaitianCreoleDefaults(BaseDefaults): stop_words = STOP_WORDS tag_map = TAG_MAP + class HaitianCreole(Language): lang = "ht" Defaults = HaitianCreoleDefaults + @HaitianCreole.factory( "lemmatizer", assigns=["token.lemma"], @@ -49,4 +51,5 @@ def make_lemmatizer( nlp.vocab, model, name, mode=mode, overwrite=overwrite, scorer=scorer ) + __all__ = ["HaitianCreole"] diff --git a/spacy/lang/ht/examples.py b/spacy/lang/ht/examples.py index 456d34a5f..0afeb19c8 100644 --- a/spacy/lang/ht/examples.py +++ b/spacy/lang/ht/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple ap panse achte yon demaraj nan Wayòm Ini pou $1 milya dola", "Machin otonòm fè responsablite asirans lan ale sou men fabrikan yo", diff --git a/spacy/lang/ht/lex_attrs.py b/spacy/lang/ht/lex_attrs.py index 8a3ec1ff9..ab1a39a82 100644 --- a/spacy/lang/ht/lex_attrs.py +++ b/spacy/lang/ht/lex_attrs.py @@ -49,6 +49,7 @@ NORM_MAP = { "P": "Pa", } + def like_num(text): text = text.strip().lower() if text.startswith(("+", "-", "±", "~")): @@ -69,9 +70,11 @@ def like_num(text): return True return False + def norm_custom(text): return NORM_MAP.get(text, text.lower()) + LEX_ATTRS = { LIKE_NUM: like_num, NORM: norm_custom, diff --git a/spacy/lang/ht/punctuation.py b/spacy/lang/ht/punctuation.py index 61d88d6e1..0077db1c0 100644 --- a/spacy/lang/ht/punctuation.py +++ b/spacy/lang/ht/punctuation.py @@ -16,28 +16,43 @@ ELISION = "'’".replace(" ", "") _prefixes_elision = "m n l y t k w" _prefixes_elision += " " + _prefixes_elision.upper() -TOKENIZER_PREFIXES = LIST_PUNCT + LIST_QUOTES + [ - r"(?:({pe})[{el}])(?=[{a}])".format( - a=ALPHA, el=ELISION, pe=merge_chars(_prefixes_elision) - ) -] +TOKENIZER_PREFIXES = ( + LIST_PUNCT + + LIST_QUOTES + + [ + r"(?:({pe})[{el}])(?=[{a}])".format( + a=ALPHA, el=ELISION, pe=merge_chars(_prefixes_elision) + ) + ] +) -TOKENIZER_SUFFIXES = LIST_PUNCT + LIST_QUOTES + LIST_ELLIPSES + [ - r"(?<=[0-9])%", # numbers like 10% - r"(?<=[0-9])(?:{h})".format(h=HYPHENS), # hyphens after numbers - r"(?<=[{a}])['’]".format(a=ALPHA), # apostrophes after letters - r"(?<=[{a}])['’][mwlnytk](?=\s|$)".format(a=ALPHA), # contractions - r"(?<=[{a}0-9])\)", # right parenthesis after letter/number - r"(?<=[{a}])\.(?=\s|$)".format(a=ALPHA), # period after letter if space or end of string - r"(?<=\))[\.\?!]", # punctuation immediately after right parenthesis -] +TOKENIZER_SUFFIXES = ( + LIST_PUNCT + + LIST_QUOTES + + LIST_ELLIPSES + + [ + r"(?<=[0-9])%", # numbers like 10% + r"(?<=[0-9])(?:{h})".format(h=HYPHENS), # hyphens after numbers + r"(?<=[{a}])['’]".format(a=ALPHA), # apostrophes after letters + r"(?<=[{a}])['’][mwlnytk](?=\s|$)".format(a=ALPHA), # contractions + r"(?<=[{a}0-9])\)", # right 
parenthesis after letter/number + r"(?<=[{a}])\.(?=\s|$)".format( + a=ALPHA + ), # period after letter if space or end of string + r"(?<=\))[\.\?!]", # punctuation immediately after right parenthesis + ] +) -TOKENIZER_INFIXES = LIST_ELLIPSES + LIST_ICONS + [ - r"(?<=[0-9])[+\-\*^](?=[0-9-])", - r"(?<=[{al}{q}])\.(?=[{au}{q}])".format( - al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES - ), - r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA), - r"(?<=[{a}0-9])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS), - r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION), -] +TOKENIZER_INFIXES = ( + LIST_ELLIPSES + + LIST_ICONS + + [ + r"(?<=[0-9])[+\-\*^](?=[0-9-])", + r"(?<=[{al}{q}])\.(?=[{au}{q}])".format( + al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES + ), + r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA), + r"(?<=[{a}0-9])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS), + r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION), + ] +) diff --git a/spacy/lang/ht/stop_words.py b/spacy/lang/ht/stop_words.py index 6243887a4..50998e0e5 100644 --- a/spacy/lang/ht/stop_words.py +++ b/spacy/lang/ht/stop_words.py @@ -39,8 +39,7 @@ sa san si swa si men mèsi oswa osinon -""" -.split() +""".split() ) # Add common contractions, with and without apostrophe variants diff --git a/spacy/lang/ht/tag_map.py b/spacy/lang/ht/tag_map.py index 8c9cdd6d4..261d1aef3 100644 --- a/spacy/lang/ht/tag_map.py +++ b/spacy/lang/ht/tag_map.py @@ -1,4 +1,22 @@ -from spacy.symbols import NOUN, VERB, AUX, ADJ, ADV, PRON, DET, ADP, SCONJ, CCONJ, PART, INTJ, NUM, PROPN, PUNCT, SYM, X +from spacy.symbols import ( + NOUN, + VERB, + AUX, + ADJ, + ADV, + PRON, + DET, + ADP, + SCONJ, + CCONJ, + PART, + INTJ, + NUM, + PROPN, + PUNCT, + SYM, + X, +) TAG_MAP = { "NOUN": {"pos": NOUN}, diff --git a/spacy/lang/ht/tokenizer_exceptions.py b/spacy/lang/ht/tokenizer_exceptions.py index b44ad7a6f..4d617fd36 100644 --- a/spacy/lang/ht/tokenizer_exceptions.py +++ b/spacy/lang/ht/tokenizer_exceptions.py @@ -1,5 +1,6 @@ from spacy.symbols import ORTH, NORM + def make_variants(base, first_norm, second_orth, second_norm): return { base: [ @@ -7,14 +8,16 @@ def make_variants(base, first_norm, second_orth, second_norm): {ORTH: second_orth, NORM: second_norm}, ], base.capitalize(): [ - {ORTH: base.split("'")[0].capitalize() + "'", NORM: first_norm.capitalize()}, + { + ORTH: base.split("'")[0].capitalize() + "'", + NORM: first_norm.capitalize(), + }, {ORTH: second_orth, NORM: second_norm}, - ] + ], } -TOKENIZER_EXCEPTIONS = { - "Dr.": [{ORTH: "Dr."}] -} + +TOKENIZER_EXCEPTIONS = {"Dr.": [{ORTH: "Dr."}]} # Apostrophe forms TOKENIZER_EXCEPTIONS.update(make_variants("m'ap", "mwen", "ap", "ap")) @@ -29,93 +32,95 @@ TOKENIZER_EXCEPTIONS.update(make_variants("p'ap", "pa", "ap", "ap")) TOKENIZER_EXCEPTIONS.update(make_variants("t'ap", "te", "ap", "ap")) # Non-apostrophe contractions (with capitalized variants) -TOKENIZER_EXCEPTIONS.update({ - "map": [ - {ORTH: "m", NORM: "mwen"}, - {ORTH: "ap", NORM: "ap"}, - ], - "Map": [ - {ORTH: "M", NORM: "Mwen"}, - {ORTH: "ap", NORM: "ap"}, - ], - "lem": [ - {ORTH: "le", NORM: "le"}, - {ORTH: "m", NORM: "mwen"}, - ], - "Lem": [ - {ORTH: "Le", NORM: "Le"}, - {ORTH: "m", NORM: "mwen"}, - ], - "lew": [ - {ORTH: "le", NORM: "le"}, - {ORTH: "w", NORM: "ou"}, - ], - "Lew": [ - {ORTH: "Le", NORM: "Le"}, - {ORTH: "w", NORM: "ou"}, - ], - "nap": [ - {ORTH: "n", NORM: "nou"}, - {ORTH: "ap", NORM: "ap"}, - ], - "Nap": [ - {ORTH: "N", NORM: "Nou"}, - {ORTH: "ap", NORM: "ap"}, - ], - "lap": [ - {ORTH: "l", NORM: "li"}, - {ORTH: "ap", NORM: "ap"}, - ], - 
"Lap": [ - {ORTH: "L", NORM: "Li"}, - {ORTH: "ap", NORM: "ap"}, - ], - "yap": [ - {ORTH: "y", NORM: "yo"}, - {ORTH: "ap", NORM: "ap"}, - ], - "Yap": [ - {ORTH: "Y", NORM: "Yo"}, - {ORTH: "ap", NORM: "ap"}, - ], - "mte": [ - {ORTH: "m", NORM: "mwen"}, - {ORTH: "te", NORM: "te"}, - ], - "Mte": [ - {ORTH: "M", NORM: "Mwen"}, - {ORTH: "te", NORM: "te"}, - ], - "mpral": [ - {ORTH: "m", NORM: "mwen"}, - {ORTH: "pral", NORM: "pral"}, - ], - "Mpral": [ - {ORTH: "M", NORM: "Mwen"}, - {ORTH: "pral", NORM: "pral"}, - ], - "wap": [ - {ORTH: "w", NORM: "ou"}, - {ORTH: "ap", NORM: "ap"}, - ], - "Wap": [ - {ORTH: "W", NORM: "Ou"}, - {ORTH: "ap", NORM: "ap"}, - ], - "kap": [ - {ORTH: "k", NORM: "ki"}, - {ORTH: "ap", NORM: "ap"}, - ], - "Kap": [ - {ORTH: "K", NORM: "Ki"}, - {ORTH: "ap", NORM: "ap"}, - ], - "tap": [ - {ORTH: "t", NORM: "te"}, - {ORTH: "ap", NORM: "ap"}, - ], - "Tap": [ - {ORTH: "T", NORM: "Te"}, - {ORTH: "ap", NORM: "ap"}, - ], -}) +TOKENIZER_EXCEPTIONS.update( + { + "map": [ + {ORTH: "m", NORM: "mwen"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Map": [ + {ORTH: "M", NORM: "Mwen"}, + {ORTH: "ap", NORM: "ap"}, + ], + "lem": [ + {ORTH: "le", NORM: "le"}, + {ORTH: "m", NORM: "mwen"}, + ], + "Lem": [ + {ORTH: "Le", NORM: "Le"}, + {ORTH: "m", NORM: "mwen"}, + ], + "lew": [ + {ORTH: "le", NORM: "le"}, + {ORTH: "w", NORM: "ou"}, + ], + "Lew": [ + {ORTH: "Le", NORM: "Le"}, + {ORTH: "w", NORM: "ou"}, + ], + "nap": [ + {ORTH: "n", NORM: "nou"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Nap": [ + {ORTH: "N", NORM: "Nou"}, + {ORTH: "ap", NORM: "ap"}, + ], + "lap": [ + {ORTH: "l", NORM: "li"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Lap": [ + {ORTH: "L", NORM: "Li"}, + {ORTH: "ap", NORM: "ap"}, + ], + "yap": [ + {ORTH: "y", NORM: "yo"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Yap": [ + {ORTH: "Y", NORM: "Yo"}, + {ORTH: "ap", NORM: "ap"}, + ], + "mte": [ + {ORTH: "m", NORM: "mwen"}, + {ORTH: "te", NORM: "te"}, + ], + "Mte": [ + {ORTH: "M", NORM: "Mwen"}, + {ORTH: "te", NORM: "te"}, + ], + "mpral": [ + {ORTH: "m", NORM: "mwen"}, + {ORTH: "pral", NORM: "pral"}, + ], + "Mpral": [ + {ORTH: "M", NORM: "Mwen"}, + {ORTH: "pral", NORM: "pral"}, + ], + "wap": [ + {ORTH: "w", NORM: "ou"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Wap": [ + {ORTH: "W", NORM: "Ou"}, + {ORTH: "ap", NORM: "ap"}, + ], + "kap": [ + {ORTH: "k", NORM: "ki"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Kap": [ + {ORTH: "K", NORM: "Ki"}, + {ORTH: "ap", NORM: "ap"}, + ], + "tap": [ + {ORTH: "t", NORM: "te"}, + {ORTH: "ap", NORM: "ap"}, + ], + "Tap": [ + {ORTH: "T", NORM: "Te"}, + {ORTH: "ap", NORM: "ap"}, + ], + } +) diff --git a/spacy/lang/hu/examples.py b/spacy/lang/hu/examples.py index 711a438bd..c056c0967 100644 --- a/spacy/lang/hu/examples.py +++ b/spacy/lang/hu/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Az Apple egy brit startup vásárlását tervezi 1 milliárd dollár értékben.", "San Francisco vezetése mérlegeli a járdát használó szállító robotok betiltását.", diff --git a/spacy/lang/hu/punctuation.py b/spacy/lang/hu/punctuation.py index dbf93c622..dc9741076 100644 --- a/spacy/lang/hu/punctuation.py +++ b/spacy/lang/hu/punctuation.py @@ -11,7 +11,7 @@ from ..char_classes import ( ) # removing ° from the special icons to keep e.g. 
99° as one token -_concat_icons = CONCAT_ICONS.replace("\u00B0", "") +_concat_icons = CONCAT_ICONS.replace("\u00b0", "") _currency = r"\$¢£€¥฿" _quotes = CONCAT_QUOTES.replace("'", "") diff --git a/spacy/lang/hy/examples.py b/spacy/lang/hy/examples.py index 212a2ec86..9455396db 100644 --- a/spacy/lang/hy/examples.py +++ b/spacy/lang/hy/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Լոնդոնը Միացյալ Թագավորության մեծ քաղաք է։", "Ո՞վ է Ֆրանսիայի նախագահը։", diff --git a/spacy/lang/id/examples.py b/spacy/lang/id/examples.py index d35271551..17d1c5f28 100644 --- a/spacy/lang/id/examples.py +++ b/spacy/lang/id/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Indonesia merupakan negara kepulauan yang kaya akan budaya.", "Berapa banyak warga yang dibutuhkan saat kerja bakti?", diff --git a/spacy/lang/it/examples.py b/spacy/lang/it/examples.py index 506721276..ae857382a 100644 --- a/spacy/lang/it/examples.py +++ b/spacy/lang/it/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple vuole comprare una startup del Regno Unito per un miliardo di dollari", "Le automobili a guida autonoma spostano la responsabilità assicurativa verso i produttori", diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py index e21e85cd9..492478af3 100644 --- a/spacy/lang/ja/__init__.py +++ b/spacy/lang/ja/__init__.py @@ -102,9 +102,9 @@ class JapaneseTokenizer(DummyTokenizer): token.dictionary_form(), # lemma token.normalized_form(), token.reading_form(), - sub_tokens_list[idx] - if sub_tokens_list - else None, # user_data['sub_tokens'] + ( + sub_tokens_list[idx] if sub_tokens_list else None + ), # user_data['sub_tokens'] ) for idx, token in enumerate(sudachipy_tokens) if len(token.surface()) > 0 diff --git a/spacy/lang/ja/examples.py b/spacy/lang/ja/examples.py index c3a011862..a07711c53 100644 --- a/spacy/lang/ja/examples.py +++ b/spacy/lang/ja/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "アップルがイギリスの新興企業を10億ドルで購入を検討", "自動運転車の損害賠償責任、自動車メーカーに一定の負担を求める", diff --git a/spacy/lang/ja/tag_map.py b/spacy/lang/ja/tag_map.py index 5c14f41bf..527c83629 100644 --- a/spacy/lang/ja/tag_map.py +++ b/spacy/lang/ja/tag_map.py @@ -25,7 +25,9 @@ TAG_MAP = { # Universal Dependencies Mapping: (Some of the entries in this mapping are updated to v2.6 in the list below) # http://universaldependencies.org/ja/overview/morphology.html # http://universaldependencies.org/ja/pos/all.html - "記号-一般": {POS: NOUN}, # this includes characters used to represent sounds like ドレミ + "記号-一般": { + POS: NOUN + }, # this includes characters used to represent sounds like ドレミ "記号-文字": { POS: NOUN }, # this is for Greek and Latin characters having some meanings, or used as symbols, as in math @@ -72,7 +74,9 @@ TAG_MAP = { "名詞-固有名詞-地名-国": {POS: PROPN}, # country name "名詞-助動詞語幹": {POS: AUX}, "名詞-数詞": {POS: NUM}, # includes Chinese numerals - "名詞-普通名詞-サ変可能": {POS: NOUN}, # XXX: sometimes VERB in UDv2; suru-verb noun + "名詞-普通名詞-サ変可能": { + POS: NOUN + }, # XXX: sometimes VERB in UDv2; suru-verb noun "名詞-普通名詞-サ変形状詞可能": {POS: NOUN}, "名詞-普通名詞-一般": {POS: NOUN}, "名詞-普通名詞-形状詞可能": {POS: NOUN}, # XXX: sometimes ADJ in UDv2 diff --git a/spacy/lang/kn/examples.py b/spacy/lang/kn/examples.py index 3e055752e..7cbb7fc07 100644 --- a/spacy/lang/kn/examples.py +++ b/spacy/lang/kn/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "ಆಪಲ್ ಒಂದು ಯು.ಕೆ. ಸ್ಟಾರ್ಟ್ಅಪ್ ಅನ್ನು ೧ ಶತಕೋಟಿ ಡಾಲರ್ಗಳಿಗೆ ಖರೀದಿಸಲು ನೋಡುತ್ತಿದೆ.", "ಸ್ವಾಯತ್ತ ಕಾರುಗಳು ವಿಮಾ ಹೊಣೆಗಾರಿಕೆಯನ್ನು ತಯಾರಕರ ಕಡೆಗೆ ಬದಲಾಯಿಸುತ್ತವೆ.", diff --git a/spacy/lang/lij/examples.py b/spacy/lang/lij/examples.py index ba7fe43fd..ec336b07f 100644 --- a/spacy/lang/lij/examples.py +++ b/spacy/lang/lij/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Sciusciâ e sciorbî no se peu.", "Graçie di çetroin, che me son arrivæ.", diff --git a/spacy/lang/lt/examples.py b/spacy/lang/lt/examples.py index eaf941f1a..57d6eb4d1 100644 --- a/spacy/lang/lt/examples.py +++ b/spacy/lang/lt/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Jaunikis pirmąją vestuvinę naktį iškeitė į areštinės gultą", "Bepiločiai automobiliai išnaikins vairavimo mokyklas, autoservisus ir eismo nelaimes", diff --git a/spacy/lang/ml/examples.py b/spacy/lang/ml/examples.py index 9794eab29..d067b8b66 100644 --- a/spacy/lang/ml/examples.py +++ b/spacy/lang/ml/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "അനാവശ്യമായി കണ്ണിലും മൂക്കിലും വായിലും സ്പർശിക്കാതിരിക്കുക", "പൊതുരംഗത്ത് മലയാള ഭാഷയുടെ സമഗ്രപുരോഗതി ലക്ഷ്യമാക്കി പ്രവർത്തിക്കുന്ന സംഘടനയായ മലയാളഐക്യവേദിയുടെ വിദ്യാർത്ഥിക്കൂട്ടായ്മയാണ് വിദ്യാർത്ഥി മലയാളവേദി", diff --git a/spacy/lang/ms/examples.py b/spacy/lang/ms/examples.py index 97ab19b6e..1af439d4a 100644 --- a/spacy/lang/ms/examples.py +++ b/spacy/lang/ms/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Malaysia ialah sebuah negara yang terletak di Asia Tenggara.", "Berapa banyak pelajar yang akan menghadiri majlis perpisahan sekolah?", diff --git a/spacy/lang/nb/examples.py b/spacy/lang/nb/examples.py index b1a63ad74..242dab7c5 100644 --- a/spacy/lang/nb/examples.py +++ b/spacy/lang/nb/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar.", "Selvkjørende biler flytter forsikringsansvaret over på produsentene.", diff --git a/spacy/lang/ne/examples.py b/spacy/lang/ne/examples.py index a29b77c2f..cc3b382df 100644 --- a/spacy/lang/ne/examples.py +++ b/spacy/lang/ne/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "एप्पलले अमेरिकी स्टार्टअप १ अर्ब डलरमा किन्ने सोच्दै छ", "स्वायत्त कारहरूले बीमा दायित्व निर्माताहरु तिर बदल्छन्", diff --git a/spacy/lang/nl/examples.py b/spacy/lang/nl/examples.py index 8c8c50c60..3440f01db 100644 --- a/spacy/lang/nl/examples.py +++ b/spacy/lang/nl/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple overweegt om voor 1 miljard een U.K. startup te kopen", "Autonome auto's verschuiven de verzekeringverantwoordelijkheid naar producenten", diff --git a/spacy/lang/nn/examples.py b/spacy/lang/nn/examples.py index 95ec0aadd..ee03bf95e 100644 --- a/spacy/lang/nn/examples.py +++ b/spacy/lang/nn/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - # sentences taken from Omsetjingsminne frå Nynorsk pressekontor 2022 (https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-80/) sentences = [ "Konseptet går ut på at alle tre omgangar tel, alle hopparar må stille i kvalifiseringa og poengsummen skal telje.", diff --git a/spacy/lang/pl/examples.py b/spacy/lang/pl/examples.py index b1ea5880f..cb55ed07d 100644 --- a/spacy/lang/pl/examples.py +++ b/spacy/lang/pl/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Poczuł przyjemną woń mocnej kawy.", "Istnieje wiele dróg oddziaływania substancji psychoaktywnej na układ nerwowy.", diff --git a/spacy/lang/pt/examples.py b/spacy/lang/pt/examples.py index 13f3512cf..42ae602c1 100644 --- a/spacy/lang/pt/examples.py +++ b/spacy/lang/pt/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares", "Carros autônomos empurram a responsabilidade do seguro para os fabricantes." diff --git a/spacy/lang/ro/examples.py b/spacy/lang/ro/examples.py index bfa258ffc..46b4c9a67 100644 --- a/spacy/lang/ro/examples.py +++ b/spacy/lang/ro/examples.py @@ -7,7 +7,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple plănuiește să cumpere o companie britanică pentru un miliard de dolari", "Municipalitatea din San Francisco ia în calcul interzicerea roboților curieri pe trotuar", diff --git a/spacy/lang/ru/examples.py b/spacy/lang/ru/examples.py index adb007625..9595d583a 100644 --- a/spacy/lang/ru/examples.py +++ b/spacy/lang/ru/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ # Translations from English: "Apple рассматривает возможность покупки стартапа из Соединённого Королевства за $1 млрд", diff --git a/spacy/lang/sa/examples.py b/spacy/lang/sa/examples.py index 60243c04c..6a0bc4e13 100644 --- a/spacy/lang/sa/examples.py +++ b/spacy/lang/sa/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "अभ्यावहति कल्याणं विविधं वाक् सुभाषिता ।", "मनसि व्याकुले चक्षुः पश्यन्नपि न पश्यति ।", diff --git a/spacy/lang/si/examples.py b/spacy/lang/si/examples.py index b34051d00..8e0ffec69 100644 --- a/spacy/lang/si/examples.py +++ b/spacy/lang/si/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "මෙය වාක්‍යයකි.", "ඔබ කවුද?", diff --git a/spacy/lang/sk/examples.py b/spacy/lang/sk/examples.py index 736109a7c..079d0d2b1 100644 --- a/spacy/lang/sk/examples.py +++ b/spacy/lang/sk/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Ardevop, s.r.o. je malá startup firma na území SR.", "Samojazdiace autá presúvajú poistnú zodpovednosť na výrobcov automobilov.", diff --git a/spacy/lang/sl/examples.py b/spacy/lang/sl/examples.py index bf483c6a4..79846114b 100644 --- a/spacy/lang/sl/examples.py +++ b/spacy/lang/sl/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple načrtuje nakup britanskega startupa za 1 bilijon dolarjev", "France Prešeren je umrl 8. februarja 1849 v Kranju", diff --git a/spacy/lang/sq/examples.py b/spacy/lang/sq/examples.py index 06ed20fa1..61bf713a6 100644 --- a/spacy/lang/sq/examples.py +++ b/spacy/lang/sq/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple po shqyrton blerjen e nje shoqërie të U.K. për 1 miliard dollarë", "Makinat autonome ndryshojnë përgjegjësinë e sigurimit ndaj prodhuesve", diff --git a/spacy/lang/sr/examples.py b/spacy/lang/sr/examples.py index ec7f57ced..2d34d42b4 100644 --- a/spacy/lang/sr/examples.py +++ b/spacy/lang/sr/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ # Translations from English "Apple планира куповину америчког стартапа за $1 милијарду.", diff --git a/spacy/lang/sv/examples.py b/spacy/lang/sv/examples.py index bc6cd7a54..ffea6e457 100644 --- a/spacy/lang/sv/examples.py +++ b/spacy/lang/sv/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple överväger att köpa brittisk startup för 1 miljard dollar.", "Självkörande bilar förskjuter försäkringsansvar mot tillverkare.", diff --git a/spacy/lang/ta/examples.py b/spacy/lang/ta/examples.py index e68dc6237..522cd926d 100644 --- a/spacy/lang/ta/examples.py +++ b/spacy/lang/ta/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "கிறிஸ்துமஸ் மற்றும் இனிய புத்தாண்டு வாழ்த்துக்கள்", "எனக்கு என் குழந்தைப் பருவம் நினைவிருக்கிறது", diff --git a/spacy/lang/te/examples.py b/spacy/lang/te/examples.py index cff7d3cb0..4af872828 100644 --- a/spacy/lang/te/examples.py +++ b/spacy/lang/te/examples.py @@ -7,7 +7,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "ఆపిల్ 1 బిలియన్ డాలర్స్ కి యూ.కె. స్టార్ట్అప్ ని కొనాలని అనుకుంటుంది.", "ఆటోనోమోస్ కార్లు భీమా బాధ్యతను తయారీదారులపైకి మళ్లిస్తాయి.", diff --git a/spacy/lang/ti/examples.py b/spacy/lang/ti/examples.py index 167b58d09..146ac349b 100644 --- a/spacy/lang/ti/examples.py +++ b/spacy/lang/ti/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "አፕል ብዩኬ ትርከብ ንግድ ብ1 ቢሊዮን ዶላር ንምግዛዕ ሐሲባ።", "ፈላማይ ክታበት ኮቪድ 19 ተጀሚሩ፤ሓዱሽ ተስፋ ሂቡ ኣሎ", diff --git a/spacy/lang/tn/examples.py b/spacy/lang/tn/examples.py index 7b33fae5a..fb6d96f97 100644 --- a/spacy/lang/tn/examples.py +++ b/spacy/lang/tn/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple e nyaka go reka JSE ka tlhwatlhwa ta R1 billion", "Johannesburg ke toropo e kgolo mo Afrika Borwa.", diff --git a/spacy/lang/tr/examples.py b/spacy/lang/tr/examples.py index c912c950d..c96e54032 100644 --- a/spacy/lang/tr/examples.py +++ b/spacy/lang/tr/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Neredesin?", "Neredesiniz?", diff --git a/spacy/lang/uk/examples.py b/spacy/lang/uk/examples.py index f75d44488..3335c82ac 100644 --- a/spacy/lang/uk/examples.py +++ b/spacy/lang/uk/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Ніч на середу буде морозною.", "Чим кращі книги ти читав, тим гірше спиш.", # Serhiy Zhadan diff --git a/spacy/lang/ur/examples.py b/spacy/lang/ur/examples.py index e55b337be..f612c6b81 100644 --- a/spacy/lang/ur/examples.py +++ b/spacy/lang/ur/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "اردو ہے جس کا نام ہم جانتے ہیں داغ", "سارے جہاں میں دھوم ہماری زباں کی ہے", diff --git a/spacy/lang/vi/examples.py b/spacy/lang/vi/examples.py index 36575f67c..5f2a9b2ba 100644 --- a/spacy/lang/vi/examples.py +++ b/spacy/lang/vi/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Đây là đâu, tôi là ai?", "Căn phòng có nhiều cửa sổ nên nó khá sáng", diff --git a/spacy/language.py b/spacy/language.py index 5b9eb8bd2..dcf436c65 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1519,8 +1519,7 @@ class Language: disable: Iterable[str] = ..., component_cfg: Optional[Dict[str, Dict[str, Any]]] = ..., n_process: int = ..., - ) -> Iterator[Doc]: - ... + ) -> Iterator[Doc]: ... 
@overload def pipe( # noqa: F811 @@ -1532,8 +1531,7 @@ class Language: disable: Iterable[str] = ..., component_cfg: Optional[Dict[str, Dict[str, Any]]] = ..., n_process: int = ..., - ) -> Iterator[Tuple[Doc, _AnyContext]]: - ... + ) -> Iterator[Tuple[Doc, _AnyContext]]: ... def pipe( # noqa: F811 self, @@ -1641,7 +1639,7 @@ class Language: batch_size: int, ) -> Iterator[Doc]: def prepare_input( - texts: Iterable[Union[str, Doc]] + texts: Iterable[Union[str, Doc]], ) -> Iterable[Tuple[Union[str, bytes], _AnyContext]]: # Serialize Doc inputs to bytes to avoid incurring pickling # overhead when they are passed to child processes. Also yield @@ -1943,9 +1941,9 @@ class Language: ) if "_sourced_vectors_hashes" not in nlp.meta: nlp.meta["_sourced_vectors_hashes"] = {} - nlp.meta["_sourced_vectors_hashes"][ - pipe_name - ] = source_nlp_vectors_hashes[model] + nlp.meta["_sourced_vectors_hashes"][pipe_name] = ( + source_nlp_vectors_hashes[model] + ) # Delete from cache if listeners were replaced if listeners_replaced: del source_nlps[model] diff --git a/spacy/matcher/dependencymatcher.pyi b/spacy/matcher/dependencymatcher.pyi index b9fbabda7..d84a30a58 100644 --- a/spacy/matcher/dependencymatcher.pyi +++ b/spacy/matcher/dependencymatcher.pyi @@ -51,9 +51,7 @@ class DependencyMatcher: ] = ... ) -> None: ... def has_key(self, key: Union[str, int]) -> bool: ... - def get( - self, key: Union[str, int], default: Optional[Any] = ... - ) -> Tuple[ + def get(self, key: Union[str, int], default: Optional[Any] = ...) -> Tuple[ Optional[ Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any] ], diff --git a/spacy/ml/featureextractor.py b/spacy/ml/featureextractor.py index 2f869ad65..fb4e3c39a 100644 --- a/spacy/ml/featureextractor.py +++ b/spacy/ml/featureextractor.py @@ -7,7 +7,7 @@ from ..tokens import Doc def FeatureExtractor( - columns: Union[List[str], List[int], List[Union[int, str]]] + columns: Union[List[str], List[int], List[Union[int, str]]], ) -> Model[List[Doc], List[Ints2d]]: return Model("extract_features", forward, attrs={"columns": columns}) diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index 752d1c443..8b12720db 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -122,7 +122,7 @@ def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]: return get_candidates -def create_candidates_batch() -> Callable[ - [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]] -]: +def create_candidates_batch() -> ( + Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]] +): return get_candidates_batch diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index 6029ed313..0941b43c1 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -93,7 +93,7 @@ class EditTreeLemmatizer(TrainablePipe): truths = [] for eg in examples: eg_truths = [] - for (predicted, gold_lemma) in zip( + for predicted, gold_lemma in zip( eg.predicted, eg.get_aligned("LEMMA", as_string=True) ): if gold_lemma is None or gold_lemma == "": diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 030572850..805a0538f 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -80,8 +80,7 @@ DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str( @runtime_checkable class Suggester(Protocol): - def __call__(self, docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged: - ... 
+ def __call__(self, docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged: ... def ngram_suggester( diff --git a/spacy/registrations.py b/spacy/registrations.py index f742da9d3..7e29486b6 100644 --- a/spacy/registrations.py +++ b/spacy/registrations.py @@ -6,6 +6,7 @@ remain in their original locations, but decoration is moved here. Component definitions and registrations are in spacy/pipeline/factories.py """ + # Global flag to track if registry has been populated REGISTRY_POPULATED = False diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 73544c51a..d72c916ef 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -141,7 +141,8 @@ def test_issue3869(sentence): @pytest.mark.issue(3962) def test_issue3962(en_vocab): """Ensure that as_doc does not result in out-of-bound access of tokens. - This is achieved by setting the head to itself if it would lie out of the span otherwise.""" + This is achieved by setting the head to itself if it would lie out of the span otherwise. + """ # fmt: off words = ["He", "jests", "at", "scars", ",", "that", "never", "felt", "a", "wound", "."] heads = [1, 7, 1, 2, 7, 7, 7, 7, 9, 7, 7] @@ -180,7 +181,8 @@ def test_issue3962(en_vocab): @pytest.mark.issue(3962) def test_issue3962_long(en_vocab): """Ensure that as_doc does not result in out-of-bound access of tokens. - This is achieved by setting the head to itself if it would lie out of the span otherwise.""" + This is achieved by setting the head to itself if it would lie out of the span otherwise. + """ # fmt: off words = ["He", "jests", "at", "scars", ".", "They", "never", "felt", "a", "wound", "."] heads = [1, 1, 1, 2, 1, 7, 7, 7, 9, 7, 7] diff --git a/spacy/tests/lang/ht/test_exceptions.py b/spacy/tests/lang/ht/test_exceptions.py index 685b72c07..ea2e2b204 100644 --- a/spacy/tests/lang/ht/test_exceptions.py +++ b/spacy/tests/lang/ht/test_exceptions.py @@ -29,4 +29,16 @@ def test_ht_tokenizer_handles_basic_abbreviation(ht_tokenizer, text): def test_ht_tokenizer_full_sentence(ht_tokenizer): text = "Si'm ka vini, m'ap pale ak li." 
tokens = [t.text for t in ht_tokenizer(text)] - assert tokens == ["Si", "'m", "ka", "vini", ",", "m'", "ap", "pale", "ak", "li", "."] + assert tokens == [ + "Si", + "'m", + "ka", + "vini", + ",", + "m'", + "ap", + "pale", + "ak", + "li", + ".", + ] diff --git a/spacy/tests/lang/ht/test_prefix_suffix_infix.py b/spacy/tests/lang/ht/test_prefix_suffix_infix.py index 7dabec17a..5ff409cd9 100644 --- a/spacy/tests/lang/ht/test_prefix_suffix_infix.py +++ b/spacy/tests/lang/ht/test_prefix_suffix_infix.py @@ -37,7 +37,9 @@ def test_ht_tokenizer_splits_uneven_wrap(ht_tokenizer, text): assert len(tokens) == 5 -@pytest.mark.parametrize("text,length", [("Ozetazini.", 2), ("Frans.", 2), ("(Ozetazini.", 3)]) +@pytest.mark.parametrize( + "text,length", [("Ozetazini.", 2), ("Frans.", 2), ("(Ozetazini.", 3)] +) def test_ht_tokenizer_splits_prefix_interact(ht_tokenizer, text, length): tokens = ht_tokenizer(text) assert len(tokens) == length diff --git a/spacy/tests/lang/ht/test_text.py b/spacy/tests/lang/ht/test_text.py index f396e352a..e63299fc0 100644 --- a/spacy/tests/lang/ht/test_text.py +++ b/spacy/tests/lang/ht/test_text.py @@ -16,7 +16,6 @@ Nan Washington, Depatman Deta Etazini pibliye yon deklarasyon ki eksprime "regre assert len(tokens) == 84 - @pytest.mark.parametrize( "text,length", [ @@ -66,14 +65,14 @@ def test_ht_lex_attrs_capitals(word): @pytest.mark.parametrize( - "word, expected", [ + "word, expected", + [ ("'m", "mwen"), ("'n", "nou"), ("'l", "li"), ("'y", "yo"), ("'w", "ou"), - ] + ], ) def test_ht_lex_attrs_norm_custom(word, expected): assert norm_custom(word) == expected - diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py index fa689c8f3..30f3e9487 100644 --- a/spacy/tests/lang/hu/test_tokenizer.py +++ b/spacy/tests/lang/hu/test_tokenizer.py @@ -304,9 +304,11 @@ TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0]) SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0] TESTS.extend( [ - pytest.param(x[0], x[1], marks=pytest.mark.slow()) - if not isinstance(x[0], tuple) - else x + ( + pytest.param(x[0], x[1], marks=pytest.mark.slow()) + if not isinstance(x[0], tuple) + else x + ) for x in SLOW_TESTS ] ) diff --git a/spacy/tests/matcher/test_matcher_logic.py b/spacy/tests/matcher/test_matcher_logic.py index 3b65fee23..1109766dc 100644 --- a/spacy/tests/matcher/test_matcher_logic.py +++ b/spacy/tests/matcher/test_matcher_logic.py @@ -544,7 +544,7 @@ def test_greedy_matching_longest(doc, text, pattern, longest): matcher = Matcher(doc.vocab) matcher.add("RULE", [pattern], greedy="LONGEST") matches = matcher(doc) - for (key, s, e) in matches: + for key, s, e in matches: assert doc[s:e].text == longest diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 5e50a4d28..1b6f49f4c 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -496,15 +496,15 @@ def test_el_pipe_configuration(nlp): return [get_lowercased_candidates(kb, span) for span in spans] @registry.misc("spacy.LowercaseCandidateGenerator.v1") - def create_candidates() -> Callable[ - [InMemoryLookupKB, "Span"], Iterable[Candidate] - ]: + def create_candidates() -> ( + Callable[[InMemoryLookupKB, "Span"], Iterable[Candidate]] + ): return get_lowercased_candidates @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1") - def create_candidates_batch() -> Callable[ - [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]] - ]: + def 
create_candidates_batch() -> ( + Callable[[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]] + ): return get_lowercased_candidates_batch # replace the pipe with a new one with with a different candidate generator diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py index c45dccb06..b355379bf 100644 --- a/spacy/tests/pipeline/test_pipe_factories.py +++ b/spacy/tests/pipeline/test_pipe_factories.py @@ -279,20 +279,17 @@ def test_pipe_factories_wrong_formats(): with pytest.raises(ValueError): # Decorator is not called @Language.component - def component(foo: int, bar: str): - ... + def component(foo: int, bar: str): ... with pytest.raises(ValueError): # Decorator is not called @Language.factory - def factory1(foo: int, bar: str): - ... + def factory1(foo: int, bar: str): ... with pytest.raises(ValueError): # Factory function is missing "nlp" and "name" arguments @Language.factory("test_pipe_factories_missing_args") - def factory2(foo: int, bar: str): - ... + def factory2(foo: int, bar: str): ... def test_pipe_factory_meta_config_cleanup(): @@ -329,8 +326,7 @@ def test_pipe_factories_empty_dict_default(): name = "test_pipe_factories_empty_dict_default" @Language.factory(name, default_config={"foo": {}}) - def factory(nlp: Language, name: str, foo: dict): - ... + def factory(nlp: Language, name: str, foo: dict): ... nlp = Language() nlp.create_pipe(name) @@ -549,11 +545,9 @@ def test_pipe_factories_from_source_config(): class PipeFactoriesIdempotent: - def __init__(self, nlp, name): - ... + def __init__(self, nlp, name): ... - def __call__(self, doc): - ... + def __call__(self, doc): ... @pytest.mark.parametrize( diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 8e4a5ed7c..4310e41ab 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -874,7 +874,8 @@ def test_textcat_eval_missing(multi_label: bool, spring_p: float): def test_textcat_loss(multi_label: bool, expected_loss: float): """ multi-label: the missing 'spring' in gold_doc_2 doesn't incur an increase in loss - exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss""" + exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss + """ train_examples = [] nlp = English() diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 7b729d78f..43d5f6283 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -890,7 +890,7 @@ def test_cli_find_threshold(capsys): return docs def init_nlp( - components: Tuple[Tuple[str, Dict[str, Any]], ...] = () + components: Tuple[Tuple[str, Dict[str, Any]], ...] = (), ) -> Tuple[Language, List[Example]]: new_nlp = English() new_nlp.add_pipe( # type: ignore diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index f0b68862c..d92f04d05 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -57,9 +57,7 @@ class Doc: force: bool = ..., ) -> None: ... @classmethod - def get_extension( - cls, name: str - ) -> Tuple[ + def get_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[DocMethod], Optional[Callable[[Doc], Any]], @@ -68,9 +66,7 @@ class Doc: @classmethod def has_extension(cls, name: str) -> bool: ... 
@classmethod - def remove_extension( - cls, name: str - ) -> Tuple[ + def remove_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[DocMethod], Optional[Callable[[Doc], Any]], diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi index b982eb810..070aaffb3 100644 --- a/spacy/tokens/span.pyi +++ b/spacy/tokens/span.pyi @@ -23,9 +23,7 @@ class Span: force: bool = ..., ) -> None: ... @classmethod - def get_extension( - cls, name: str - ) -> Tuple[ + def get_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[SpanMethod], Optional[Callable[[Span], Any]], @@ -34,9 +32,7 @@ class Span: @classmethod def has_extension(cls, name: str) -> bool: ... @classmethod - def remove_extension( - cls, name: str - ) -> Tuple[ + def remove_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[SpanMethod], Optional[Callable[[Span], Any]], diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi index 435ace527..7e56ae3bc 100644 --- a/spacy/tokens/token.pyi +++ b/spacy/tokens/token.pyi @@ -27,9 +27,7 @@ class Token: force: bool = ..., ) -> None: ... @classmethod - def get_extension( - cls, name: str - ) -> Tuple[ + def get_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[TokenMethod], Optional[Callable[[Token], Any]], @@ -38,9 +36,7 @@ class Token: @classmethod def has_extension(cls, name: str) -> bool: ... @classmethod - def remove_extension( - cls, name: str - ) -> Tuple[ + def remove_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[TokenMethod], Optional[Callable[[Token], Any]], diff --git a/spacy/training/loop.py b/spacy/training/loop.py index 56df53957..6f5099858 100644 --- a/spacy/training/loop.py +++ b/spacy/training/loop.py @@ -354,7 +354,7 @@ def update_meta( def create_before_to_disk_callback( - callback: Optional[Callable[["Language"], "Language"]] + callback: Optional[Callable[["Language"], "Language"]], ) -> Callable[["Language"], "Language"]: from ..language import Language # noqa: F811 diff --git a/spacy/ty.py b/spacy/ty.py index f389456c0..b37f2e18a 100644 --- a/spacy/ty.py +++ b/spacy/ty.py @@ -30,11 +30,9 @@ class TrainableComponent(Protocol): drop: float = 0.0, sgd: Optional[Optimizer] = None, losses: Optional[Dict[str, float]] = None - ) -> Dict[str, float]: - ... + ) -> Dict[str, float]: ... - def finish_update(self, sgd: Optimizer) -> None: - ... + def finish_update(self, sgd: Optimizer) -> None: ... @runtime_checkable @@ -44,8 +42,7 @@ class InitializableComponent(Protocol): get_examples: Callable[[], Iterable["Example"]], nlp: "Language", **kwargs: Any - ): - ... + ): ... @runtime_checkable @@ -55,11 +52,8 @@ class ListenedToComponent(Protocol): listener_map: Dict[str, Sequence[Model]] listening_components: List[str] - def add_listener(self, listener: Model, component_name: str) -> None: - ... + def add_listener(self, listener: Model, component_name: str) -> None: ... - def remove_listener(self, listener: Model, component_name: str) -> bool: - ... + def remove_listener(self, listener: Model, component_name: str) -> bool: ... - def find_listeners(self, component) -> None: - ... + def find_listeners(self, component) -> None: ... diff --git a/spacy/util.py b/spacy/util.py index 527e6eb3a..ad5a7e0ba 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -657,7 +657,7 @@ def load_model_from_config( def get_sourced_components( - config: Union[Dict[str, Any], Config] + config: Union[Dict[str, Any], Config], ) -> Dict[str, Dict[str, Any]]: """RETURNS (List[str]): All sourced components in the original config, e.g. {"source": "en_core_web_sm"}. 
If the config contains a key From 352d774cb720dbb7361c1d38714eccf0c04c3650 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 4 Nov 2025 15:19:56 +0100 Subject: [PATCH 06/28] Update black --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7fc8ab32e..d8ccfc0b6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,6 +33,6 @@ types-mock>=0.1.1 types-setuptools>=57.0.0 types-requests types-setuptools>=57.0.0 -black==22.3.0 +black>=25.0.0 cython-lint>=0.15.0 isort>=5.0,<6.0 From 4ebe7741204a433890ad0236f0f73cc015cb25e6 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 4 Nov 2025 15:50:26 +0100 Subject: [PATCH 07/28] isort --- spacy/lang/ht/__init__.py | 4 ++-- spacy/lang/ht/lemmatizer.py | 2 +- spacy/lang/ht/punctuation.py | 4 ++-- spacy/lang/ht/tag_map.py | 16 ++++++++-------- spacy/lang/ht/tokenizer_exceptions.py | 2 +- spacy/tests/lang/ht/test_noun_chunks.py | 1 + 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/spacy/lang/ht/__init__.py b/spacy/lang/ht/__init__.py index 9fc2df40c..7f9feb057 100644 --- a/spacy/lang/ht/__init__.py +++ b/spacy/lang/ht/__init__.py @@ -5,11 +5,11 @@ from thinc.api import Model from ...language import BaseDefaults, Language from .lemmatizer import HaitianCreoleLemmatizer from .lex_attrs import LEX_ATTRS -from .punctuation import TOKENIZER_PREFIXES, TOKENIZER_INFIXES, TOKENIZER_SUFFIXES +from .punctuation import TOKENIZER_INFIXES, TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES from .stop_words import STOP_WORDS from .syntax_iterators import SYNTAX_ITERATORS -from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .tag_map import TAG_MAP +from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS class HaitianCreoleDefaults(BaseDefaults): diff --git a/spacy/lang/ht/lemmatizer.py b/spacy/lang/ht/lemmatizer.py index 9ac096f6d..52bf23d23 100644 --- a/spacy/lang/ht/lemmatizer.py +++ b/spacy/lang/ht/lemmatizer.py @@ -1,8 +1,8 @@ from typing import List, Tuple +from ...lookups import Lookups from ...pipeline import Lemmatizer from ...tokens import Token -from ...lookups import Lookups class HaitianCreoleLemmatizer(Lemmatizer): diff --git a/spacy/lang/ht/punctuation.py b/spacy/lang/ht/punctuation.py index 0077db1c0..c4a5d090e 100644 --- a/spacy/lang/ht/punctuation.py +++ b/spacy/lang/ht/punctuation.py @@ -4,10 +4,10 @@ from ..char_classes import ( ALPHA_UPPER, CONCAT_QUOTES, HYPHENS, - LIST_PUNCT, - LIST_QUOTES, LIST_ELLIPSES, LIST_ICONS, + LIST_PUNCT, + LIST_QUOTES, merge_chars, ) diff --git a/spacy/lang/ht/tag_map.py b/spacy/lang/ht/tag_map.py index 261d1aef3..a190984a6 100644 --- a/spacy/lang/ht/tag_map.py +++ b/spacy/lang/ht/tag_map.py @@ -1,20 +1,20 @@ from spacy.symbols import ( - NOUN, - VERB, - AUX, ADJ, - ADV, - PRON, - DET, ADP, - SCONJ, + ADV, + AUX, CCONJ, - PART, + DET, INTJ, + NOUN, NUM, + PART, + PRON, PROPN, PUNCT, + SCONJ, SYM, + VERB, X, ) diff --git a/spacy/lang/ht/tokenizer_exceptions.py b/spacy/lang/ht/tokenizer_exceptions.py index 4d617fd36..deb152c25 100644 --- a/spacy/lang/ht/tokenizer_exceptions.py +++ b/spacy/lang/ht/tokenizer_exceptions.py @@ -1,4 +1,4 @@ -from spacy.symbols import ORTH, NORM +from spacy.symbols import NORM, ORTH def make_variants(base, first_norm, second_orth, second_norm): diff --git a/spacy/tests/lang/ht/test_noun_chunks.py b/spacy/tests/lang/ht/test_noun_chunks.py index 76c5a1df3..fcefd7dfd 100644 --- a/spacy/tests/lang/ht/test_noun_chunks.py +++ b/spacy/tests/lang/ht/test_noun_chunks.py @@ -1,4 +1,5 @@ import pytest + 
from spacy.tokens import Doc From 68bf84ec5cb2a9550e1bc1d78332d64119818835 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 5 Nov 2025 10:10:42 +0100 Subject: [PATCH 08/28] Fix type errors --- spacy/cli/download.py | 2 +- spacy/pipeline/lemmatizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 8ab6a2997..8104fd2d2 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -42,7 +42,7 @@ def download_cli( DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models """ - download(model, direct, sdist, custom_url=url, *ctx.args) + download(model, direct, sdist, url, *ctx.args) def download( diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 26867b473..e8d467ef8 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -210,7 +210,7 @@ class Lemmatizer(Pipe): rules = rules_table.get(univ_pos, {}) orig = string string = string.lower() - forms = [] + forms = [] # type: ignore oov_forms = [] for old, new in rules: if string.endswith(old): From d01a180a7f62c8fa4b12c5b00fea44b682fbf90b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 5 Nov 2025 10:26:48 +0100 Subject: [PATCH 09/28] Update build matrix for tests --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ee1b8af4..bb4eb2781 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.9", "3.12", "3.13"] + python_version: ["3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.os }} From ac95fc541c4094f05ccc55456f58ca491dec7bfe Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 5 Nov 2025 14:56:48 +0100 Subject: [PATCH 10/28] Update weasel dependency --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index d8ccfc0b6..6e79ed526 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ wasabi>=0.9.1,<1.2.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 typer-slim>=0.3.0,<1.0.0 -weasel>=0.1.0,<0.5.0 +weasel>=0.4.2,<0.5.0 # Third party dependencies numpy>=2.0.0,<3.0.0 requests>=2.13.0,<3.0.0 From b0ba71d4e71bfa1fcc182059aee1fa8213d6d303 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 5 Nov 2025 14:57:08 +0100 Subject: [PATCH 11/28] Update weasel dependency --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index f4d50d424..c64d533b8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -53,7 +53,7 @@ install_requires = wasabi>=0.9.1,<1.2.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 - weasel>=0.1.0,<0.5.0 + weasel>=0.4.2,<0.5.0 # Third-party dependencies typer-slim>=0.3.0,<1.0.0 tqdm>=4.38.0,<5.0.0 From 2d7f8506765afda581c2996884c49a82e86e0067 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 5 Nov 2025 14:57:37 +0100 Subject: [PATCH 12/28] Increment version --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 017fa35bf..cc6c1db0b 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,5 +1,5 @@ # fmt: off __title__ = "spacy" -__version__ = "3.8.7" +__version__ = "3.8.8" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = 
"https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" From 38056e9012e2a6a7e9e1e2e018c15dcc4d009344 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 6 Nov 2025 01:55:38 +0100 Subject: [PATCH 13/28] Disable 3.9 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 06289ccab..a465605c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "pp* cp36* cp37* cp38* *-win32 *i686*" +skip = "pp* cp36* cp37* cp38* cp39* *-win32 *i686*" test-skip = "" free-threaded-support = false From 09b4eb4ebe73fac888dae60df8f5d752d213ed16 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 10 Nov 2025 11:53:30 +0100 Subject: [PATCH 14/28] Use reuseable gha --- .github/workflows/cibuildwheel.yml | 92 +++--------------------------- pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 86 deletions(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 91313a7ff..d4bbc9579 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -9,91 +9,13 @@ on: - 'prerelease-v[0-9]+.[0-9]+.[0-9]+**' jobs: build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - # macos-13 is an intel runner, macos-14 is apple silicon - os: [ubuntu-latest, windows-latest, macos-13, macos-14, ubuntu-24.04-arm] - - steps: - - uses: actions/checkout@v4 - # aarch64 (arm) is built via qemu emulation - # QEMU is sadly too slow. We need to wait for public ARM support - #- name: Set up QEMU - # if: runner.os == 'Linux' - # uses: docker/setup-qemu-action@v3 - # with: - # platforms: all - - name: Build wheels - uses: pypa/cibuildwheel@v2.21.3 - env: - CIBW_ARCHS_LINUX: auto - with: - package-dir: . 
-          output-dir: wheelhouse
-          config-file: "{package}/pyproject.toml"
-      - uses: actions/upload-artifact@v4
-        with:
-          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
-          path: ./wheelhouse/*.whl
-
-  build_sdist:
-    name: Build source distribution
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Build sdist
-        run: pipx run build --sdist
-      - uses: actions/upload-artifact@v4
-        with:
-          name: cibw-sdist
-          path: dist/*.tar.gz
-  create_release:
-    needs: [build_wheels, build_sdist]
-    runs-on: ubuntu-latest
+    uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@main
     permissions:
       contents: write
-      checks: write
       actions: read
-      issues: read
-      packages: write
-      pull-requests: read
-      repository-projects: read
-      statuses: read
-    steps:
-      - name: Get the tag name and determine if it's a prerelease
-        id: get_tag_info
-        run: |
-          FULL_TAG=${GITHUB_REF#refs/tags/}
-          if [[ $FULL_TAG == release-* ]]; then
-            TAG_NAME=${FULL_TAG#release-}
-            IS_PRERELEASE=false
-          elif [[ $FULL_TAG == prerelease-* ]]; then
-            TAG_NAME=${FULL_TAG#prerelease-}
-            IS_PRERELEASE=true
-          else
-            echo "Tag does not match expected patterns" >&2
-            exit 1
-          fi
-          echo "FULL_TAG=$TAG_NAME" >> $GITHUB_ENV
-          echo "TAG_NAME=$TAG_NAME" >> $GITHUB_ENV
-          echo "IS_PRERELEASE=$IS_PRERELEASE" >> $GITHUB_ENV
-      - uses: actions/download-artifact@v4
-        with:
-          # unpacks all CIBW artifacts into dist/
-          pattern: cibw-*
-          path: dist
-          merge-multiple: true
-      - name: Create Draft Release
-        id: create_release
-        uses: softprops/action-gh-release@v2
-        if: startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          name: ${{ env.TAG_NAME }}
-          draft: true
-          prerelease: ${{ env.IS_PRERELEASE }}
-          files: "./dist/*"
+    with:
+      wheel-name-pattern: "thinc-*.whl"
+      pure-python: false
+    secrets:
+      gh-token: ${{ secrets.GITHUB_TOKEN }}
+
diff --git a/pyproject.toml b/pyproject.toml
index a465605c1..892c7659f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta"
 
 [tool.cibuildwheel]
 build = "*"
-skip = "pp* cp36* cp37* cp38* cp39* *-win32 *i686*"
+skip = "pp* cp38* cp39* *-win32 *i686*"
 test-skip = ""
 free-threaded-support = false

From b49c537a0b19ebc20ffb899c96501193f90a2664 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 10 Nov 2025 11:54:00 +0100
Subject: [PATCH 15/28] Update version

---
 spacy/about.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/about.py b/spacy/about.py
index cc6c1db0b..5242f3cb4 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,5 +1,5 @@
 # fmt: off
 __title__ = "spacy"
-__version__ = "3.8.8"
+__version__ = "3.8.9"
 __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

From c9d77932f9fafb720226e8856f787e33ef57a034 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 10 Nov 2025 11:56:03 +0100
Subject: [PATCH 16/28] Update pyproject.toml

---
 pyproject.toml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 892c7659f..1d26c6c89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,6 @@ build-backend = "setuptools.build_meta"
 build = "*"
 skip = "pp* cp38* cp39* *-win32 *i686*"
 test-skip = ""
-free-threaded-support = false
 archs = ["native"]

From a534b43ced20850324de539ab91cc2ae6fdbffbf Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 13 Nov 2025 14:23:55 +0100
Subject: [PATCH 17/28] Fix wheel path
name on cibuildwheel --- .github/workflows/cibuildwheel.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index d4bbc9579..5f8ba9285 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -14,7 +14,7 @@ jobs: contents: write actions: read with: - wheel-name-pattern: "thinc-*.whl" + wheel-name-pattern: "spacy-*.whl" pure-python: false secrets: gh-token: ${{ secrets.GITHUB_TOKEN }} From 6d386bf707a33e9a6535d85355f09daf0ee4f137 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 13 Nov 2025 14:24:43 +0100 Subject: [PATCH 18/28] Skip building free-threaded --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 1d26c6c89..8d7f6eb5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "pp* cp38* cp39* *-win32 *i686*" +skip = "cp39* *-win32 *i686* cp3??t-*" test-skip = "" archs = ["native"] From 305ffd5560e8b94477294b6515ed07f7edf3c4fc Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 13 Nov 2025 14:25:38 +0100 Subject: [PATCH 19/28] Fix cdef declaration for cython 3 --- spacy/tokens/token.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd index 3252fcdeb..e3e270a24 100644 --- a/spacy/tokens/token.pxd +++ b/spacy/tokens/token.pxd @@ -11,7 +11,7 @@ from .doc cimport Doc from ..errors import Errors -cdef int MISSING_DEP = 0 +cdef const int MISSING_DEP = 0 cdef class Token: cdef readonly Vocab vocab From a24bb01613a57981dbe7921b2379d56ddbcacdc2 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 13:19:11 +0100 Subject: [PATCH 20/28] Support python 3.14 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c64d533b8..c4928af92 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ project_urls = [options] zip_safe = false include_package_data = true -python_requires = >=3.9,<3.14 +python_requires = >=3.9,<3.15 # NOTE: This section is superseded by pyproject.toml and will be removed in # spaCy v4 setup_requires = From 7abd196000e3985b8afe9f8d8428475cf21900b5 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 13:19:29 +0100 Subject: [PATCH 21/28] Set version to 3.8.10 --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 5242f3cb4..55115e94c 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,5 +1,5 @@ # fmt: off __title__ = "spacy" -__version__ = "3.8.9" +__version__ = "3.8.10" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" From 71e938dbf73e4215140828de0c219047c2d9c452 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 14:11:25 +0100 Subject: [PATCH 22/28] Skip windows arm --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d7f6eb5a..7a938679f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "cp39* *-win32 *i686* cp3??t-*" +skip = "cp39* *-win32 *i686* cp3??t-* *win_arm64" test-skip = "" archs = ["native"] From 
75f1160c8ca13c626bfbd785f5ae7fb094c6ce39 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 14:23:39 +0100 Subject: [PATCH 23/28] Skip windows arm --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7a938679f..b17e44da9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "cp39* *-win32 *i686* cp3??t-* *win_arm64" +skip = "cp39* *-win32 *i686* cp3??t-* *win-arm*" test-skip = "" archs = ["native"] From 160d72852ebdeaddde9cd3e0d62f544d8540acb8 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 14:29:41 +0100 Subject: [PATCH 24/28] Try again to skip windows arm --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b17e44da9..a76a61856 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "cp39* *-win32 *i686* cp3??t-* *win-arm*" +skip = "cp39* *-win32 *i686* cp3??t-* *win*arm*" test-skip = "" archs = ["native"] From c273c231e1d34b0d85fe00b9b5c25429373fa0d6 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 14:36:48 +0100 Subject: [PATCH 25/28] Try again to fix the skip selector --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a76a61856..a70b3d10b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "cp39* *-win32 *i686* cp3??t-* *win*arm*" +skip = "cp39* *-win32 *i686* cp3??t-* *-win_arm64" test-skip = "" archs = ["native"] From e7d1c3a30dd54c8a1d7d7af7b20f975c0fbe1966 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 14:41:17 +0100 Subject: [PATCH 26/28] Windows arm needs to be disabled at the ci level, so remove this skip selector --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a70b3d10b..8d7f6eb5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "cp39* *-win32 *i686* cp3??t-* *-win_arm64" +skip = "cp39* *-win32 *i686* cp3??t-*" test-skip = "" archs = ["native"] From f628c69bdbbb74d7887138828c2606821c76ef97 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 18:21:16 +0100 Subject: [PATCH 27/28] Increment version --- spacy/about.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/about.py b/spacy/about.py index 55115e94c..a93d91532 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,5 +1,5 @@ # fmt: off __title__ = "spacy" -__version__ = "3.8.10" +__version__ = "3.8.11" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" From e7a662acf8eda206a091aa3e7316700e0e6c699d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 17 Nov 2025 18:21:33 +0100 Subject: [PATCH 28/28] Skip Python 3.10 on Windows ARM --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8d7f6eb5a..64b71429e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = 
"*" -skip = "cp39* *-win32 *i686* cp3??t-*" +skip = "cp39* *-win32 *i686* cp3??t-* *cp310-win_arm64" test-skip = "" archs = ["native"]