diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index 91313a7ff..5f8ba9285 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -9,91 +9,13 @@ on: - 'prerelease-v[0-9]+.[0-9]+.[0-9]+**' jobs: build_wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - # macos-13 is an intel runner, macos-14 is apple silicon - os: [ubuntu-latest, windows-latest, macos-13, macos-14, ubuntu-24.04-arm] - - steps: - - uses: actions/checkout@v4 - # aarch64 (arm) is built via qemu emulation - # QEMU is sadly too slow. We need to wait for public ARM support - #- name: Set up QEMU - # if: runner.os == 'Linux' - # uses: docker/setup-qemu-action@v3 - # with: - # platforms: all - - name: Build wheels - uses: pypa/cibuildwheel@v2.21.3 - env: - CIBW_ARCHS_LINUX: auto - with: - package-dir: . - output-dir: wheelhouse - config-file: "{package}/pyproject.toml" - - uses: actions/upload-artifact@v4 - with: - name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} - path: ./wheelhouse/*.whl - - build_sdist: - name: Build source distribution - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Build sdist - run: pipx run build --sdist - - uses: actions/upload-artifact@v4 - with: - name: cibw-sdist - path: dist/*.tar.gz - create_release: - needs: [build_wheels, build_sdist] - runs-on: ubuntu-latest + uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@main permissions: contents: write - checks: write actions: read - issues: read - packages: write - pull-requests: read - repository-projects: read - statuses: read - steps: - - name: Get the tag name and determine if it's a prerelease - id: get_tag_info - run: | - FULL_TAG=${GITHUB_REF#refs/tags/} - if [[ $FULL_TAG == release-* ]]; then - TAG_NAME=${FULL_TAG#release-} - IS_PRERELEASE=false - elif [[ $FULL_TAG == prerelease-* ]]; then - TAG_NAME=${FULL_TAG#prerelease-} - IS_PRERELEASE=true - 
else - echo "Tag does not match expected patterns" >&2 - exit 1 - fi - echo "FULL_TAG=$TAG_NAME" >> $GITHUB_ENV - echo "TAG_NAME=$TAG_NAME" >> $GITHUB_ENV - echo "IS_PRERELEASE=$IS_PRERELEASE" >> $GITHUB_ENV - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: cibw-* - path: dist - merge-multiple: true - - name: Create Draft Release - id: create_release - uses: softprops/action-gh-release@v2 - if: startsWith(github.ref, 'refs/tags/') - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - name: ${{ env.TAG_NAME }} - draft: true - prerelease: ${{ env.IS_PRERELEASE }} - files: "./dist/*" + with: + wheel-name-pattern: "spacy-*.whl" + pure-python: false + secrets: + gh-token: ${{ secrets.GITHUB_TOKEN }} + diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6ee1b8af4..bb4eb2781 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -59,7 +59,7 @@ jobs: fail-fast: true matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.9", "3.12", "3.13"] + python_version: ["3.10", "3.11", "3.12", "3.13"] runs-on: ${{ matrix.os }} diff --git a/pyproject.toml b/pyproject.toml index 06289ccab..64b71429e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,9 +12,8 @@ build-backend = "setuptools.build_meta" [tool.cibuildwheel] build = "*" -skip = "pp* cp36* cp37* cp38* *-win32 *i686*" +skip = "cp39* *-win32 *i686* cp3??t-* *cp310-win_arm64" test-skip = "" -free-threaded-support = false archs = ["native"] diff --git a/requirements.txt b/requirements.txt index 7fc8ab32e..6e79ed526 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ wasabi>=0.9.1,<1.2.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 typer-slim>=0.3.0,<1.0.0 -weasel>=0.1.0,<0.5.0 +weasel>=0.4.2,<0.5.0 # Third party dependencies numpy>=2.0.0,<3.0.0 requests>=2.13.0,<3.0.0 @@ -33,6 +33,6 @@ types-mock>=0.1.1 types-setuptools>=57.0.0 types-requests types-setuptools>=57.0.0 
-black==22.3.0 +black>=25.0.0 cython-lint>=0.15.0 isort>=5.0,<6.0 diff --git a/setup.cfg b/setup.cfg index f4d50d424..c4928af92 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,7 +30,7 @@ project_urls = [options] zip_safe = false include_package_data = true -python_requires = >=3.9,<3.14 +python_requires = >=3.9,<3.15 # NOTE: This section is superseded by pyproject.toml and will be removed in # spaCy v4 setup_requires = @@ -53,7 +53,7 @@ install_requires = wasabi>=0.9.1,<1.2.0 srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 - weasel>=0.1.0,<0.5.0 + weasel>=0.4.2,<0.5.0 # Third-party dependencies typer-slim>=0.3.0,<1.0.0 tqdm>=4.38.0,<5.0.0 diff --git a/spacy/about.py b/spacy/about.py index 017fa35bf..a93d91532 100644 --- a/spacy/about.py +++ b/spacy/about.py @@ -1,5 +1,5 @@ # fmt: off __title__ = "spacy" -__version__ = "3.8.7" +__version__ = "3.8.11" __download_url__ = "https://github.com/explosion/spacy-models/releases/download" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 7b976ec58..cd6eb47ff 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -20,7 +20,7 @@ from typing import ( import srsly import typer from click import NoSuchOption -from click.parser import split_arg_string +from click.shell_completion import split_arg_string from thinc.api import Config, ConfigValidationError, require_gpu from thinc.util import gpu_is_available from typer.main import get_command @@ -225,13 +225,11 @@ def get_git_version( @overload -def string_to_list(value: str, intify: Literal[False] = ...) -> List[str]: - ... +def string_to_list(value: str, intify: Literal[False] = ...) -> List[str]: ... @overload -def string_to_list(value: str, intify: Literal[True]) -> List[int]: - ... +def string_to_list(value: str, intify: Literal[True]) -> List[int]: ... 
def string_to_list(value: str, intify: bool = False) -> Union[List[str], List[int]]: diff --git a/spacy/cli/debug_data.py b/spacy/cli/debug_data.py index af3c24f3b..1c9c0e0ea 100644 --- a/spacy/cli/debug_data.py +++ b/spacy/cli/debug_data.py @@ -968,16 +968,14 @@ def _compile_gold( @overload -def _format_labels(labels: Iterable[str], counts: Literal[False] = False) -> str: - ... +def _format_labels(labels: Iterable[str], counts: Literal[False] = False) -> str: ... @overload def _format_labels( labels: Iterable[Tuple[str, int]], counts: Literal[True], -) -> str: - ... +) -> str: ... def _format_labels( diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 4261fb830..8104fd2d2 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -29,6 +29,7 @@ def download_cli( model: str = Arg(..., help="Name of pipeline package to download"), direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"), sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"), + url: str = Opt(None, "--url", "-U", help="Download from given url") # fmt: on ): """ @@ -41,13 +42,14 @@ def download_cli( DOCS: https://spacy.io/api/cli#download AVAILABLE PACKAGES: https://spacy.io/models """ - download(model, direct, sdist, *ctx.args) + download(model, direct, sdist, url, *ctx.args) def download( model: str, direct: bool = False, sdist: bool = False, + custom_url: Optional[str] = None, *pip_args, ) -> None: if ( @@ -87,7 +89,7 @@ def download( filename = get_model_filename(model_name, version, sdist) - download_model(filename, pip_args) + download_model(filename, pip_args, custom_url) msg.good( "Download and installation successful", f"You can now load the package via spacy.load('{model_name}')", @@ -159,12 +161,14 @@ def get_latest_version(model: str) -> str: def download_model( - filename: str, user_pip_args: Optional[Sequence[str]] = None + filename: str, + user_pip_args: 
Optional[Sequence[str]] = None, + custom_url: Optional[str] = None, ) -> None: # Construct the download URL carefully. We need to make sure we don't # allow relative paths or other shenanigans to trick us into download # from outside our own repo. - base_url = about.__download_url__ + base_url = custom_url if custom_url else about.__download_url__ # urljoin requires that the path ends with /, or the last path part will be dropped if not base_url.endswith("/"): - base_url = about.__download_url__ + "/" + base_url = base_url + "/" diff --git a/spacy/cli/find_threshold.py b/spacy/cli/find_threshold.py index 3e86495e7..ff7af32e6 100644 --- a/spacy/cli/find_threshold.py +++ b/spacy/cli/find_threshold.py @@ -157,9 +157,11 @@ def find_threshold( exits=1, ) return { - keys[0]: filter_config(config[keys[0]], keys[1:], full_key) - if len(keys) > 1 - else config[keys[0]] + keys[0]: ( + filter_config(config[keys[0]], keys[1:], full_key) + if len(keys) > 1 + else config[keys[0]] + ) } # Evaluate with varying threshold values. @@ -216,12 +218,14 @@ def find_threshold( if len(set(scores.values())) == 1: wasabi.msg.warn( title="All scores are identical. Verify that all settings are correct.", - text="" - if ( - not isinstance(pipe, MultiLabel_TextCategorizer) - or scores_key in ("cats_macro_f", "cats_micro_f") - ) - else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`.", + text=( + "" + if ( + not isinstance(pipe, MultiLabel_TextCategorizer) + or scores_key in ("cats_macro_f", "cats_micro_f") + ) + else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`."
+ ), ) else: diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py index a7c03d00f..a7fb2b5b8 100644 --- a/spacy/cli/init_config.py +++ b/spacy/cli/init_config.py @@ -195,9 +195,11 @@ def init_config( "Pipeline": ", ".join(pipeline), "Optimize for": optimize, "Hardware": variables["hardware"].upper(), - "Transformer": template_vars.transformer.get("name") # type: ignore[attr-defined] - if template_vars.use_transformer # type: ignore[attr-defined] - else None, + "Transformer": ( + template_vars.transformer.get("name") # type: ignore[attr-defined] + if template_vars.use_transformer # type: ignore[attr-defined] + else None + ), } msg.info("Generated config template specific for your use case") for label, value in use_case.items(): diff --git a/spacy/compat.py b/spacy/compat.py index 522fa30dd..a9e7d5a20 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -1,4 +1,5 @@ """Helpers for Python and platform compatibility.""" + import sys from thinc.util import copy_array diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index bde2d04fe..55474734a 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -4,6 +4,7 @@ spaCy's built in visualization suite for dependencies and named entities. 
DOCS: https://spacy.io/api/top-level#displacy USAGE: https://spacy.io/usage/visualizers """ + import warnings from typing import Any, Callable, Dict, Iterable, Optional, Union @@ -66,7 +67,8 @@ def render( if jupyter or (jupyter is None and is_in_jupyter()): # return HTML rendered by IPython display() # See #4840 for details on span wrapper to disable mathjax - from IPython.core.display import HTML, display + from IPython.core.display import HTML + from IPython.display import display return display(HTML('{}'.format(html))) return html diff --git a/spacy/lang/am/examples.py b/spacy/lang/am/examples.py index 253d32d1d..b156cb84f 100644 --- a/spacy/lang/am/examples.py +++ b/spacy/lang/am/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "አፕል የዩኬን ጅምር ድርጅት በ 1 ቢሊዮን ዶላር ለመግዛት አስቧል።", "የራስ ገዝ መኪኖች የኢንሹራንስ ኃላፊነትን ወደ አምራቾች ያዛውራሉ", diff --git a/spacy/lang/az/examples.py b/spacy/lang/az/examples.py index f3331a8cb..df5e3521d 100644 --- a/spacy/lang/az/examples.py +++ b/spacy/lang/az/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Bu bir cümlədir.", "Necəsən?", diff --git a/spacy/lang/bg/stop_words.py b/spacy/lang/bg/stop_words.py index df708b65e..061850da5 100644 --- a/spacy/lang/bg/stop_words.py +++ b/spacy/lang/bg/stop_words.py @@ -3,6 +3,7 @@ References: https://github.com/Alir3z4/stop-words - Original list, serves as a base. https://postvai.com/books/stop-dumi.pdf - Additions to the original list in order to improve it. """ + STOP_WORDS = set( """ а автентичен аз ако ала diff --git a/spacy/lang/bn/examples.py b/spacy/lang/bn/examples.py index c3be4c556..11a65acb1 100644 --- a/spacy/lang/bn/examples.py +++ b/spacy/lang/bn/examples.py @@ -5,5 +5,4 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = ["তুই খুব ভালো", "আজ আমরা ডাক্তার দেখতে যাবো", "আমি জানি না "] diff --git a/spacy/lang/bo/examples.py b/spacy/lang/bo/examples.py index 8ed9372ec..8655f2d9d 100644 --- a/spacy/lang/bo/examples.py +++ b/spacy/lang/bo/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "དོན་དུ་རྒྱ་མཚོ་བླ་མ་ཞེས་བྱ་ཞིང༌།", "ཏཱ་ལའི་ཞེས་པ་ནི་སོག་སྐད་ཡིན་པ་དེ་བོད་སྐད་དུ་རྒྱ་མཚོའི་དོན་དུ་འཇུག", diff --git a/spacy/lang/ca/examples.py b/spacy/lang/ca/examples.py index ae6aa3e24..de54c05ce 100644 --- a/spacy/lang/ca/examples.py +++ b/spacy/lang/ca/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple està buscant comprar una startup del Regne Unit per mil milions de dòlars", "Els cotxes autònoms deleguen la responsabilitat de l'assegurança als seus fabricants", diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py index 37c58c85f..69e752c91 100644 --- a/spacy/lang/char_classes.py +++ b/spacy/lang/char_classes.py @@ -277,10 +277,10 @@ _currency = ( # These expressions contain various unicode variations, including characters # used in Chinese (see #1333, #1340, #1351) – unless there are cross-language # conflicts, spaCy's base tokenizer should handle all of those by default -_punct = ( - r"… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · । ، ۔ ؛ ٪" +_punct = r"… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! 
, 、 ; : ~ · । ، ۔ ؛ ٪" +_quotes = ( + r'\' " ” “ ` ‘ ´ ’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉 〈 〉 ⟦ ⟧' ) -_quotes = r'\' " ” “ ` ‘ ´ ’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉 〈 〉 ⟦ ⟧' _hyphens = "- – — -- --- —— ~" # Various symbols like dingbats, but also emoji diff --git a/spacy/lang/cs/examples.py b/spacy/lang/cs/examples.py index a30b5ac14..35d86dde7 100644 --- a/spacy/lang/cs/examples.py +++ b/spacy/lang/cs/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Máma mele maso.", "Příliš žluťoučký kůň úpěl ďábelské ódy.", diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py index 649d12022..15a943ad6 100644 --- a/spacy/lang/da/tokenizer_exceptions.py +++ b/spacy/lang/da/tokenizer_exceptions.py @@ -2,6 +2,7 @@ Tokenizer Exceptions. Source: https://forkortelse.dk/ and various others. """ + from ...symbols import NORM, ORTH from ...util import update_exc from ..tokenizer_exceptions import BASE_EXCEPTIONS diff --git a/spacy/lang/de/examples.py b/spacy/lang/de/examples.py index 735d1c316..30b8f195b 100644 --- a/spacy/lang/de/examples.py +++ b/spacy/lang/de/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Die ganze Stadt ist ein Startup: Shenzhen ist das Silicon Valley für Hardware-Firmen", "Wie deutsche Startups die Technologie vorantreiben wollen: Künstliche Intelligenz", diff --git a/spacy/lang/dsb/examples.py b/spacy/lang/dsb/examples.py index 6e9143826..11ecbddb2 100644 --- a/spacy/lang/dsb/examples.py +++ b/spacy/lang/dsb/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Z tym stwori so wuměnjenje a zakład za dalše wobdźěłanje přez analyzu tekstoweje struktury a semantisku anotaciju a z tym tež za tu předstajenu digitalnu online-wersiju.", "Mi so tu jara derje spodoba.", diff --git a/spacy/lang/en/examples.py b/spacy/lang/en/examples.py index 2cca9e05f..7ed0ba0c1 100644 --- a/spacy/lang/en/examples.py +++ b/spacy/lang/en/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple is looking at buying U.K. startup for $1 billion", "Autonomous cars shift insurance liability toward manufacturers", diff --git a/spacy/lang/es/examples.py b/spacy/lang/es/examples.py index e4dfbcb6d..653a38bfd 100644 --- a/spacy/lang/es/examples.py +++ b/spacy/lang/es/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple está buscando comprar una startup del Reino Unido por mil millones de dólares.", "Los coches autónomos delegan la responsabilidad del seguro en sus fabricantes.", diff --git a/spacy/lang/fa/examples.py b/spacy/lang/fa/examples.py index 9c6fb0345..6810e48d5 100644 --- a/spacy/lang/fa/examples.py +++ b/spacy/lang/fa/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "این یک جمله نمونه می باشد.", "قرار ما، امروز ساعت ۲:۳۰ بعدازظهر هست!", diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py index 881d5b91d..0bbd7bd91 100644 --- a/spacy/lang/fi/tokenizer_exceptions.py +++ b/spacy/lang/fi/tokenizer_exceptions.py @@ -100,9 +100,9 @@ conj_contraction_negations = [ ("eivat", "eivät"), ("eivät", "eivät"), ] -for (base_lower, base_norm) in conj_contraction_bases: +for base_lower, base_norm in conj_contraction_bases: for base in [base_lower, base_lower.title()]: - for (suffix, suffix_norm) in conj_contraction_negations: + for suffix, suffix_norm in conj_contraction_negations: _exc[base + suffix] = [ {ORTH: base, NORM: base_norm}, {ORTH: suffix, NORM: suffix_norm}, diff --git a/spacy/lang/fr/examples.py b/spacy/lang/fr/examples.py index a74a62204..759de5615 100644 --- a/spacy/lang/fr/examples.py +++ b/spacy/lang/fr/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple cherche à acheter une start-up anglaise pour 1 milliard de dollars", "Les voitures autonomes déplacent la responsabilité de l'assurance vers les constructeurs", diff --git a/spacy/lang/grc/examples.py b/spacy/lang/grc/examples.py index 9c0bcb265..51ec8f8cc 100644 --- a/spacy/lang/grc/examples.py +++ b/spacy/lang/grc/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "ἐρᾷ μὲν ἁγνὸς οὐρανὸς τρῶσαι χθόνα, ἔρως δὲ γαῖαν λαμβάνει γάμου τυχεῖν·", "εὐδαίμων Χαρίτων καὶ Μελάνιππος ἔφυ, θείας ἁγητῆρες ἐφαμερίοις φιλότατος.", diff --git a/spacy/lang/gu/examples.py b/spacy/lang/gu/examples.py index 1cf75fd32..e67b7ba9d 100644 --- a/spacy/lang/gu/examples.py +++ b/spacy/lang/gu/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "લોકશાહી એ સરકારનું એક એવું તંત્ર છે જ્યાં નાગરિકો મત દ્વારા સત્તાનો ઉપયોગ કરે છે.", "તે ગુજરાત રાજ્યના ધરમપુર શહેરમાં આવેલું હતું", diff --git a/spacy/lang/he/examples.py b/spacy/lang/he/examples.py index d54d2a145..ee484e07b 100644 --- a/spacy/lang/he/examples.py +++ b/spacy/lang/he/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "סין מקימה קרן של 440 מיליון דולר להשקעה בהייטק בישראל", 'רה"מ הודיע כי יחרים טקס בחסותו', diff --git a/spacy/lang/hi/examples.py b/spacy/lang/hi/examples.py index 1443b4908..f3196c58f 100644 --- a/spacy/lang/hi/examples.py +++ b/spacy/lang/hi/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "एप्पल 1 अरब डॉलर के लिए यू.के. स्टार्टअप खरीदने पर विचार कर रहा है।", "स्वायत्त कारें निर्माताओं की ओर बीमा दायित्व रखतीं हैं।", diff --git a/spacy/lang/hsb/examples.py b/spacy/lang/hsb/examples.py index 21f6f7584..754011c6f 100644 --- a/spacy/lang/hsb/examples.py +++ b/spacy/lang/hsb/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "To běšo wjelgin raźone a jo se wót luźi derje pśiwzeło. Tak som dožywiła wjelgin", "Jogo pśewóźowarce stej groniłej, až how w serbskich stronach njama Santa Claus nic pytaś.", diff --git a/spacy/lang/ht/examples.py b/spacy/lang/ht/examples.py index 456d34a5f..0afeb19c8 100644 --- a/spacy/lang/ht/examples.py +++ b/spacy/lang/ht/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple ap panse achte yon demaraj nan Wayòm Ini pou $1 milya dola", "Machin otonòm fè responsablite asirans lan ale sou men fabrikan yo", diff --git a/spacy/lang/hu/examples.py b/spacy/lang/hu/examples.py index 711a438bd..c056c0967 100644 --- a/spacy/lang/hu/examples.py +++ b/spacy/lang/hu/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Az Apple egy brit startup vásárlását tervezi 1 milliárd dollár értékben.", "San Francisco vezetése mérlegeli a járdát használó szállító robotok betiltását.", diff --git a/spacy/lang/hu/punctuation.py b/spacy/lang/hu/punctuation.py index dbf93c622..dc9741076 100644 --- a/spacy/lang/hu/punctuation.py +++ b/spacy/lang/hu/punctuation.py @@ -11,7 +11,7 @@ from ..char_classes import ( ) # removing ° from the special icons to keep e.g. 99° as one token -_concat_icons = CONCAT_ICONS.replace("\u00B0", "") +_concat_icons = CONCAT_ICONS.replace("\u00b0", "") _currency = r"\$¢£€¥฿" _quotes = CONCAT_QUOTES.replace("'", "") diff --git a/spacy/lang/hy/examples.py b/spacy/lang/hy/examples.py index 212a2ec86..9455396db 100644 --- a/spacy/lang/hy/examples.py +++ b/spacy/lang/hy/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Լոնդոնը Միացյալ Թագավորության մեծ քաղաք է։", "Ո՞վ է Ֆրանսիայի նախագահը։", diff --git a/spacy/lang/id/examples.py b/spacy/lang/id/examples.py index d35271551..17d1c5f28 100644 --- a/spacy/lang/id/examples.py +++ b/spacy/lang/id/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Indonesia merupakan negara kepulauan yang kaya akan budaya.", "Berapa banyak warga yang dibutuhkan saat kerja bakti?", diff --git a/spacy/lang/it/examples.py b/spacy/lang/it/examples.py index 506721276..ae857382a 100644 --- a/spacy/lang/it/examples.py +++ b/spacy/lang/it/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple vuole comprare una startup del Regno Unito per un miliardo di dollari", "Le automobili a guida autonoma spostano la responsabilità assicurativa verso i produttori", diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py index e21e85cd9..492478af3 100644 --- a/spacy/lang/ja/__init__.py +++ b/spacy/lang/ja/__init__.py @@ -102,9 +102,9 @@ class JapaneseTokenizer(DummyTokenizer): token.dictionary_form(), # lemma token.normalized_form(), token.reading_form(), - sub_tokens_list[idx] - if sub_tokens_list - else None, # user_data['sub_tokens'] + ( + sub_tokens_list[idx] if sub_tokens_list else None + ), # user_data['sub_tokens'] ) for idx, token in enumerate(sudachipy_tokens) if len(token.surface()) > 0 diff --git a/spacy/lang/ja/examples.py b/spacy/lang/ja/examples.py index c3a011862..a07711c53 100644 --- a/spacy/lang/ja/examples.py +++ b/spacy/lang/ja/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "アップルがイギリスの新興企業を10億ドルで購入を検討", "自動運転車の損害賠償責任、自動車メーカーに一定の負担を求める", diff --git a/spacy/lang/ja/tag_map.py b/spacy/lang/ja/tag_map.py index 5c14f41bf..527c83629 100644 --- a/spacy/lang/ja/tag_map.py +++ b/spacy/lang/ja/tag_map.py @@ -25,7 +25,9 @@ TAG_MAP = { # Universal Dependencies Mapping: (Some of the entries in this mapping are updated to v2.6 in the list below) # http://universaldependencies.org/ja/overview/morphology.html # http://universaldependencies.org/ja/pos/all.html - "記号-一般": {POS: NOUN}, # this includes characters used to represent sounds like ドレミ + "記号-一般": { + POS: NOUN + }, # this includes characters used to represent sounds like ドレミ "記号-文字": { POS: NOUN }, # this is for Greek and Latin characters having some meanings, or used as symbols, as in math @@ -72,7 +74,9 @@ TAG_MAP = { "名詞-固有名詞-地名-国": {POS: PROPN}, # country name "名詞-助動詞語幹": {POS: AUX}, "名詞-数詞": {POS: NUM}, # includes Chinese numerals - "名詞-普通名詞-サ変可能": {POS: NOUN}, # XXX: sometimes VERB in UDv2; suru-verb noun + "名詞-普通名詞-サ変可能": { + POS: NOUN + }, # XXX: sometimes VERB in UDv2; suru-verb noun "名詞-普通名詞-サ変形状詞可能": {POS: NOUN}, "名詞-普通名詞-一般": {POS: NOUN}, "名詞-普通名詞-形状詞可能": {POS: NOUN}, # XXX: sometimes ADJ in UDv2 diff --git a/spacy/lang/kn/examples.py b/spacy/lang/kn/examples.py index 3e055752e..7cbb7fc07 100644 --- a/spacy/lang/kn/examples.py +++ b/spacy/lang/kn/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "ಆಪಲ್ ಒಂದು ಯು.ಕೆ. ಸ್ಟಾರ್ಟ್ಅಪ್ ಅನ್ನು ೧ ಶತಕೋಟಿ ಡಾಲರ್ಗಳಿಗೆ ಖರೀದಿಸಲು ನೋಡುತ್ತಿದೆ.", "ಸ್ವಾಯತ್ತ ಕಾರುಗಳು ವಿಮಾ ಹೊಣೆಗಾರಿಕೆಯನ್ನು ತಯಾರಕರ ಕಡೆಗೆ ಬದಲಾಯಿಸುತ್ತವೆ.", diff --git a/spacy/lang/lij/examples.py b/spacy/lang/lij/examples.py index ba7fe43fd..ec336b07f 100644 --- a/spacy/lang/lij/examples.py +++ b/spacy/lang/lij/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Sciusciâ e sciorbî no se peu.", "Graçie di çetroin, che me son arrivæ.", diff --git a/spacy/lang/lt/examples.py b/spacy/lang/lt/examples.py index eaf941f1a..57d6eb4d1 100644 --- a/spacy/lang/lt/examples.py +++ b/spacy/lang/lt/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Jaunikis pirmąją vestuvinę naktį iškeitė į areštinės gultą", "Bepiločiai automobiliai išnaikins vairavimo mokyklas, autoservisus ir eismo nelaimes", diff --git a/spacy/lang/ml/examples.py b/spacy/lang/ml/examples.py index 9794eab29..d067b8b66 100644 --- a/spacy/lang/ml/examples.py +++ b/spacy/lang/ml/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "അനാവശ്യമായി കണ്ണിലും മൂക്കിലും വായിലും സ്പർശിക്കാതിരിക്കുക", "പൊതുരംഗത്ത് മലയാള ഭാഷയുടെ സമഗ്രപുരോഗതി ലക്ഷ്യമാക്കി പ്രവർത്തിക്കുന്ന സംഘടനയായ മലയാളഐക്യവേദിയുടെ വിദ്യാർത്ഥിക്കൂട്ടായ്മയാണ് വിദ്യാർത്ഥി മലയാളവേദി", diff --git a/spacy/lang/ms/examples.py b/spacy/lang/ms/examples.py index 97ab19b6e..1af439d4a 100644 --- a/spacy/lang/ms/examples.py +++ b/spacy/lang/ms/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Malaysia ialah sebuah negara yang terletak di Asia Tenggara.", "Berapa banyak pelajar yang akan menghadiri majlis perpisahan sekolah?", diff --git a/spacy/lang/nb/examples.py b/spacy/lang/nb/examples.py index b1a63ad74..242dab7c5 100644 --- a/spacy/lang/nb/examples.py +++ b/spacy/lang/nb/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar.", "Selvkjørende biler flytter forsikringsansvaret over på produsentene.", diff --git a/spacy/lang/ne/examples.py b/spacy/lang/ne/examples.py index a29b77c2f..cc3b382df 100644 --- a/spacy/lang/ne/examples.py +++ b/spacy/lang/ne/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "एप्पलले अमेरिकी स्टार्टअप १ अर्ब डलरमा किन्ने सोच्दै छ", "स्वायत्त कारहरूले बीमा दायित्व निर्माताहरु तिर बदल्छन्", diff --git a/spacy/lang/nl/examples.py b/spacy/lang/nl/examples.py index 8c8c50c60..3440f01db 100644 --- a/spacy/lang/nl/examples.py +++ b/spacy/lang/nl/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple overweegt om voor 1 miljard een U.K. startup te kopen", "Autonome auto's verschuiven de verzekeringverantwoordelijkheid naar producenten", diff --git a/spacy/lang/nn/examples.py b/spacy/lang/nn/examples.py index 95ec0aadd..ee03bf95e 100644 --- a/spacy/lang/nn/examples.py +++ b/spacy/lang/nn/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - # sentences taken from Omsetjingsminne frå Nynorsk pressekontor 2022 (https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-80/) sentences = [ "Konseptet går ut på at alle tre omgangar tel, alle hopparar må stille i kvalifiseringa og poengsummen skal telje.", diff --git a/spacy/lang/pl/examples.py b/spacy/lang/pl/examples.py index b1ea5880f..cb55ed07d 100644 --- a/spacy/lang/pl/examples.py +++ b/spacy/lang/pl/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Poczuł przyjemną woń mocnej kawy.", "Istnieje wiele dróg oddziaływania substancji psychoaktywnej na układ nerwowy.", diff --git a/spacy/lang/pt/examples.py b/spacy/lang/pt/examples.py index 13f3512cf..42ae602c1 100644 --- a/spacy/lang/pt/examples.py +++ b/spacy/lang/pt/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares", "Carros autônomos empurram a responsabilidade do seguro para os fabricantes." diff --git a/spacy/lang/ro/examples.py b/spacy/lang/ro/examples.py index bfa258ffc..46b4c9a67 100644 --- a/spacy/lang/ro/examples.py +++ b/spacy/lang/ro/examples.py @@ -7,7 +7,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple plănuiește să cumpere o companie britanică pentru un miliard de dolari", "Municipalitatea din San Francisco ia în calcul interzicerea roboților curieri pe trotuar", diff --git a/spacy/lang/ru/examples.py b/spacy/lang/ru/examples.py index adb007625..9595d583a 100644 --- a/spacy/lang/ru/examples.py +++ b/spacy/lang/ru/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ # Translations from English: "Apple рассматривает возможность покупки стартапа из Соединённого Королевства за $1 млрд", diff --git a/spacy/lang/sa/examples.py b/spacy/lang/sa/examples.py index 60243c04c..6a0bc4e13 100644 --- a/spacy/lang/sa/examples.py +++ b/spacy/lang/sa/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "अभ्यावहति कल्याणं विविधं वाक् सुभाषिता ।", "मनसि व्याकुले चक्षुः पश्यन्नपि न पश्यति ।", diff --git a/spacy/lang/si/examples.py b/spacy/lang/si/examples.py index b34051d00..8e0ffec69 100644 --- a/spacy/lang/si/examples.py +++ b/spacy/lang/si/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "මෙය වාක්‍යයකි.", "ඔබ කවුද?", diff --git a/spacy/lang/sk/examples.py b/spacy/lang/sk/examples.py index 736109a7c..079d0d2b1 100644 --- a/spacy/lang/sk/examples.py +++ b/spacy/lang/sk/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Ardevop, s.r.o. je malá startup firma na území SR.", "Samojazdiace autá presúvajú poistnú zodpovednosť na výrobcov automobilov.", diff --git a/spacy/lang/sl/examples.py b/spacy/lang/sl/examples.py index bf483c6a4..79846114b 100644 --- a/spacy/lang/sl/examples.py +++ b/spacy/lang/sl/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple načrtuje nakup britanskega startupa za 1 bilijon dolarjev", "France Prešeren je umrl 8. februarja 1849 v Kranju", diff --git a/spacy/lang/sq/examples.py b/spacy/lang/sq/examples.py index 06ed20fa1..61bf713a6 100644 --- a/spacy/lang/sq/examples.py +++ b/spacy/lang/sq/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple po shqyrton blerjen e nje shoqërie të U.K. për 1 miliard dollarë", "Makinat autonome ndryshojnë përgjegjësinë e sigurimit ndaj prodhuesve", diff --git a/spacy/lang/sr/examples.py b/spacy/lang/sr/examples.py index ec7f57ced..2d34d42b4 100644 --- a/spacy/lang/sr/examples.py +++ b/spacy/lang/sr/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ # Translations from English "Apple планира куповину америчког стартапа за $1 милијарду.", diff --git a/spacy/lang/sv/examples.py b/spacy/lang/sv/examples.py index bc6cd7a54..ffea6e457 100644 --- a/spacy/lang/sv/examples.py +++ b/spacy/lang/sv/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple överväger att köpa brittisk startup för 1 miljard dollar.", "Självkörande bilar förskjuter försäkringsansvar mot tillverkare.", diff --git a/spacy/lang/ta/examples.py b/spacy/lang/ta/examples.py index e68dc6237..522cd926d 100644 --- a/spacy/lang/ta/examples.py +++ b/spacy/lang/ta/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "கிறிஸ்துமஸ் மற்றும் இனிய புத்தாண்டு வாழ்த்துக்கள்", "எனக்கு என் குழந்தைப் பருவம் நினைவிருக்கிறது", diff --git a/spacy/lang/te/examples.py b/spacy/lang/te/examples.py index cff7d3cb0..4af872828 100644 --- a/spacy/lang/te/examples.py +++ b/spacy/lang/te/examples.py @@ -7,7 +7,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "ఆపిల్ 1 బిలియన్ డాలర్స్ కి యూ.కె. స్టార్ట్అప్ ని కొనాలని అనుకుంటుంది.", "ఆటోనోమోస్ కార్లు భీమా బాధ్యతను తయారీదారులపైకి మళ్లిస్తాయి.", diff --git a/spacy/lang/ti/examples.py b/spacy/lang/ti/examples.py index 167b58d09..146ac349b 100644 --- a/spacy/lang/ti/examples.py +++ b/spacy/lang/ti/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "አፕል ብዩኬ ትርከብ ንግድ ብ1 ቢሊዮን ዶላር ንምግዛዕ ሐሲባ።", "ፈላማይ ክታበት ኮቪድ 19 ተጀሚሩ፤ሓዱሽ ተስፋ ሂቡ ኣሎ", diff --git a/spacy/lang/tn/examples.py b/spacy/lang/tn/examples.py index 7b33fae5a..fb6d96f97 100644 --- a/spacy/lang/tn/examples.py +++ b/spacy/lang/tn/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. 
>>> docs = nlp.pipe(sentences) """ - sentences = [ "Apple e nyaka go reka JSE ka tlhwatlhwa ta R1 billion", "Johannesburg ke toropo e kgolo mo Afrika Borwa.", diff --git a/spacy/lang/tr/examples.py b/spacy/lang/tr/examples.py index c912c950d..c96e54032 100644 --- a/spacy/lang/tr/examples.py +++ b/spacy/lang/tr/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Neredesin?", "Neredesiniz?", diff --git a/spacy/lang/uk/examples.py b/spacy/lang/uk/examples.py index f75d44488..3335c82ac 100644 --- a/spacy/lang/uk/examples.py +++ b/spacy/lang/uk/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Ніч на середу буде морозною.", "Чим кращі книги ти читав, тим гірше спиш.", # Serhiy Zhadan diff --git a/spacy/lang/ur/examples.py b/spacy/lang/ur/examples.py index e55b337be..f612c6b81 100644 --- a/spacy/lang/ur/examples.py +++ b/spacy/lang/ur/examples.py @@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "اردو ہے جس کا نام ہم جانتے ہیں داغ", "سارے جہاں میں دھوم ہماری زباں کی ہے", diff --git a/spacy/lang/vi/examples.py b/spacy/lang/vi/examples.py index 36575f67c..5f2a9b2ba 100644 --- a/spacy/lang/vi/examples.py +++ b/spacy/lang/vi/examples.py @@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models. >>> docs = nlp.pipe(sentences) """ - sentences = [ "Đây là đâu, tôi là ai?", "Căn phòng có nhiều cửa sổ nên nó khá sáng", diff --git a/spacy/language.py b/spacy/language.py index 06210a327..b8f95e3b3 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1519,8 +1519,7 @@ class Language: disable: Iterable[str] = ..., component_cfg: Optional[Dict[str, Dict[str, Any]]] = ..., n_process: int = ..., - ) -> Iterator[Doc]: - ... + ) -> Iterator[Doc]: ... 
@overload def pipe( # noqa: F811 @@ -1532,8 +1531,7 @@ class Language: disable: Iterable[str] = ..., component_cfg: Optional[Dict[str, Dict[str, Any]]] = ..., n_process: int = ..., - ) -> Iterator[Tuple[Doc, _AnyContext]]: - ... + ) -> Iterator[Tuple[Doc, _AnyContext]]: ... def pipe( # noqa: F811 self, @@ -1641,7 +1639,7 @@ class Language: batch_size: int, ) -> Iterator[Doc]: def prepare_input( - texts: Iterable[Union[str, Doc]] + texts: Iterable[Union[str, Doc]], ) -> Iterable[Tuple[Union[str, bytes], _AnyContext]]: # Serialize Doc inputs to bytes to avoid incurring pickling # overhead when they are passed to child processes. Also yield @@ -1943,9 +1941,9 @@ class Language: ) if "_sourced_vectors_hashes" not in nlp.meta: nlp.meta["_sourced_vectors_hashes"] = {} - nlp.meta["_sourced_vectors_hashes"][ - pipe_name - ] = source_nlp_vectors_hashes[model] + nlp.meta["_sourced_vectors_hashes"][pipe_name] = ( + source_nlp_vectors_hashes[model] + ) # Delete from cache if listeners were replaced if listeners_replaced: del source_nlps[model] diff --git a/spacy/matcher/dependencymatcher.pyi b/spacy/matcher/dependencymatcher.pyi index b9fbabda7..d84a30a58 100644 --- a/spacy/matcher/dependencymatcher.pyi +++ b/spacy/matcher/dependencymatcher.pyi @@ -51,9 +51,7 @@ class DependencyMatcher: ] = ... ) -> None: ... def has_key(self, key: Union[str, int]) -> bool: ... - def get( - self, key: Union[str, int], default: Optional[Any] = ... - ) -> Tuple[ + def get(self, key: Union[str, int], default: Optional[Any] = ...) 
-> Tuple[ Optional[ Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any] ], diff --git a/spacy/ml/featureextractor.py b/spacy/ml/featureextractor.py index 2f869ad65..fb4e3c39a 100644 --- a/spacy/ml/featureextractor.py +++ b/spacy/ml/featureextractor.py @@ -7,7 +7,7 @@ from ..tokens import Doc def FeatureExtractor( - columns: Union[List[str], List[int], List[Union[int, str]]] + columns: Union[List[str], List[int], List[Union[int, str]]], ) -> Model[List[Doc], List[Ints2d]]: return Model("extract_features", forward, attrs={"columns": columns}) diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py index 752d1c443..8b12720db 100644 --- a/spacy/ml/models/entity_linker.py +++ b/spacy/ml/models/entity_linker.py @@ -122,7 +122,7 @@ def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]: return get_candidates -def create_candidates_batch() -> Callable[ - [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]] -]: +def create_candidates_batch() -> ( + Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]] +): return get_candidates_batch diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py index 6029ed313..0941b43c1 100644 --- a/spacy/pipeline/edit_tree_lemmatizer.py +++ b/spacy/pipeline/edit_tree_lemmatizer.py @@ -93,7 +93,7 @@ class EditTreeLemmatizer(TrainablePipe): truths = [] for eg in examples: eg_truths = [] - for (predicted, gold_lemma) in zip( + for predicted, gold_lemma in zip( eg.predicted, eg.get_aligned("LEMMA", as_string=True) ): if gold_lemma is None or gold_lemma == "": diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 24212af48..ef57577ed 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -210,7 +210,7 @@ class Lemmatizer(Pipe): rules = rules_table.get(univ_pos, {}) orig = string string = string.lower() - forms: List[str] = [] + forms = [] # type: ignore oov_forms = 
[] for old, new in rules: if string.endswith(old): diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py index 030572850..805a0538f 100644 --- a/spacy/pipeline/spancat.py +++ b/spacy/pipeline/spancat.py @@ -80,8 +80,7 @@ DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str( @runtime_checkable class Suggester(Protocol): - def __call__(self, docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged: - ... + def __call__(self, docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged: ... def ngram_suggester( diff --git a/spacy/registrations.py b/spacy/registrations.py index f742da9d3..7e29486b6 100644 --- a/spacy/registrations.py +++ b/spacy/registrations.py @@ -6,6 +6,7 @@ remain in their original locations, but decoration is moved here. Component definitions and registrations are in spacy/pipeline/factories.py """ + # Global flag to track if registry has been populated REGISTRY_POPULATED = False diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 73544c51a..d72c916ef 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -141,7 +141,8 @@ def test_issue3869(sentence): @pytest.mark.issue(3962) def test_issue3962(en_vocab): """Ensure that as_doc does not result in out-of-bound access of tokens. - This is achieved by setting the head to itself if it would lie out of the span otherwise.""" + This is achieved by setting the head to itself if it would lie out of the span otherwise. + """ # fmt: off words = ["He", "jests", "at", "scars", ",", "that", "never", "felt", "a", "wound", "."] heads = [1, 7, 1, 2, 7, 7, 7, 7, 9, 7, 7] @@ -180,7 +181,8 @@ def test_issue3962(en_vocab): @pytest.mark.issue(3962) def test_issue3962_long(en_vocab): """Ensure that as_doc does not result in out-of-bound access of tokens. - This is achieved by setting the head to itself if it would lie out of the span otherwise.""" + This is achieved by setting the head to itself if it would lie out of the span otherwise. 
+ """ # fmt: off words = ["He", "jests", "at", "scars", ".", "They", "never", "felt", "a", "wound", "."] heads = [1, 1, 1, 2, 1, 7, 7, 7, 9, 7, 7] diff --git a/spacy/tests/lang/hu/test_tokenizer.py b/spacy/tests/lang/hu/test_tokenizer.py index fa689c8f3..30f3e9487 100644 --- a/spacy/tests/lang/hu/test_tokenizer.py +++ b/spacy/tests/lang/hu/test_tokenizer.py @@ -304,9 +304,11 @@ TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0]) SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0] TESTS.extend( [ - pytest.param(x[0], x[1], marks=pytest.mark.slow()) - if not isinstance(x[0], tuple) - else x + ( + pytest.param(x[0], x[1], marks=pytest.mark.slow()) + if not isinstance(x[0], tuple) + else x + ) for x in SLOW_TESTS ] ) diff --git a/spacy/tests/matcher/test_matcher_logic.py b/spacy/tests/matcher/test_matcher_logic.py index 33f628004..d3188b149 100644 --- a/spacy/tests/matcher/test_matcher_logic.py +++ b/spacy/tests/matcher/test_matcher_logic.py @@ -544,7 +544,7 @@ def test_greedy_matching_longest(doc, text, pattern, longest): matcher = Matcher(doc.vocab) matcher.add("RULE", [pattern], greedy="LONGEST") matches = matcher(doc) - for (key, s, e) in matches: + for key, s, e in matches: assert doc[s:e].text == longest diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py index 193a0bc1c..811a09150 100644 --- a/spacy/tests/pipeline/test_entity_linker.py +++ b/spacy/tests/pipeline/test_entity_linker.py @@ -496,15 +496,15 @@ def test_el_pipe_configuration(nlp): return [get_lowercased_candidates(kb, span) for span in spans] @registry.misc("spacy.LowercaseCandidateGenerator.v1") - def create_candidates() -> Callable[ - [InMemoryLookupKB, "Span"], Iterable[Candidate] - ]: + def create_candidates() -> ( + Callable[[InMemoryLookupKB, "Span"], Iterable[Candidate]] + ): return get_lowercased_candidates @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1") - def create_candidates_batch() -> Callable[ - 
[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]] - ]: + def create_candidates_batch() -> ( + Callable[[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]] + ): return get_lowercased_candidates_batch # replace the pipe with a new one with with a different candidate generator diff --git a/spacy/tests/pipeline/test_pipe_factories.py b/spacy/tests/pipeline/test_pipe_factories.py index c45dccb06..b355379bf 100644 --- a/spacy/tests/pipeline/test_pipe_factories.py +++ b/spacy/tests/pipeline/test_pipe_factories.py @@ -279,20 +279,17 @@ def test_pipe_factories_wrong_formats(): with pytest.raises(ValueError): # Decorator is not called @Language.component - def component(foo: int, bar: str): - ... + def component(foo: int, bar: str): ... with pytest.raises(ValueError): # Decorator is not called @Language.factory - def factory1(foo: int, bar: str): - ... + def factory1(foo: int, bar: str): ... with pytest.raises(ValueError): # Factory function is missing "nlp" and "name" arguments @Language.factory("test_pipe_factories_missing_args") - def factory2(foo: int, bar: str): - ... + def factory2(foo: int, bar: str): ... def test_pipe_factory_meta_config_cleanup(): @@ -329,8 +326,7 @@ def test_pipe_factories_empty_dict_default(): name = "test_pipe_factories_empty_dict_default" @Language.factory(name, default_config={"foo": {}}) - def factory(nlp: Language, name: str, foo: dict): - ... + def factory(nlp: Language, name: str, foo: dict): ... nlp = Language() nlp.create_pipe(name) @@ -549,11 +545,9 @@ def test_pipe_factories_from_source_config(): class PipeFactoriesIdempotent: - def __init__(self, nlp, name): - ... + def __init__(self, nlp, name): ... - def __call__(self, doc): - ... + def __call__(self, doc): ... 
@pytest.mark.parametrize( diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py index 8e4a5ed7c..4310e41ab 100644 --- a/spacy/tests/pipeline/test_textcat.py +++ b/spacy/tests/pipeline/test_textcat.py @@ -874,7 +874,8 @@ def test_textcat_eval_missing(multi_label: bool, spring_p: float): def test_textcat_loss(multi_label: bool, expected_loss: float): """ multi-label: the missing 'spring' in gold_doc_2 doesn't incur an increase in loss - exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss""" + exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss + """ train_examples = [] nlp = English() diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 7b729d78f..43d5f6283 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -890,7 +890,7 @@ def test_cli_find_threshold(capsys): return docs def init_nlp( - components: Tuple[Tuple[str, Dict[str, Any]], ...] = () + components: Tuple[Tuple[str, Dict[str, Any]], ...] = (), ) -> Tuple[Language, List[Example]]: new_nlp = English() new_nlp.add_pipe( # type: ignore diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index f0b68862c..d92f04d05 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -57,9 +57,7 @@ class Doc: force: bool = ..., ) -> None: ... @classmethod - def get_extension( - cls, name: str - ) -> Tuple[ + def get_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[DocMethod], Optional[Callable[[Doc], Any]], @@ -68,9 +66,7 @@ class Doc: @classmethod def has_extension(cls, name: str) -> bool: ... 
@classmethod - def remove_extension( - cls, name: str - ) -> Tuple[ + def remove_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[DocMethod], Optional[Callable[[Doc], Any]], diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi index b982eb810..070aaffb3 100644 --- a/spacy/tokens/span.pyi +++ b/spacy/tokens/span.pyi @@ -23,9 +23,7 @@ class Span: force: bool = ..., ) -> None: ... @classmethod - def get_extension( - cls, name: str - ) -> Tuple[ + def get_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[SpanMethod], Optional[Callable[[Span], Any]], @@ -34,9 +32,7 @@ class Span: @classmethod def has_extension(cls, name: str) -> bool: ... @classmethod - def remove_extension( - cls, name: str - ) -> Tuple[ + def remove_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[SpanMethod], Optional[Callable[[Span], Any]], diff --git a/spacy/tokens/token.pxd b/spacy/tokens/token.pxd index 3252fcdeb..e3e270a24 100644 --- a/spacy/tokens/token.pxd +++ b/spacy/tokens/token.pxd @@ -11,7 +11,7 @@ from .doc cimport Doc from ..errors import Errors -cdef int MISSING_DEP = 0 +cdef const int MISSING_DEP = 0 cdef class Token: cdef readonly Vocab vocab diff --git a/spacy/tokens/token.pyi b/spacy/tokens/token.pyi index 435ace527..7e56ae3bc 100644 --- a/spacy/tokens/token.pyi +++ b/spacy/tokens/token.pyi @@ -27,9 +27,7 @@ class Token: force: bool = ..., ) -> None: ... @classmethod - def get_extension( - cls, name: str - ) -> Tuple[ + def get_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[TokenMethod], Optional[Callable[[Token], Any]], @@ -38,9 +36,7 @@ class Token: @classmethod def has_extension(cls, name: str) -> bool: ... 
@classmethod - def remove_extension( - cls, name: str - ) -> Tuple[ + def remove_extension(cls, name: str) -> Tuple[ Optional[Any], Optional[TokenMethod], Optional[Callable[[Token], Any]], diff --git a/spacy/training/loop.py b/spacy/training/loop.py index 56df53957..6f5099858 100644 --- a/spacy/training/loop.py +++ b/spacy/training/loop.py @@ -354,7 +354,7 @@ def update_meta( def create_before_to_disk_callback( - callback: Optional[Callable[["Language"], "Language"]] + callback: Optional[Callable[["Language"], "Language"]], ) -> Callable[["Language"], "Language"]: from ..language import Language # noqa: F811 diff --git a/spacy/ty.py b/spacy/ty.py index f389456c0..b37f2e18a 100644 --- a/spacy/ty.py +++ b/spacy/ty.py @@ -30,11 +30,9 @@ class TrainableComponent(Protocol): drop: float = 0.0, sgd: Optional[Optimizer] = None, losses: Optional[Dict[str, float]] = None - ) -> Dict[str, float]: - ... + ) -> Dict[str, float]: ... - def finish_update(self, sgd: Optimizer) -> None: - ... + def finish_update(self, sgd: Optimizer) -> None: ... @runtime_checkable @@ -44,8 +42,7 @@ class InitializableComponent(Protocol): get_examples: Callable[[], Iterable["Example"]], nlp: "Language", **kwargs: Any - ): - ... + ): ... @runtime_checkable @@ -55,11 +52,8 @@ class ListenedToComponent(Protocol): listener_map: Dict[str, Sequence[Model]] listening_components: List[str] - def add_listener(self, listener: Model, component_name: str) -> None: - ... + def add_listener(self, listener: Model, component_name: str) -> None: ... - def remove_listener(self, listener: Model, component_name: str) -> bool: - ... + def remove_listener(self, listener: Model, component_name: str) -> bool: ... - def find_listeners(self, component) -> None: - ... + def find_listeners(self, component) -> None: ... 
diff --git a/spacy/util.py b/spacy/util.py index 527e6eb3a..ad5a7e0ba 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -657,7 +657,7 @@ def load_model_from_config( def get_sourced_components( - config: Union[Dict[str, Any], Config] + config: Union[Dict[str, Any], Config], ) -> Dict[str, Dict[str, Any]]: """RETURNS (List[str]): All sourced components in the original config, e.g. {"source": "en_core_web_sm"}. If the config contains a key diff --git a/website/docs/api/cli.mdx b/website/docs/api/cli.mdx index 09a978259..add6b1446 100644 --- a/website/docs/api/cli.mdx +++ b/website/docs/api/cli.mdx @@ -47,7 +47,7 @@ pipeline name to be specified with its version (e.g. `en_core_web_sm-3.0.0`). > project. ```bash -$ python -m spacy download [model] [--direct] [--sdist] [pip_args] +$ python -m spacy download [model] [--direct] [--sdist] [pip_args] [--url url] ``` | Name | Description | @@ -58,6 +58,7 @@ $ python -m spacy download [model] [--direct] [--sdist] [pip_args] | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | | pip args | Additional installation options to be passed to `pip install` when installing the pipeline package. For example, `--user` to install to the user home directory or `--no-deps` to not install package dependencies. ~~Any (option/flag)~~ | | **CREATES** | The installed pipeline package in your `site-packages` directory. 
| +| `--url`, `-U` | Download from a mirror repository at the given URL. ~~str (option)~~ | ## info {id="info",tag="command"} diff --git a/website/meta/universe.json b/website/meta/universe.json index b7842bddc..1f55d9616 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -2739,20 +2739,7 @@ "courses" ] }, - { - "type": "education", - "id": "spacy-quickstart", - "title": "spaCy Quickstart", - "slogan": "Learn spaCy basics quickly by visualizing various Doc objects", - "description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.", - "url": "https://learnspacy.com/courses/spacy-quickstart/", - "image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png", - "thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png", - "author": "Aravind Mohanoor", - "category": [ - "courses" - ] - }, + { "type": "education", "id": "video-spacys-ner-model",