mirror of https://github.com/explosion/spaCy.git
synced 2026-01-09 18:21:14 +03:00

commit c4f8c90fd2: Merge branch 'master' into codespell

.github/workflows/cibuildwheel.yml (vendored, 92 changes)

@@ -9,91 +9,13 @@ on:
      - 'prerelease-v[0-9]+.[0-9]+.[0-9]+**'

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # macos-13 is an intel runner, macos-14 is apple silicon
        os: [ubuntu-latest, windows-latest, macos-13, macos-14, ubuntu-24.04-arm]

    steps:
      - uses: actions/checkout@v4
      # aarch64 (arm) is built via qemu emulation
      # QEMU is sadly too slow. We need to wait for public ARM support
      #- name: Set up QEMU
      #  if: runner.os == 'Linux'
      #  uses: docker/setup-qemu-action@v3
      #  with:
      #    platforms: all
      - name: Build wheels
        uses: pypa/cibuildwheel@v2.21.3
        env:
          CIBW_ARCHS_LINUX: auto
        with:
          package-dir: .
          output-dir: wheelhouse
          config-file: "{package}/pyproject.toml"
      - uses: actions/upload-artifact@v4
        with:
          name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }}
          path: ./wheelhouse/*.whl

  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Build sdist
        run: pipx run build --sdist
      - uses: actions/upload-artifact@v4
        with:
          name: cibw-sdist
          path: dist/*.tar.gz
  create_release:
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest
    uses: explosion/gha-cibuildwheel/.github/workflows/cibuildwheel.yml@main
    permissions:
      contents: write
      checks: write
      actions: read
      issues: read
      packages: write
      pull-requests: read
      repository-projects: read
      statuses: read
    steps:
      - name: Get the tag name and determine if it's a prerelease
        id: get_tag_info
        run: |
          FULL_TAG=${GITHUB_REF#refs/tags/}
          if [[ $FULL_TAG == release-* ]]; then
            TAG_NAME=${FULL_TAG#release-}
            IS_PRERELEASE=false
          elif [[ $FULL_TAG == prerelease-* ]]; then
            TAG_NAME=${FULL_TAG#prerelease-}
            IS_PRERELEASE=true
          else
            echo "Tag does not match expected patterns" >&2
            exit 1
          fi
          echo "FULL_TAG=$TAG_NAME" >> $GITHUB_ENV
          echo "TAG_NAME=$TAG_NAME" >> $GITHUB_ENV
          echo "IS_PRERELEASE=$IS_PRERELEASE" >> $GITHUB_ENV
      - uses: actions/download-artifact@v4
        with:
          # unpacks all CIBW artifacts into dist/
          pattern: cibw-*
          path: dist
          merge-multiple: true
      - name: Create Draft Release
        id: create_release
        uses: softprops/action-gh-release@v2
        if: startsWith(github.ref, 'refs/tags/')
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          name: ${{ env.TAG_NAME }}
          draft: true
          prerelease: ${{ env.IS_PRERELEASE }}
          files: "./dist/*"
    with:
      wheel-name-pattern: "spacy-*.whl"
      pure-python: false
    secrets:
      gh-token: ${{ secrets.GITHUB_TOKEN }}

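The removed release job keyed everything off the pushed tag: `release-*` tags publish a final release, `prerelease-*` tags a prerelease. An illustrative Python sketch of that classification logic (not part of the commit; the prerelease tag below is hypothetical):

```python
def classify_tag(full_tag: str) -> tuple:
    # Mirrors the shell logic in the "Get the tag name" step above
    if full_tag.startswith("release-"):
        return full_tag[len("release-"):], False  # IS_PRERELEASE=false
    if full_tag.startswith("prerelease-"):
        return full_tag[len("prerelease-"):], True  # IS_PRERELEASE=true
    raise ValueError("Tag does not match expected patterns")

assert classify_tag("release-3.8.11") == ("3.8.11", False)
assert classify_tag("prerelease-3.9.0a1") == ("3.9.0a1", True)  # hypothetical tag
```
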
.github/workflows/tests.yml (vendored, 2 changes)

@@ -59,7 +59,7 @@ jobs:
      fail-fast: true
      matrix:
        os: [ubuntu-latest, windows-latest, macos-latest]
        python_version: ["3.9", "3.12", "3.13"]
        python_version: ["3.10", "3.11", "3.12", "3.13"]

    runs-on: ${{ matrix.os }}

@@ -12,9 +12,8 @@ build-backend = "setuptools.build_meta"

[tool.cibuildwheel]
build = "*"
skip = "pp* cp36* cp37* cp38* *-win32 *i686*"
skip = "cp39* *-win32 *i686* cp3??t-* *cp310-win_arm64"
test-skip = ""
free-threaded-support = false

archs = ["native"]

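The new `skip` list drops CPython 3.9 builds, 32-bit Windows, i686, free-threaded builds (`cp3??t-*`), and the 3.10 Windows ARM build, where the old list dropped PyPy and CPython 3.6 to 3.8. cibuildwheel treats these entries as shell-style globs over build identifiers, which `fnmatch` can approximate; an illustrative sketch (the build identifiers are examples, not output of the commit):

```python
from fnmatch import fnmatch

SKIP = "cp39* *-win32 *i686* cp3??t-* *cp310-win_arm64".split()

def is_skipped(build_id: str) -> bool:
    # A build is skipped when any glob in the skip list matches its identifier
    return any(fnmatch(build_id, pattern) for pattern in SKIP)

assert is_skipped("cp39-manylinux_x86_64")        # 3.9: dropped
assert is_skipped("cp313t-manylinux_x86_64")      # free-threaded: dropped
assert not is_skipped("cp312-manylinux_x86_64")   # regular 3.12: still built
```
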
@@ -10,7 +10,7 @@ wasabi>=0.9.1,<1.2.0
srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer-slim>=0.3.0,<1.0.0
weasel>=0.1.0,<0.5.0
weasel>=0.4.2,<0.5.0
# Third party dependencies
numpy>=2.0.0,<3.0.0
requests>=2.13.0,<3.0.0

@@ -33,6 +33,6 @@ types-mock>=0.1.1
types-setuptools>=57.0.0
types-requests
types-setuptools>=57.0.0
black==22.3.0
black>=25.0.0
cython-lint>=0.15.0
isort>=5.0,<6.0

@@ -30,7 +30,7 @@ project_urls =

[options]
zip_safe = false
include_package_data = true
python_requires = >=3.9,<3.14
python_requires = >=3.9,<3.15
# NOTE: This section is superseded by pyproject.toml and will be removed in
# spaCy v4
setup_requires =

@@ -53,7 +53,7 @@ install_requires =
    wasabi>=0.9.1,<1.2.0
    srsly>=2.4.3,<3.0.0
    catalogue>=2.0.6,<2.1.0
    weasel>=0.1.0,<0.5.0
    weasel>=0.4.2,<0.5.0
    # Third-party dependencies
    typer-slim>=0.3.0,<1.0.0
    tqdm>=4.38.0,<5.0.0

@@ -1,5 +1,5 @@
# fmt: off
__title__ = "spacy"
__version__ = "3.8.7"
__version__ = "3.8.11"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

@@ -20,7 +20,7 @@ from typing import (
import srsly
import typer
from click import NoSuchOption
from click.parser import split_arg_string
from click.shell_completion import split_arg_string
from thinc.api import Config, ConfigValidationError, require_gpu
from thinc.util import gpu_is_available
from typer.main import get_command

@@ -225,13 +225,11 @@ def get_git_version(


@overload
def string_to_list(value: str, intify: Literal[False] = ...) -> List[str]:
    ...
def string_to_list(value: str, intify: Literal[False] = ...) -> List[str]: ...


@overload
def string_to_list(value: str, intify: Literal[True]) -> List[int]:
    ...
def string_to_list(value: str, intify: Literal[True]) -> List[int]: ...


def string_to_list(value: str, intify: bool = False) -> Union[List[str], List[int]]:

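Collapsing the `@overload` stub bodies onto one line (`: ...`) is purely a formatting change; the typing contract is unchanged: type checkers infer `List[str]` by default and `List[int]` when `intify=True` is passed. An illustrative sketch of that contract (assuming the comma-splitting behaviour of `string_to_list`):

```python
from spacy.cli._util import string_to_list

labels = string_to_list("PERSON,ORG,GPE")            # inferred as List[str]
sizes = string_to_list("100,250,500", intify=True)   # inferred as List[int]
```
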
@@ -968,16 +968,14 @@ def _compile_gold(


@overload
def _format_labels(labels: Iterable[str], counts: Literal[False] = False) -> str:
    ...
def _format_labels(labels: Iterable[str], counts: Literal[False] = False) -> str: ...


@overload
def _format_labels(
    labels: Iterable[Tuple[str, int]],
    counts: Literal[True],
) -> str:
    ...
) -> str: ...


def _format_labels(

@@ -29,6 +29,7 @@ def download_cli(
    model: str = Arg(..., help="Name of pipeline package to download"),
    direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
    sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
    url: str = Opt(None, "--url", "-U", help="Download from given url")
    # fmt: on
):
    """

@@ -41,13 +42,14 @@ def download_cli(
    DOCS: https://spacy.io/api/cli#download
    AVAILABLE PACKAGES: https://spacy.io/models
    """
    download(model, direct, sdist, *ctx.args)
    download(model, direct, sdist, url, *ctx.args)


def download(
    model: str,
    direct: bool = False,
    sdist: bool = False,
    custom_url: Optional[str] = None,
    *pip_args,
) -> None:
    if (

@@ -87,7 +89,7 @@ def download(

    filename = get_model_filename(model_name, version, sdist)

    download_model(filename, pip_args)
    download_model(filename, pip_args, custom_url)
    msg.good(
        "Download and installation successful",
        f"You can now load the package via spacy.load('{model_name}')",

@@ -159,12 +161,14 @@ def get_latest_version(model: str) -> str:


def download_model(
    filename: str, user_pip_args: Optional[Sequence[str]] = None
    filename: str,
    user_pip_args: Optional[Sequence[str]] = None,
    custom_url: Optional[str] = None,
) -> None:
    # Construct the download URL carefully. We need to make sure we don't
    # allow relative paths or other shenanigans to trick us into download
    # from outside our own repo.
    base_url = about.__download_url__
    base_url = custom_url if custom_url else about.__download_url__
    # urljoin requires that the path ends with /, or the last path part will be dropped
    if not base_url.endswith("/"):
        base_url = about.__download_url__ + "/"

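Taken together, these hunks thread a new optional download URL from the CLI (`--url`/`-U`) through `download()` into `download_model()`, where it replaces `about.__download_url__` as the base URL. An illustrative sketch of both entry points (the mirror URL is a placeholder, not a real endpoint):

```python
from spacy.cli.download import download

# Default behaviour: fetch from the official spacy-models releases
download("en_core_web_sm")

# Mirror: equivalent to `python -m spacy download en_core_web_sm --url <mirror>`;
# the trailing "/" matters because urljoin drops the last path part without it
download("en_core_web_sm", custom_url="https://models.example.org/spacy/")
```
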
@@ -157,9 +157,11 @@ def find_threshold(
            exits=1,
        )
        return {
            keys[0]: filter_config(config[keys[0]], keys[1:], full_key)
            if len(keys) > 1
            else config[keys[0]]
            keys[0]: (
                filter_config(config[keys[0]], keys[1:], full_key)
                if len(keys) > 1
                else config[keys[0]]
            )
        }

    # Evaluate with varying threshold values.

@@ -216,12 +218,14 @@ def find_threshold(
    if len(set(scores.values())) == 1:
        wasabi.msg.warn(
            title="All scores are identical. Verify that all settings are correct.",
            text=""
            if (
                not isinstance(pipe, MultiLabel_TextCategorizer)
                or scores_key in ("cats_macro_f", "cats_micro_f")
            )
            else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`.",
            text=(
                ""
                if (
                    not isinstance(pipe, MultiLabel_TextCategorizer)
                    or scores_key in ("cats_macro_f", "cats_micro_f")
                )
                else "Use `cats_macro_f` or `cats_micro_f` when optimizing the threshold for `textcat_multilabel`."
            ),
        )

    else:

@@ -195,9 +195,11 @@ def init_config(
        "Pipeline": ", ".join(pipeline),
        "Optimize for": optimize,
        "Hardware": variables["hardware"].upper(),
        "Transformer": template_vars.transformer.get("name")  # type: ignore[attr-defined]
        if template_vars.use_transformer  # type: ignore[attr-defined]
        else None,
        "Transformer": (
            template_vars.transformer.get("name")  # type: ignore[attr-defined]
            if template_vars.use_transformer  # type: ignore[attr-defined]
            else None
        ),
    }
    msg.info("Generated config template specific for your use case")
    for label, value in use_case.items():

@@ -1,4 +1,5 @@
"""Helpers for Python and platform compatibility."""

import sys

from thinc.util import copy_array

@@ -4,6 +4,7 @@ spaCy's built in visualization suite for dependencies and named entities.
DOCS: https://spacy.io/api/top-level#displacy
USAGE: https://spacy.io/usage/visualizers
"""

import warnings
from typing import Any, Callable, Dict, Iterable, Optional, Union

@@ -66,7 +67,8 @@ def render(
    if jupyter or (jupyter is None and is_in_jupyter()):
        # return HTML rendered by IPython display()
        # See #4840 for details on span wrapper to disable mathjax
        from IPython.core.display import HTML, display
        from IPython.core.display import HTML
        from IPython.display import display

        return display(HTML('<span class="tex2jax_ignore">{}</span>'.format(html)))
    return html

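For context, `render` only takes the IPython branch when it detects (or is told via `jupyter=True`) that it is running in a notebook; otherwise it returns the markup. An illustrative sketch of the non-notebook path:

```python
import spacy
from spacy import displacy

nlp = spacy.blank("en")
doc = nlp("Hello world")
# jupyter=False forces the plain-HTML return path even inside a notebook
html = displacy.render(doc, style="ent", jupyter=False)
print(html[:60])  # a <div class="entities" ...> fragment
```
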
@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "አፕል የዩኬን ጅምር ድርጅት በ 1 ቢሊዮን ዶላር ለመግዛት አስቧል።",
    "የራስ ገዝ መኪኖች የኢንሹራንስ ኃላፊነትን ወደ አምራቾች ያዛውራሉ",

@@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Bu bir cümlədir.",
    "Necəsən?",

@@ -3,6 +3,7 @@ References:
    https://github.com/Alir3z4/stop-words - Original list, serves as a base.
    https://postvai.com/books/stop-dumi.pdf - Additions to the original list in order to improve it.
"""

STOP_WORDS = set(
    """
а автентичен аз ако ала

@@ -5,5 +5,4 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = ["তুই খুব ভালো", "আজ আমরা ডাক্তার দেখতে যাবো", "আমি জানি না "]

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "དོན་དུ་རྒྱ་མཚོ་བླ་མ་ཞེས་བྱ་ཞིང༌།",
    "ཏཱ་ལའི་ཞེས་པ་ནི་སོག་སྐད་ཡིན་པ་དེ་བོད་སྐད་དུ་རྒྱ་མཚོའི་དོན་དུ་འཇུག",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple està buscant comprar una startup del Regne Unit per mil milions de dòlars",
    "Els cotxes autònoms deleguen la responsabilitat de l'assegurança als seus fabricants",

@@ -277,10 +277,10 @@ _currency = (
# These expressions contain various unicode variations, including characters
# used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
# conflicts, spaCy's base tokenizer should handle all of those by default
_punct = (
    r"… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · । ، ۔ ؛ ٪"
_punct = r"… …… , : ; \! \? ¿ ؟ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · । ، ۔ ؛ ٪"
_quotes = (
    r'\' " ” “ ` ‘ ´ ’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉 〈 〉 ⟦ ⟧'
)
_quotes = r'\' " ” “ ` ‘ ´ ’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉 〈 〉 ⟦ ⟧'
_hyphens = "- – — -- --- —— ~"

# Various symbols like dingbats, but also emoji

@@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Máma mele maso.",
    "Příliš žluťoučký kůň úpěl ďábelské ódy.",

@@ -2,6 +2,7 @@
Tokenizer Exceptions.
Source: https://forkortelse.dk/ and various others.
"""

from ...symbols import NORM, ORTH
from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Die ganze Stadt ist ein Startup: Shenzhen ist das Silicon Valley für Hardware-Firmen",
    "Wie deutsche Startups die Technologie vorantreiben wollen: Künstliche Intelligenz",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Z tym stwori so wuměnjenje a zakład za dalše wobdźěłanje přez analyzu tekstoweje struktury a semantisku anotaciju a z tym tež za tu předstajenu digitalnu online-wersiju.",
    "Mi so tu jara derje spodoba.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple is looking at buying U.K. startup for $1 billion",
    "Autonomous cars shift insurance liability toward manufacturers",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple está buscando comprar una startup del Reino Unido por mil millones de dólares.",
    "Los coches autónomos delegan la responsabilidad del seguro en sus fabricantes.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "این یک جمله نمونه می باشد.",
    "قرار ما، امروز ساعت ۲:۳۰ بعدازظهر هست!",

@@ -100,9 +100,9 @@ conj_contraction_negations = [
    ("eivat", "eivät"),
    ("eivät", "eivät"),
]
for (base_lower, base_norm) in conj_contraction_bases:
for base_lower, base_norm in conj_contraction_bases:
    for base in [base_lower, base_lower.title()]:
        for (suffix, suffix_norm) in conj_contraction_negations:
        for suffix, suffix_norm in conj_contraction_negations:
            _exc[base + suffix] = [
                {ORTH: base, NORM: base_norm},
                {ORTH: suffix, NORM: suffix_norm},

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple cherche à acheter une start-up anglaise pour 1 milliard de dollars",
    "Les voitures autonomes déplacent la responsabilité de l'assurance vers les constructeurs",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "ἐρᾷ μὲν ἁγνὸς οὐρανὸς τρῶσαι χθόνα, ἔρως δὲ γαῖαν λαμβάνει γάμου τυχεῖν·",
    "εὐδαίμων Χαρίτων καὶ Μελάνιππος ἔφυ, θείας ἁγητῆρες ἐφαμερίοις φιλότατος.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "લોકશાહી એ સરકારનું એક એવું તંત્ર છે જ્યાં નાગરિકો મત દ્વારા સત્તાનો ઉપયોગ કરે છે.",
    "તે ગુજરાત રાજ્યના ધરમપુર શહેરમાં આવેલું હતું",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "סין מקימה קרן של 440 מיליון דולר להשקעה בהייטק בישראל",
    'רה"מ הודיע כי יחרים טקס בחסותו',

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "एप्पल 1 अरब डॉलर के लिए यू.के. स्टार्टअप खरीदने पर विचार कर रहा है।",
    "स्वायत्त कारें निर्माताओं की ओर बीमा दायित्व रखतीं हैं।",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "To běšo wjelgin raźone a jo se wót luźi derje pśiwzeło. Tak som dožywiła wjelgin",
    "Jogo pśewóźowarce stej groniłej, až how w serbskich stronach njama Santa Claus nic pytaś.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple ap panse achte yon demaraj nan Wayòm Ini pou $1 milya dola",
    "Machin otonòm fè responsablite asirans lan ale sou men fabrikan yo",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Az Apple egy brit startup vásárlását tervezi 1 milliárd dollár értékben.",
    "San Francisco vezetése mérlegeli a járdát használó szállító robotok betiltását.",

@@ -11,7 +11,7 @@ from ..char_classes import (
)

# removing ° from the special icons to keep e.g. 99° as one token
_concat_icons = CONCAT_ICONS.replace("\u00B0", "")
_concat_icons = CONCAT_ICONS.replace("\u00b0", "")

_currency = r"\$¢£€¥฿"
_quotes = CONCAT_QUOTES.replace("'", "")

@@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Լոնդոնը Միացյալ Թագավորության մեծ քաղաք է։",
    "Ո՞վ է Ֆրանսիայի նախագահը։",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Indonesia merupakan negara kepulauan yang kaya akan budaya.",
    "Berapa banyak warga yang dibutuhkan saat kerja bakti?",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple vuole comprare una startup del Regno Unito per un miliardo di dollari",
    "Le automobili a guida autonoma spostano la responsabilità assicurativa verso i produttori",

@@ -102,9 +102,9 @@ class JapaneseTokenizer(DummyTokenizer):
                    token.dictionary_form(),  # lemma
                    token.normalized_form(),
                    token.reading_form(),
                    sub_tokens_list[idx]
                    if sub_tokens_list
                    else None,  # user_data['sub_tokens']
                    (
                        sub_tokens_list[idx] if sub_tokens_list else None
                    ),  # user_data['sub_tokens']
                )
                for idx, token in enumerate(sudachipy_tokens)
                if len(token.surface()) > 0

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "アップルがイギリスの新興企業を10億ドルで購入を検討",
    "自動運転車の損害賠償責任、自動車メーカーに一定の負担を求める",

@@ -25,7 +25,9 @@ TAG_MAP = {
    # Universal Dependencies Mapping: (Some of the entries in this mapping are updated to v2.6 in the list below)
    # http://universaldependencies.org/ja/overview/morphology.html
    # http://universaldependencies.org/ja/pos/all.html
    "記号-一般": {POS: NOUN},  # this includes characters used to represent sounds like ドレミ
    "記号-一般": {
        POS: NOUN
    },  # this includes characters used to represent sounds like ドレミ
    "記号-文字": {
        POS: NOUN
    },  # this is for Greek and Latin characters having some meanings, or used as symbols, as in math

@@ -72,7 +74,9 @@ TAG_MAP = {
    "名詞-固有名詞-地名-国": {POS: PROPN},  # country name
    "名詞-助動詞語幹": {POS: AUX},
    "名詞-数詞": {POS: NUM},  # includes Chinese numerals
    "名詞-普通名詞-サ変可能": {POS: NOUN},  # XXX: sometimes VERB in UDv2; suru-verb noun
    "名詞-普通名詞-サ変可能": {
        POS: NOUN
    },  # XXX: sometimes VERB in UDv2; suru-verb noun
    "名詞-普通名詞-サ変形状詞可能": {POS: NOUN},
    "名詞-普通名詞-一般": {POS: NOUN},
    "名詞-普通名詞-形状詞可能": {POS: NOUN},  # XXX: sometimes ADJ in UDv2

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "ಆಪಲ್ ಒಂದು ಯು.ಕೆ. ಸ್ಟಾರ್ಟ್ಅಪ್ ಅನ್ನು ೧ ಶತಕೋಟಿ ಡಾಲರ್ಗಳಿಗೆ ಖರೀದಿಸಲು ನೋಡುತ್ತಿದೆ.",
    "ಸ್ವಾಯತ್ತ ಕಾರುಗಳು ವಿಮಾ ಹೊಣೆಗಾರಿಕೆಯನ್ನು ತಯಾರಕರ ಕಡೆಗೆ ಬದಲಾಯಿಸುತ್ತವೆ.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Sciusciâ e sciorbî no se peu.",
    "Graçie di çetroin, che me son arrivæ.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Jaunikis pirmąją vestuvinę naktį iškeitė į areštinės gultą",
    "Bepiločiai automobiliai išnaikins vairavimo mokyklas, autoservisus ir eismo nelaimes",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "അനാവശ്യമായി കണ്ണിലും മൂക്കിലും വായിലും സ്പർശിക്കാതിരിക്കുക",
    "പൊതുരംഗത്ത് മലയാള ഭാഷയുടെ സമഗ്രപുരോഗതി ലക്ഷ്യമാക്കി പ്രവർത്തിക്കുന്ന സംഘടനയായ മലയാളഐക്യവേദിയുടെ വിദ്യാർത്ഥിക്കൂട്ടായ്മയാണ് വിദ്യാർത്ഥി മലയാളവേദി",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Malaysia ialah sebuah negara yang terletak di Asia Tenggara.",
    "Berapa banyak pelajar yang akan menghadiri majlis perpisahan sekolah?",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar.",
    "Selvkjørende biler flytter forsikringsansvaret over på produsentene.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "एप्पलले अमेरिकी स्टार्टअप १ अर्ब डलरमा किन्ने सोच्दै छ",
    "स्वायत्त कारहरूले बीमा दायित्व निर्माताहरु तिर बदल्छन्",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple overweegt om voor 1 miljard een U.K. startup te kopen",
    "Autonome auto's verschuiven de verzekeringverantwoordelijkheid naar producenten",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


# sentences taken from Omsetjingsminne frå Nynorsk pressekontor 2022 (https://www.nb.no/sprakbanken/en/resource-catalogue/oai-nb-no-sbr-80/)
sentences = [
    "Konseptet går ut på at alle tre omgangar tel, alle hopparar må stille i kvalifiseringa og poengsummen skal telje.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Poczuł przyjemną woń mocnej kawy.",
    "Istnieje wiele dróg oddziaływania substancji psychoaktywnej na układ nerwowy.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares",
    "Carros autônomos empurram a responsabilidade do seguro para os fabricantes."

@@ -7,7 +7,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple plănuiește să cumpere o companie britanică pentru un miliard de dolari",
    "Municipalitatea din San Francisco ia în calcul interzicerea roboților curieri pe trotuar",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    # Translations from English:
    "Apple рассматривает возможность покупки стартапа из Соединённого Королевства за $1 млрд",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "अभ्यावहति कल्याणं विविधं वाक् सुभाषिता ।",
    "मनसि व्याकुले चक्षुः पश्यन्नपि न पश्यति ।",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "මෙය වාක්‍යයකි.",
    "ඔබ කවුද?",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Ardevop, s.r.o. je malá startup firma na území SR.",
    "Samojazdiace autá presúvajú poistnú zodpovednosť na výrobcov automobilov.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple načrtuje nakup britanskega startupa za 1 bilijon dolarjev",
    "France Prešeren je umrl 8. februarja 1849 v Kranju",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple po shqyrton blerjen e nje shoqërie të U.K. për 1 miliard dollarë",
    "Makinat autonome ndryshojnë përgjegjësinë e sigurimit ndaj prodhuesve",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    # Translations from English
    "Apple планира куповину америчког стартапа за $1 милијарду.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple överväger att köpa brittisk startup för 1 miljard dollar.",
    "Självkörande bilar förskjuter försäkringsansvar mot tillverkare.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "கிறிஸ்துமஸ் மற்றும் இனிய புத்தாண்டு வாழ்த்துக்கள்",
    "எனக்கு என் குழந்தைப் பருவம் நினைவிருக்கிறது",

@@ -7,7 +7,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "ఆపిల్ 1 బిలియన్ డాలర్స్ కి యూ.కె. స్టార్ట్అప్ ని కొనాలని అనుకుంటుంది.",
    "ఆటోనోమోస్ కార్లు భీమా బాధ్యతను తయారీదారులపైకి మళ్లిస్తాయి.",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "አፕል ብዩኬ ትርከብ ንግድ ብ1 ቢሊዮን ዶላር ንምግዛዕ ሐሲባ።",
    "ፈላማይ ክታበት ኮቪድ 19 ተጀሚሩ፤ሓዱሽ ተስፋ ሂቡ ኣሎ",

@@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Apple e nyaka go reka JSE ka tlhwatlhwa ta R1 billion",
    "Johannesburg ke toropo e kgolo mo Afrika Borwa.",

@@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Neredesin?",
    "Neredesiniz?",

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Ніч на середу буде морозною.",
    "Чим кращі книги ти читав, тим гірше спиш.",  # Serhiy Zhadan

@@ -5,7 +5,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "اردو ہے جس کا نام ہم جانتے ہیں داغ",
    "سارے جہاں میں دھوم ہماری زباں کی ہے",

@@ -4,7 +4,6 @@ Example sentences to test spaCy and its language models.
>>> docs = nlp.pipe(sentences)
"""


sentences = [
    "Đây là đâu, tôi là ai?",
    "Căn phòng có nhiều cửa sổ nên nó khá sáng",

@@ -1519,8 +1519,7 @@ class Language:
        disable: Iterable[str] = ...,
        component_cfg: Optional[Dict[str, Dict[str, Any]]] = ...,
        n_process: int = ...,
    ) -> Iterator[Doc]:
        ...
    ) -> Iterator[Doc]: ...

    @overload
    def pipe(  # noqa: F811

@@ -1532,8 +1531,7 @@ class Language:
        disable: Iterable[str] = ...,
        component_cfg: Optional[Dict[str, Dict[str, Any]]] = ...,
        n_process: int = ...,
    ) -> Iterator[Tuple[Doc, _AnyContext]]:
        ...
    ) -> Iterator[Tuple[Doc, _AnyContext]]: ...

    def pipe(  # noqa: F811
        self,

@@ -1641,7 +1639,7 @@ class Language:
        batch_size: int,
    ) -> Iterator[Doc]:
        def prepare_input(
            texts: Iterable[Union[str, Doc]]
            texts: Iterable[Union[str, Doc]],
        ) -> Iterable[Tuple[Union[str, bytes], _AnyContext]]:
            # Serialize Doc inputs to bytes to avoid incurring pickling
            # overhead when they are passed to child processes. Also yield

@@ -1943,9 +1941,9 @@ class Language:
                    )
                if "_sourced_vectors_hashes" not in nlp.meta:
                    nlp.meta["_sourced_vectors_hashes"] = {}
                nlp.meta["_sourced_vectors_hashes"][
                    pipe_name
                ] = source_nlp_vectors_hashes[model]
                nlp.meta["_sourced_vectors_hashes"][pipe_name] = (
                    source_nlp_vectors_hashes[model]
                )
            # Delete from cache if listeners were replaced
            if listeners_replaced:
                del source_nlps[model]

@@ -51,9 +51,7 @@ class DependencyMatcher:
        ] = ...
    ) -> None: ...
    def has_key(self, key: Union[str, int]) -> bool: ...
    def get(
        self, key: Union[str, int], default: Optional[Any] = ...
    ) -> Tuple[
    def get(self, key: Union[str, int], default: Optional[Any] = ...) -> Tuple[
        Optional[
            Callable[[DependencyMatcher, Doc, int, List[Tuple[int, List[int]]]], Any]
        ],

@@ -7,7 +7,7 @@ from ..tokens import Doc


def FeatureExtractor(
    columns: Union[List[str], List[int], List[Union[int, str]]]
    columns: Union[List[str], List[int], List[Union[int, str]]],
) -> Model[List[Doc], List[Ints2d]]:
    return Model("extract_features", forward, attrs={"columns": columns})

@@ -122,7 +122,7 @@ def create_candidates() -> Callable[[KnowledgeBase, Span], Iterable[Candidate]]:
    return get_candidates


def create_candidates_batch() -> Callable[
    [KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]
]:
def create_candidates_batch() -> (
    Callable[[KnowledgeBase, Iterable[Span]], Iterable[Iterable[Candidate]]]
):
    return get_candidates_batch

@@ -93,7 +93,7 @@ class EditTreeLemmatizer(TrainablePipe):
        truths = []
        for eg in examples:
            eg_truths = []
            for (predicted, gold_lemma) in zip(
            for predicted, gold_lemma in zip(
                eg.predicted, eg.get_aligned("LEMMA", as_string=True)
            ):
                if gold_lemma is None or gold_lemma == "":

@@ -210,7 +210,7 @@ class Lemmatizer(Pipe):
        rules = rules_table.get(univ_pos, {})
        orig = string
        string = string.lower()
        forms: List[str] = []
        forms = []  # type: ignore
        oov_forms = []
        for old, new in rules:
            if string.endswith(old):

@@ -80,8 +80,7 @@ DEFAULT_SPANCAT_SINGLELABEL_MODEL = Config().from_str(

@runtime_checkable
class Suggester(Protocol):
    def __call__(self, docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged:
        ...
    def __call__(self, docs: Iterable[Doc], *, ops: Optional[Ops] = None) -> Ragged: ...


def ngram_suggester(

@@ -6,6 +6,7 @@ remain in their original locations, but decoration is moved here.

Component definitions and registrations are in spacy/pipeline/factories.py
"""

# Global flag to track if registry has been populated
REGISTRY_POPULATED = False

@@ -141,7 +141,8 @@ def test_issue3869(sentence):
@pytest.mark.issue(3962)
def test_issue3962(en_vocab):
    """Ensure that as_doc does not result in out-of-bound access of tokens.
    This is achieved by setting the head to itself if it would lie out of the span otherwise."""
    This is achieved by setting the head to itself if it would lie out of the span otherwise.
    """
    # fmt: off
    words = ["He", "jests", "at", "scars", ",", "that", "never", "felt", "a", "wound", "."]
    heads = [1, 7, 1, 2, 7, 7, 7, 7, 9, 7, 7]

@@ -180,7 +181,8 @@ def test_issue3962(en_vocab):
@pytest.mark.issue(3962)
def test_issue3962_long(en_vocab):
    """Ensure that as_doc does not result in out-of-bound access of tokens.
    This is achieved by setting the head to itself if it would lie out of the span otherwise."""
    This is achieved by setting the head to itself if it would lie out of the span otherwise.
    """
    # fmt: off
    words = ["He", "jests", "at", "scars", ".", "They", "never", "felt", "a", "wound", "."]
    heads = [1, 1, 1, 2, 1, 7, 7, 7, 9, 7, 7]

@@ -304,9 +304,11 @@ TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0])
SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0]
TESTS.extend(
    [
        pytest.param(x[0], x[1], marks=pytest.mark.slow())
        if not isinstance(x[0], tuple)
        else x
        (
            pytest.param(x[0], x[1], marks=pytest.mark.slow())
            if not isinstance(x[0], tuple)
            else x
        )
        for x in SLOW_TESTS
    ]
)

@@ -544,7 +544,7 @@ def test_greedy_matching_longest(doc, text, pattern, longest):
    matcher = Matcher(doc.vocab)
    matcher.add("RULE", [pattern], greedy="LONGEST")
    matches = matcher(doc)
    for (key, s, e) in matches:
    for key, s, e in matches:
        assert doc[s:e].text == longest

@@ -496,15 +496,15 @@ def test_el_pipe_configuration(nlp):
        return [get_lowercased_candidates(kb, span) for span in spans]

    @registry.misc("spacy.LowercaseCandidateGenerator.v1")
    def create_candidates() -> Callable[
        [InMemoryLookupKB, "Span"], Iterable[Candidate]
    ]:
    def create_candidates() -> (
        Callable[[InMemoryLookupKB, "Span"], Iterable[Candidate]]
    ):
        return get_lowercased_candidates

    @registry.misc("spacy.LowercaseCandidateBatchGenerator.v1")
    def create_candidates_batch() -> Callable[
        [InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]
    ]:
    def create_candidates_batch() -> (
        Callable[[InMemoryLookupKB, Iterable["Span"]], Iterable[Iterable[Candidate]]]
    ):
        return get_lowercased_candidates_batch

    # replace the pipe with a new one with with a different candidate generator

@@ -279,20 +279,17 @@ def test_pipe_factories_wrong_formats():
    with pytest.raises(ValueError):
        # Decorator is not called
        @Language.component
        def component(foo: int, bar: str):
            ...
        def component(foo: int, bar: str): ...

    with pytest.raises(ValueError):
        # Decorator is not called
        @Language.factory
        def factory1(foo: int, bar: str):
            ...
        def factory1(foo: int, bar: str): ...

    with pytest.raises(ValueError):
        # Factory function is missing "nlp" and "name" arguments
        @Language.factory("test_pipe_factories_missing_args")
        def factory2(foo: int, bar: str):
            ...
        def factory2(foo: int, bar: str): ...


def test_pipe_factory_meta_config_cleanup():

@@ -329,8 +326,7 @@ def test_pipe_factories_empty_dict_default():
    name = "test_pipe_factories_empty_dict_default"

    @Language.factory(name, default_config={"foo": {}})
    def factory(nlp: Language, name: str, foo: dict):
        ...
    def factory(nlp: Language, name: str, foo: dict): ...

    nlp = Language()
    nlp.create_pipe(name)

@@ -549,11 +545,9 @@ def test_pipe_factories_from_source_config():


class PipeFactoriesIdempotent:
    def __init__(self, nlp, name):
        ...
    def __init__(self, nlp, name): ...

    def __call__(self, doc):
        ...
    def __call__(self, doc): ...


@pytest.mark.parametrize(

@@ -874,7 +874,8 @@ def test_textcat_eval_missing(multi_label: bool, spring_p: float):
def test_textcat_loss(multi_label: bool, expected_loss: float):
    """
    multi-label: the missing 'spring' in gold_doc_2 doesn't incur an increase in loss
    exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss"""
    exclusive labels: the missing 'spring' in gold_doc_2 is interpreted as 0.0 and adds to the loss
    """
    train_examples = []
    nlp = English()

@@ -890,7 +890,7 @@ def test_cli_find_threshold(capsys):
        return docs

    def init_nlp(
        components: Tuple[Tuple[str, Dict[str, Any]], ...] = ()
        components: Tuple[Tuple[str, Dict[str, Any]], ...] = (),
    ) -> Tuple[Language, List[Example]]:
        new_nlp = English()
        new_nlp.add_pipe(  # type: ignore

@@ -57,9 +57,7 @@ class Doc:
        force: bool = ...,
    ) -> None: ...
    @classmethod
    def get_extension(
        cls, name: str
    ) -> Tuple[
    def get_extension(cls, name: str) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],

|
@ -68,9 +66,7 @@ class Doc:
|
|||
@classmethod
|
||||
def has_extension(cls, name: str) -> bool: ...
|
||||
@classmethod
|
||||
def remove_extension(
|
||||
cls, name: str
|
||||
) -> Tuple[
|
||||
def remove_extension(cls, name: str) -> Tuple[
|
||||
Optional[Any],
|
||||
Optional[DocMethod],
|
||||
Optional[Callable[[Doc], Any]],
|
||||
|
|
|
|||
|
|
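These stub reflows don't change the runtime API: `Doc.get_extension` still returns the `(default, method, getter, setter)` tuple registered through `set_extension`, as the `Optional[...]` members of the `Tuple` spell out. An illustrative sketch (the extension name is made up):

```python
from spacy.tokens import Doc

Doc.set_extension("is_question", default=False)  # hypothetical extension
default, method, getter, setter = Doc.get_extension("is_question")
assert default is False
assert method is None and getter is None and setter is None
```
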
@@ -23,9 +23,7 @@ class Span:
        force: bool = ...,
    ) -> None: ...
    @classmethod
    def get_extension(
        cls, name: str
    ) -> Tuple[
    def get_extension(cls, name: str) -> Tuple[
        Optional[Any],
        Optional[SpanMethod],
        Optional[Callable[[Span], Any]],

@@ -34,9 +32,7 @@ class Span:
    @classmethod
    def has_extension(cls, name: str) -> bool: ...
    @classmethod
    def remove_extension(
        cls, name: str
    ) -> Tuple[
    def remove_extension(cls, name: str) -> Tuple[
        Optional[Any],
        Optional[SpanMethod],
        Optional[Callable[[Span], Any]],

@@ -11,7 +11,7 @@ from .doc cimport Doc
from ..errors import Errors


cdef int MISSING_DEP = 0
cdef const int MISSING_DEP = 0

cdef class Token:
    cdef readonly Vocab vocab

@@ -27,9 +27,7 @@ class Token:
        force: bool = ...,
    ) -> None: ...
    @classmethod
    def get_extension(
        cls, name: str
    ) -> Tuple[
    def get_extension(cls, name: str) -> Tuple[
        Optional[Any],
        Optional[TokenMethod],
        Optional[Callable[[Token], Any]],

@@ -38,9 +36,7 @@ class Token:
    @classmethod
    def has_extension(cls, name: str) -> bool: ...
    @classmethod
    def remove_extension(
        cls, name: str
    ) -> Tuple[
    def remove_extension(cls, name: str) -> Tuple[
        Optional[Any],
        Optional[TokenMethod],
        Optional[Callable[[Token], Any]],

@@ -354,7 +354,7 @@ def update_meta(


def create_before_to_disk_callback(
    callback: Optional[Callable[["Language"], "Language"]]
    callback: Optional[Callable[["Language"], "Language"]],
) -> Callable[["Language"], "Language"]:
    from ..language import Language  # noqa: F811

spacy/ty.py (18 changes)

@@ -30,11 +30,9 @@ class TrainableComponent(Protocol):
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None
    ) -> Dict[str, float]:
        ...
    ) -> Dict[str, float]: ...

    def finish_update(self, sgd: Optimizer) -> None:
        ...
    def finish_update(self, sgd: Optimizer) -> None: ...


@runtime_checkable

@@ -44,8 +42,7 @@ class InitializableComponent(Protocol):
        get_examples: Callable[[], Iterable["Example"]],
        nlp: "Language",
        **kwargs: Any
    ):
        ...
    ): ...


@runtime_checkable

@@ -55,11 +52,8 @@ class ListenedToComponent(Protocol):
    listener_map: Dict[str, Sequence[Model]]
    listening_components: List[str]

    def add_listener(self, listener: Model, component_name: str) -> None:
        ...
    def add_listener(self, listener: Model, component_name: str) -> None: ...

    def remove_listener(self, listener: Model, component_name: str) -> bool:
        ...
    def remove_listener(self, listener: Model, component_name: str) -> bool: ...

    def find_listeners(self, component) -> None:
        ...
    def find_listeners(self, component) -> None: ...

@@ -657,7 +657,7 @@ def load_model_from_config(


def get_sourced_components(
    config: Union[Dict[str, Any], Config]
    config: Union[Dict[str, Any], Config],
) -> Dict[str, Dict[str, Any]]:
    """RETURNS (List[str]): All sourced components in the original config,
    e.g. {"source": "en_core_web_sm"}. If the config contains a key

@@ -47,7 +47,7 @@ pipeline name to be specified with its version (e.g. `en_core_web_sm-3.0.0`).
> project.

```bash
$ python -m spacy download [model] [--direct] [--sdist] [pip_args]
$ python -m spacy download [model] [--direct] [--sdist] [pip_args] [--url url]
```

| Name | Description |

@@ -58,6 +58,7 @@ $ python -m spacy download [model] [--direct] [--sdist] [pip_args]
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| pip args | Additional installation options to be passed to `pip install` when installing the pipeline package. For example, `--user` to install to the user home directory or `--no-deps` to not install package dependencies. ~~Any (option/flag)~~ |
| **CREATES** | The installed pipeline package in your `site-packages` directory. |
| `--url`, `-U` | Download from a mirror repository at the given url |

## info {id="info",tag="command"}

@@ -2739,20 +2739,7 @@
        "courses"
      ]
    },
    {
      "type": "education",
      "id": "spacy-quickstart",
      "title": "spaCy Quickstart",
      "slogan": "Learn spaCy basics quickly by visualizing various Doc objects",
      "description": "In this course, I use the itables Python library inside a Jupyter notebook so that you can visualize the different spaCy document objects. This will provide a solid foundation for people who wish to learn the spaCy NLP library.",
      "url": "https://learnspacy.com/courses/spacy-quickstart/",
      "image": "https://learnspacy.com/wp-content/uploads/2024/09/custom_search_builder_spacy-2048x1202.png",
      "thumb": "https://learnspacy.com/wp-content/uploads/2024/09/learnspacy_logo.png",
      "author": "Aravind Mohanoor",
      "category": [
        "courses"
      ]
    },

    {
      "type": "education",
      "id": "video-spacys-ner-model",