diff --git a/.github/azure-steps.yml b/.github/azure-steps.yml
index 9d57219ca..cc0247b3a 100644
--- a/.github/azure-steps.yml
+++ b/.github/azure-steps.yml
@@ -10,6 +10,7 @@ steps:
inputs:
versionSpec: ${{ parameters.python_version }}
architecture: ${{ parameters.architecture }}
+ allowUnstable: true
- bash: |
echo "##vso[task.setvariable variable=python_version]${{ parameters.python_version }}"
diff --git a/.github/workflows/autoblack.yml b/.github/workflows/autoblack.yml
index 8d0282650..3ad4cf408 100644
--- a/.github/workflows/autoblack.yml
+++ b/.github/workflows/autoblack.yml
@@ -12,10 +12,10 @@ jobs:
if: github.repository_owner == 'explosion'
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
with:
ref: ${{ github.head_ref }}
- - uses: actions/setup-python@v2
+ - uses: actions/setup-python@v3
- run: pip install black
- name: Auto-format code if needed
run: black spacy
@@ -23,10 +23,11 @@ jobs:
# code and makes GitHub think the action failed
- name: Check for modified files
id: git-check
- run: echo ::set-output name=modified::$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi)
+ run: echo modified=$(if git diff-index --quiet HEAD --; then echo "false"; else echo "true"; fi) >> $GITHUB_OUTPUT
+
- name: Create Pull Request
if: steps.git-check.outputs.modified == 'true'
- uses: peter-evans/create-pull-request@v3
+ uses: peter-evans/create-pull-request@v4
with:
title: Auto-format code with black
labels: meta
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b959262e3..df59697b1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
language_version: python3.7
additional_dependencies: ['click==8.0.4']
- repo: https://gitlab.com/pycqa/flake8
- rev: 3.9.2
+ rev: 5.0.4
hooks:
- id: flake8
args:
diff --git a/README.md b/README.md
index d9ef83e01..abfc3da67 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ be used in real products.
spaCy comes with
[pretrained pipelines](https://spacy.io/models) and
-currently supports tokenization and training for **60+ languages**. It features
+currently supports tokenization and training for **70+ languages**. It features
state-of-the-art speed and **neural network models** for tagging,
parsing, **named entity recognition**, **text classification** and more,
multi-task learning with pretrained **transformers** like BERT, as well as a
@@ -16,7 +16,7 @@ production-ready [**training system**](https://spacy.io/usage/training) and easy
model packaging, deployment and workflow management. spaCy is commercial
open-source software, released under the MIT license.
-💫 **Version 3.4.0 out now!**
+💫 **Version 3.4 out now!**
[Check out the release notes here.](https://github.com/explosion/spaCy/releases)
[Azure Pipelines](https://dev.azure.com/explosion-ai/public/_build?definitionId=8)
@@ -79,7 +79,7 @@ more people can benefit from it.
## Features
-- Support for **60+ languages**
+- Support for **70+ languages**
- **Trained pipelines** for different languages and tasks
- Multi-task learning with pretrained **transformers** like BERT
- Support for pretrained **word vectors** and embeddings
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 2f5201614..eea07cb7a 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -76,15 +76,24 @@ jobs:
# Python39Mac:
# imageName: "macos-latest"
# python.version: "3.9"
- Python310Linux:
- imageName: "ubuntu-latest"
- python.version: "3.10"
+ # Python310Linux:
+ # imageName: "ubuntu-latest"
+ # python.version: "3.10"
Python310Windows:
imageName: "windows-latest"
python.version: "3.10"
- Python310Mac:
- imageName: "macos-latest"
- python.version: "3.10"
+ # Python310Mac:
+ # imageName: "macos-latest"
+ # python.version: "3.10"
+ Python311Linux:
+ imageName: 'ubuntu-latest'
+ python.version: '3.11.0-rc.2'
+ Python311Windows:
+ imageName: 'windows-latest'
+ python.version: '3.11.0-rc.2'
+ Python311Mac:
+ imageName: 'macos-latest'
+ python.version: '3.11.0-rc.2'
maxParallel: 4
pool:
vmImage: $(imageName)
diff --git a/requirements.txt b/requirements.txt
index 446560c06..9d6bbb2c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,7 +15,7 @@ pathy>=0.3.5
numpy>=1.15.0
requests>=2.13.0,<3.0.0
tqdm>=4.38.0,<5.0.0
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
+pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0
jinja2
langcodes>=3.2.0,<4.0.0
# Official Python utilities
@@ -28,7 +28,7 @@ cython>=0.25,<3.0
pytest>=5.2.0,!=7.1.0
pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0
-flake8>=3.8.0,<3.10.0
+flake8>=3.8.0,<6.0.0
hypothesis>=3.27.0,<7.0.0
mypy>=0.980,<0.990; platform_machine != "aarch64" and python_version >= "3.7"
types-dataclasses>=0.1.3; python_version < "3.7"
diff --git a/setup.cfg b/setup.cfg
index 2dc5e7042..c2653feba 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,7 +56,7 @@ install_requires =
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0
- pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
+ pydantic>=1.7.4,!=1.8,!=1.8.1,<1.11.0
jinja2
# Official Python utilities
setuptools
diff --git a/spacy/about.py b/spacy/about.py
index 843c15aba..ce86e6294 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -1,6 +1,6 @@
# fmt: off
__title__ = "spacy"
-__version__ = "3.4.1"
+__version__ = "3.4.2"
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
__projects__ = "https://github.com/explosion/projects"
diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py
index 83dc5efbf..a15353855 100644
--- a/spacy/cli/project/dvc.py
+++ b/spacy/cli/project/dvc.py
@@ -25,6 +25,7 @@ def project_update_dvc_cli(
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
workflow: Optional[str] = Arg(None, help=f"Name of workflow defined in {PROJECT_FILE}. Defaults to first workflow if not set."),
verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"),
+ quiet: bool = Opt(False, "--quiet", "-q", help="Print less info"),
force: bool = Opt(False, "--force", "-F", help="Force update DVC config"),
# fmt: on
):
@@ -36,7 +37,7 @@ def project_update_dvc_cli(
DOCS: https://spacy.io/api/cli#project-dvc
"""
- project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
+ project_update_dvc(project_dir, workflow, verbose=verbose, quiet=quiet, force=force)
def project_update_dvc(
@@ -44,6 +45,7 @@ def project_update_dvc(
workflow: Optional[str] = None,
*,
verbose: bool = False,
+ quiet: bool = False,
force: bool = False,
) -> None:
"""Update the auto-generated Data Version Control (DVC) config file. A DVC
@@ -54,11 +56,12 @@ def project_update_dvc(
workflow (Optional[str]): Optional name of workflow defined in project.yml.
If not set, the first workflow will be used.
verbose (bool): Print more info.
+ quiet (bool): Print less info.
force (bool): Force update DVC config.
"""
config = load_project_config(project_dir)
updated = update_dvc_config(
- project_dir, config, workflow, verbose=verbose, force=force
+ project_dir, config, workflow, verbose=verbose, quiet=quiet, force=force
)
help_msg = "To execute the workflow with DVC, run: dvc repro"
if updated:
@@ -72,7 +75,7 @@ def update_dvc_config(
config: Dict[str, Any],
workflow: Optional[str] = None,
verbose: bool = False,
- silent: bool = False,
+ quiet: bool = False,
force: bool = False,
) -> bool:
"""Re-run the DVC commands in dry mode and update dvc.yaml file in the
@@ -83,7 +86,7 @@ def update_dvc_config(
path (Path): The path to the project directory.
config (Dict[str, Any]): The loaded project.yml.
verbose (bool): Whether to print additional info (via DVC).
- silent (bool): Don't output anything (via DVC).
+ quiet (bool): Don't output anything (via DVC).
force (bool): Force update, even if hashes match.
RETURNS (bool): Whether the DVC config file was updated.
"""
@@ -105,6 +108,14 @@ def update_dvc_config(
dvc_config_path.unlink()
dvc_commands = []
config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
+
+ # Some flags that apply to every command
+ flags = []
+ if verbose:
+ flags.append("--verbose")
+ if quiet:
+ flags.append("--quiet")
+
for name in workflows[workflow]:
command = config_commands[name]
deps = command.get("deps", [])
@@ -118,14 +129,26 @@ def update_dvc_config(
deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl]
outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl]
outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl]
- dvc_cmd = ["run", "-n", name, "-w", str(path), "--no-exec"]
+
+ dvc_cmd = ["run", *flags, "-n", name, "-w", str(path), "--no-exec"]
if command.get("no_skip"):
dvc_cmd.append("--always-changed")
full_cmd = [*dvc_cmd, *deps_cmd, *outputs_cmd, *outputs_nc_cmd, *project_cmd]
dvc_commands.append(join_command(full_cmd))
+
+ if not dvc_commands:
+ # If we don't check for this, then there will be an error when reading the
+ # config, since DVC wouldn't create it.
+ msg.fail(
+ "No usable commands for DVC found. This can happen if none of your "
+ "commands have dependencies or outputs.",
+ exits=1,
+ )
+
with working_dir(path):
- dvc_flags = {"--verbose": verbose, "--quiet": silent}
- run_dvc_commands(dvc_commands, flags=dvc_flags)
+ for c in dvc_commands:
+ dvc_command = "dvc " + c
+ run_command(dvc_command)
with dvc_config_path.open("r+", encoding="utf8") as f:
content = f.read()
f.seek(0, 0)
@@ -133,26 +156,6 @@ def update_dvc_config(
return True
-def run_dvc_commands(
- commands: Iterable[str] = SimpleFrozenList(), flags: Dict[str, bool] = {}
-) -> None:
- """Run a sequence of DVC commands in a subprocess, in order.
-
- commands (List[str]): The string commands without the leading "dvc".
- flags (Dict[str, bool]): Conditional flags to be added to command. Makes it
- easier to pass flags like --quiet that depend on a variable or
- command-line setting while avoiding lots of nested conditionals.
- """
- for c in commands:
- command = split_command(c)
- dvc_command = ["dvc", *command]
- # Add the flags if they are set to True
- for flag, is_active in flags.items():
- if is_active:
- dvc_command.append(flag)
- run_command(dvc_command)
-
-
def check_workflows(workflows: List[str], workflow: Optional[str] = None) -> None:
"""Validate workflows provided in project.yml and check that a given
workflow can be used to generate a DVC config.
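
The refactor above folds the removed `run_dvc_commands` helper into a plain loop and splices the shared `--verbose`/`--quiet` flags into each generated `dvc run` invocation up front. A minimal sketch of the resulting flag plumbing (command name and path are illustrative):

```python
verbose, quiet = False, True  # values come from the new CLI options

# Shared flags are assembled once and inlined into every generated command
flags = []
if verbose:
    flags.append("--verbose")
if quiet:
    flags.append("--quiet")

# Each workflow step becomes one "dvc run" command string
dvc_cmd = ["run", *flags, "-n", "train", "-w", "/path/to/project", "--no-exec"]
print("dvc " + " ".join(dvc_cmd))
# dvc run --quiet -n train -w /path/to/project --no-exec
```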
diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py
index 720d3a8cb..c37a3a91a 100644
--- a/spacy/lang/ru/lemmatizer.py
+++ b/spacy/lang/ru/lemmatizer.py
@@ -23,7 +23,7 @@ class RussianLemmatizer(Lemmatizer):
overwrite: bool = False,
scorer: Optional[Callable] = lemmatizer_score,
) -> None:
- if mode == "pymorphy2":
+ if mode in {"pymorphy2", "pymorphy2_lookup"}:
try:
from pymorphy2 import MorphAnalyzer
except ImportError:
diff --git a/spacy/lang/uk/lemmatizer.py b/spacy/lang/uk/lemmatizer.py
index 97ee80479..8337e7328 100644
--- a/spacy/lang/uk/lemmatizer.py
+++ b/spacy/lang/uk/lemmatizer.py
@@ -18,7 +18,7 @@ class UkrainianLemmatizer(RussianLemmatizer):
overwrite: bool = False,
scorer: Optional[Callable] = lemmatizer_score,
) -> None:
- if mode == "pymorphy2":
+ if mode in {"pymorphy2", "pymorphy2_lookup"}:
try:
from pymorphy2 import MorphAnalyzer
except ImportError:
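
With both changes above, the lookup variant of the pymorphy2 backend can be constructed like any other lemmatizer mode. A minimal usage sketch mirroring the new test fixtures (assumes `pymorphy2` is installed, plus `pymorphy2-dicts-uk` for Ukrainian):

```python
import spacy

# Russian lemmatizer in lookup mode, backed by pymorphy2
nlp_ru = spacy.blank("ru")
nlp_ru.add_pipe("lemmatizer", config={"mode": "pymorphy2_lookup"})

# Ukrainian works the same way
nlp_uk = spacy.blank("uk")
nlp_uk.add_pipe("lemmatizer", config={"mode": "pymorphy2_lookup"})
```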
diff --git a/spacy/ml/models/entity_linker.py b/spacy/ml/models/entity_linker.py
index 4045a3206..2f8ab20a5 100644
--- a/spacy/ml/models/entity_linker.py
+++ b/spacy/ml/models/entity_linker.py
@@ -71,11 +71,10 @@ def span_maker_forward(model, docs: List[Doc], is_train) -> Tuple[Ragged, Callab
cands.append((start_token, end_token))
candidates.append(ops.asarray2i(cands))
- candlens = ops.asarray1i([len(cands) for cands in candidates])
- candidates = ops.xp.concatenate(candidates)
- outputs = Ragged(candidates, candlens)
+ lengths = model.ops.asarray1i([len(cands) for cands in candidates])
+ out = Ragged(model.ops.flatten(candidates), lengths)
# because this is just rearranging docs, the backprop does nothing
- return outputs, lambda x: []
+ return out, lambda x: []
@registry.misc("spacy.KBFromFile.v1")
diff --git a/spacy/pipeline/edit_tree_lemmatizer.py b/spacy/pipeline/edit_tree_lemmatizer.py
index b7d615f6d..12f9b73a3 100644
--- a/spacy/pipeline/edit_tree_lemmatizer.py
+++ b/spacy/pipeline/edit_tree_lemmatizer.py
@@ -1,7 +1,6 @@
from typing import cast, Any, Callable, Dict, Iterable, List, Optional
-from typing import Sequence, Tuple, Union
+from typing import Tuple
from collections import Counter
-from copy import deepcopy
from itertools import islice
import numpy as np
@@ -149,9 +148,7 @@ class EditTreeLemmatizer(TrainablePipe):
if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs.
n_labels = len(self.cfg["labels"])
- guesses: List[Ints2d] = [
- self.model.ops.alloc((0, n_labels), dtype="i") for doc in docs
- ]
+ guesses: List[Ints2d] = [self.model.ops.alloc2i(0, n_labels) for _ in docs]
assert len(guesses) == n_docs
return guesses
scores = self.model.predict(docs)
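
`alloc2i` is shorthand for a two-dimensional integer alloc, so the simplified guess initialization is equivalent to the old `alloc(..., dtype="i")` spelling. A quick sketch, assuming thinc's `NumpyOps`:

```python
from thinc.api import NumpyOps

ops = NumpyOps()
a = ops.alloc2i(0, 13)             # new spelling
b = ops.alloc((0, 13), dtype="i")  # old spelling
assert a.shape == b.shape == (0, 13)
assert a.dtype == b.dtype
```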
diff --git a/spacy/pipeline/spancat.py b/spacy/pipeline/spancat.py
index 5ede622c2..956bbb72c 100644
--- a/spacy/pipeline/spancat.py
+++ b/spacy/pipeline/spancat.py
@@ -133,6 +133,9 @@ def make_spancat(
spans_key (str): Key of the doc.spans dict to save the spans under. During
initialization and training, the component will look for spans on the
reference document under the same key.
+ scorer (Optional[Callable]): The scoring method. Defaults to
+ Scorer.score_spans for the Doc.spans[spans_key] with overlapping
+ spans allowed.
threshold (float): Minimum probability to consider a prediction positive.
Spans with a positive prediction will be saved on the Doc. Defaults to
0.5.
diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py
index c45f819fc..59549ad99 100644
--- a/spacy/pipeline/textcat.py
+++ b/spacy/pipeline/textcat.py
@@ -24,8 +24,8 @@ single_label_default_config = """
[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 64
-rows = [2000, 2000, 1000, 1000, 1000, 1000]
-attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
+rows = [2000, 2000, 500, 1000, 500]
+attrs = ["NORM", "LOWER", "PREFIX", "SUFFIX", "SHAPE"]
include_static_vectors = false
[model.tok2vec.encode]
diff --git a/spacy/pipeline/textcat_multilabel.py b/spacy/pipeline/textcat_multilabel.py
index 10aef46aa..eb83d9cb7 100644
--- a/spacy/pipeline/textcat_multilabel.py
+++ b/spacy/pipeline/textcat_multilabel.py
@@ -24,8 +24,8 @@ multi_label_default_config = """
[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v2"
width = 64
-rows = [2000, 2000, 1000, 1000, 1000, 1000]
-attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
+rows = [2000, 2000, 500, 1000, 500]
+attrs = ["NORM", "LOWER", "PREFIX", "SUFFIX", "SHAPE"]
include_static_vectors = false
[model.tok2vec.encode]
@@ -96,8 +96,8 @@ def make_multilabel_textcat(
model: Model[List[Doc], List[Floats2d]],
threshold: float,
scorer: Optional[Callable],
-) -> "TextCategorizer":
- """Create a TextCategorizer component. The text categorizer predicts categories
+) -> "MultiLabel_TextCategorizer":
+ """Create a MultiLabel_TextCategorizer component. The text categorizer predicts categories
over a whole document. It can learn one or more labels, and the labels are considered
to be non-mutually exclusive, which means that there can be zero or more labels
per doc.
@@ -105,6 +105,7 @@ def make_multilabel_textcat(
model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
scores for each category.
threshold (float): Cutoff to consider a prediction "positive".
+ scorer (Optional[Callable]): The scoring method.
"""
return MultiLabel_TextCategorizer(
nlp.vocab, model, name, threshold=threshold, scorer=scorer
@@ -147,6 +148,7 @@ class MultiLabel_TextCategorizer(TextCategorizer):
name (str): The component instance name, used to add entries to the
losses during training.
threshold (float): Cutoff to consider a prediction "positive".
+ scorer (Optional[Callable]): The scoring method.
DOCS: https://spacy.io/api/textcategorizer#init
"""
diff --git a/spacy/schemas.py b/spacy/schemas.py
index 048082134..c824d76b9 100644
--- a/spacy/schemas.py
+++ b/spacy/schemas.py
@@ -181,12 +181,12 @@ class TokenPatternNumber(BaseModel):
IS_SUBSET: Optional[List[StrictInt]] = Field(None, alias="is_subset")
IS_SUPERSET: Optional[List[StrictInt]] = Field(None, alias="is_superset")
INTERSECTS: Optional[List[StrictInt]] = Field(None, alias="intersects")
- EQ: Union[StrictInt, StrictFloat] = Field(None, alias="==")
- NEQ: Union[StrictInt, StrictFloat] = Field(None, alias="!=")
- GEQ: Union[StrictInt, StrictFloat] = Field(None, alias=">=")
- LEQ: Union[StrictInt, StrictFloat] = Field(None, alias="<=")
- GT: Union[StrictInt, StrictFloat] = Field(None, alias=">")
- LT: Union[StrictInt, StrictFloat] = Field(None, alias="<")
+ EQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="==")
+ NEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="!=")
+ GEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">=")
+ LEQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<=")
+ GT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias=">")
+ LT: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="<")
class Config:
extra = "forbid"
@@ -430,7 +430,7 @@ class ProjectConfigAssetURL(BaseModel):
# fmt: off
dest: StrictStr = Field(..., title="Destination of downloaded asset")
url: Optional[StrictStr] = Field(None, title="URL of asset")
- checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+ checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
description: StrictStr = Field("", title="Description of asset")
# fmt: on
@@ -438,7 +438,7 @@ class ProjectConfigAssetURL(BaseModel):
class ProjectConfigAssetGit(BaseModel):
# fmt: off
git: ProjectConfigAssetGitItem = Field(..., title="Git repo information")
- checksum: str = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
+ checksum: Optional[str] = Field(None, title="MD5 hash of file", regex=r"([a-fA-F\d]{32})")
description: Optional[StrictStr] = Field(None, title="Description of asset")
# fmt: on
@@ -508,9 +508,9 @@ class DocJSONSchema(BaseModel):
None, title="Indices of sentences' start and end indices"
)
text: StrictStr = Field(..., title="Document text")
- spans: Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]] = Field(
- None, title="Span information - end/start indices, label, KB ID"
- )
+ spans: Optional[
+ Dict[StrictStr, List[Dict[StrictStr, Union[StrictStr, StrictInt]]]]
+ ] = Field(None, title="Span information - end/start indices, label, KB ID")
tokens: List[Dict[StrictStr, Union[StrictStr, StrictInt]]] = Field(
..., title="Token information - ID, start, annotations"
)
@@ -519,9 +519,9 @@ class DocJSONSchema(BaseModel):
title="Any custom data stored in the document's _ attribute",
alias="_",
)
- underscore_token: Optional[Dict[StrictStr, Dict[StrictStr, Any]]] = Field(
+ underscore_token: Optional[Dict[StrictStr, List[Dict[StrictStr, Any]]]] = Field(
None, title="Any custom data stored in the token's _ attribute"
)
- underscore_span: Optional[Dict[StrictStr, Dict[StrictStr, Any]]] = Field(
+ underscore_span: Optional[Dict[StrictStr, List[Dict[StrictStr, Any]]]] = Field(
None, title="Any custom data stored in the span's _ attribute"
)
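
All of these schema tweaks follow one pattern: fields defaulting to `None` are now annotated `Optional[...]`, which newer pydantic releases (cf. the `<1.11.0` bumps in `requirements.txt` and `setup.cfg`) check more strictly. A minimal sketch of the pattern, using a hypothetical model name:

```python
from typing import Optional, Union
from pydantic import BaseModel, Field, StrictInt, StrictFloat

class NumberPatternSketch(BaseModel):  # hypothetical stand-in for TokenPatternNumber
    EQ: Optional[Union[StrictInt, StrictFloat]] = Field(None, alias="==")

NumberPatternSketch(**{"==": 3})  # populated via the alias
NumberPatternSketch()             # EQ may legitimately stay None
```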
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 3c1c1333a..0fc74243d 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -343,6 +343,14 @@ def ru_lemmatizer():
return get_lang_class("ru")().add_pipe("lemmatizer")
+@pytest.fixture
+def ru_lookup_lemmatizer():
+ pytest.importorskip("pymorphy2")
+ return get_lang_class("ru")().add_pipe(
+ "lemmatizer", config={"mode": "pymorphy2_lookup"}
+ )
+
+
@pytest.fixture(scope="session")
def sa_tokenizer():
return get_lang_class("sa")().tokenizer
@@ -422,6 +430,15 @@ def uk_lemmatizer():
return get_lang_class("uk")().add_pipe("lemmatizer")
+@pytest.fixture
+def uk_lookup_lemmatizer():
+ pytest.importorskip("pymorphy2")
+ pytest.importorskip("pymorphy2_dicts_uk")
+ return get_lang_class("uk")().add_pipe(
+ "lemmatizer", config={"mode": "pymorphy2_lookup"}
+ )
+
+
@pytest.fixture(scope="session")
def ur_tokenizer():
return get_lang_class("ur")().tokenizer
diff --git a/spacy/tests/doc/test_json_doc_conversion.py b/spacy/tests/doc/test_json_doc_conversion.py
index 0d7c061c9..19698cfb2 100644
--- a/spacy/tests/doc/test_json_doc_conversion.py
+++ b/spacy/tests/doc/test_json_doc_conversion.py
@@ -128,7 +128,9 @@ def test_doc_to_json_with_token_span_attributes(doc):
doc._.json_test1 = "hello world"
doc._.json_test2 = [1, 2, 3]
doc[0:1]._.span_test = "span_attribute"
+ doc[0:2]._.span_test = "span_attribute_2"
doc[0]._.token_test = 117
+ doc[1]._.token_test = 118
doc.spans["span_group"] = [doc[0:1]]
json_doc = doc.to_json(
underscore=["json_test1", "json_test2", "token_test", "span_test"]
@@ -139,8 +141,10 @@ def test_doc_to_json_with_token_span_attributes(doc):
assert json_doc["_"]["json_test2"] == [1, 2, 3]
assert "underscore_token" in json_doc
assert "underscore_span" in json_doc
- assert json_doc["underscore_token"]["token_test"]["value"] == 117
- assert json_doc["underscore_span"]["span_test"]["value"] == "span_attribute"
+ assert json_doc["underscore_token"]["token_test"][0]["value"] == 117
+ assert json_doc["underscore_token"]["token_test"][1]["value"] == 118
+ assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute"
+ assert json_doc["underscore_span"]["span_test"][1]["value"] == "span_attribute_2"
assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0
assert srsly.json_loads(srsly.json_dumps(json_doc)) == json_doc
@@ -161,8 +165,8 @@ def test_doc_to_json_with_custom_user_data(doc):
assert json_doc["_"]["json_test"] == "hello world"
assert "underscore_token" in json_doc
assert "underscore_span" in json_doc
- assert json_doc["underscore_token"]["token_test"]["value"] == 117
- assert json_doc["underscore_span"]["span_test"]["value"] == "span_attribute"
+ assert json_doc["underscore_token"]["token_test"][0]["value"] == 117
+ assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute"
assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0
assert srsly.json_loads(srsly.json_dumps(json_doc)) == json_doc
@@ -181,8 +185,8 @@ def test_doc_to_json_with_token_span_same_identifier(doc):
assert json_doc["_"]["my_ext"] == "hello world"
assert "underscore_token" in json_doc
assert "underscore_span" in json_doc
- assert json_doc["underscore_token"]["my_ext"]["value"] == 117
- assert json_doc["underscore_span"]["my_ext"]["value"] == "span_attribute"
+ assert json_doc["underscore_token"]["my_ext"][0]["value"] == 117
+ assert json_doc["underscore_span"]["my_ext"][0]["value"] == "span_attribute"
assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0
assert srsly.json_loads(srsly.json_dumps(json_doc)) == json_doc
@@ -195,10 +199,9 @@ def test_doc_to_json_with_token_attributes_missing(doc):
doc[0]._.token_test = 117
json_doc = doc.to_json(underscore=["span_test"])
- assert "underscore_token" in json_doc
assert "underscore_span" in json_doc
- assert json_doc["underscore_span"]["span_test"]["value"] == "span_attribute"
- assert "token_test" not in json_doc["underscore_token"]
+ assert json_doc["underscore_span"]["span_test"][0]["value"] == "span_attribute"
+ assert "underscore_token" not in json_doc
assert len(schemas.validate(schemas.DocJSONSchema, json_doc)) == 0
@@ -283,7 +286,9 @@ def test_json_to_doc_with_token_span_attributes(doc):
doc._.json_test1 = "hello world"
doc._.json_test2 = [1, 2, 3]
doc[0:1]._.span_test = "span_attribute"
+ doc[0:2]._.span_test = "span_attribute_2"
doc[0]._.token_test = 117
+ doc[1]._.token_test = 118
json_doc = doc.to_json(
underscore=["json_test1", "json_test2", "token_test", "span_test"]
@@ -295,7 +300,9 @@ def test_json_to_doc_with_token_span_attributes(doc):
assert new_doc._.json_test1 == "hello world"
assert new_doc._.json_test2 == [1, 2, 3]
assert new_doc[0]._.token_test == 117
+ assert new_doc[1]._.token_test == 118
assert new_doc[0:1]._.span_test == "span_attribute"
+ assert new_doc[0:2]._.span_test == "span_attribute_2"
assert new_doc.user_data == doc.user_data
assert new_doc.to_bytes(exclude=["user_data"]) == doc.to_bytes(
exclude=["user_data"]
diff --git a/spacy/tests/lang/ru/test_lemmatizer.py b/spacy/tests/lang/ru/test_lemmatizer.py
index 9ca7f441b..e82fd4f8c 100644
--- a/spacy/tests/lang/ru/test_lemmatizer.py
+++ b/spacy/tests/lang/ru/test_lemmatizer.py
@@ -78,3 +78,17 @@ def test_ru_lemmatizer_punct(ru_lemmatizer):
assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
doc = Doc(ru_lemmatizer.vocab, words=["»"], pos=["PUNCT"])
assert ru_lemmatizer.pymorphy2_lemmatize(doc[0]) == ['"']
+
+
+def test_ru_doc_lookup_lemmatization(ru_lookup_lemmatizer):
+ words = ["мама", "мыла", "раму"]
+ pos = ["NOUN", "VERB", "NOUN"]
+ morphs = [
+ "Animacy=Anim|Case=Nom|Gender=Fem|Number=Sing",
+ "Aspect=Imp|Gender=Fem|Mood=Ind|Number=Sing|Tense=Past|VerbForm=Fin|Voice=Act",
+ "Animacy=Anim|Case=Acc|Gender=Fem|Number=Sing",
+ ]
+ doc = Doc(ru_lookup_lemmatizer.vocab, words=words, pos=pos, morphs=morphs)
+ doc = ru_lookup_lemmatizer(doc)
+ lemmas = [token.lemma_ for token in doc]
+ assert lemmas == ["мама", "мыла", "раму"]
diff --git a/spacy/tests/lang/uk/test_lemmatizer.py b/spacy/tests/lang/uk/test_lemmatizer.py
index 57dd4198a..788744aa1 100644
--- a/spacy/tests/lang/uk/test_lemmatizer.py
+++ b/spacy/tests/lang/uk/test_lemmatizer.py
@@ -9,3 +9,11 @@ def test_uk_lemmatizer(uk_lemmatizer):
"""Check that the default uk lemmatizer runs."""
doc = Doc(uk_lemmatizer.vocab, words=["a", "b", "c"])
uk_lemmatizer(doc)
+ assert [token.lemma for token in doc]
+
+
+def test_uk_lookup_lemmatizer(uk_lookup_lemmatizer):
+ """Check that the lookup uk lemmatizer runs."""
+ doc = Doc(uk_lookup_lemmatizer.vocab, words=["a", "b", "c"])
+ uk_lookup_lemmatizer(doc)
+ assert [token.lemma for token in doc]
diff --git a/spacy/tests/pipeline/test_entity_linker.py b/spacy/tests/pipeline/test_entity_linker.py
index 5ad092fbf..2fc183722 100644
--- a/spacy/tests/pipeline/test_entity_linker.py
+++ b/spacy/tests/pipeline/test_entity_linker.py
@@ -9,6 +9,7 @@ from spacy.compat import pickle
from spacy.kb import Candidate, InMemoryLookupKB, get_candidates, KnowledgeBase
from spacy.lang.en import English
from spacy.ml import load_kb
+from spacy.ml.models.entity_linker import build_span_maker
from spacy.pipeline import EntityLinker
from spacy.pipeline.legacy import EntityLinker_v1
from spacy.pipeline.tok2vec import DEFAULT_TOK2VEC_MODEL
@@ -728,7 +729,11 @@ TRAIN_DATA = [
("Russ Cochran was a member of University of Kentucky's golf team.",
{"links": {(0, 12): {"Q7381115": 0.0, "Q2146908": 1.0}},
"entities": [(0, 12, "PERSON"), (43, 51, "LOC")],
- "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]})
+ "sent_starts": [1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}),
+ # having a blank instance shouldn't break things
+ ("The weather is nice today.",
+ {"links": {}, "entities": [],
+ "sent_starts": [1, -1, 0, 0, 0, 0]})
]
GOLD_entities = ["Q2146908", "Q7381115", "Q7381115", "Q2146908"]
# fmt: on
@@ -1211,6 +1216,21 @@ def test_threshold(meet_threshold: bool, config: Dict[str, Any]):
assert doc.ents[0].kb_id_ == entity_id if meet_threshold else EntityLinker.NIL
+def test_span_maker_forward_with_empty():
+ """The forward pass of the span maker may have a doc with no entities."""
+ nlp = English()
+ doc1 = nlp("a b c")
+ ent = doc1[0:1]
+ ent.label_ = "X"
+ doc1.ents = [ent]
+ # no entities
+ doc2 = nlp("x y z")
+
+ # just to get a model
+ span_maker = build_span_maker()
+ span_maker([doc1, doc2], False)
+
+
def test_nel_candidate_processing():
"""Test that NEL handles candidate streams correctly in a set of documents with & without entities as well as empty
documents.
diff --git a/spacy/tests/pipeline/test_tok2vec.py b/spacy/tests/pipeline/test_tok2vec.py
index 659274db9..e423d9a19 100644
--- a/spacy/tests/pipeline/test_tok2vec.py
+++ b/spacy/tests/pipeline/test_tok2vec.py
@@ -231,7 +231,7 @@ def test_tok2vec_listener_callback():
def test_tok2vec_listener_overfitting():
- """ Test that a pipeline with a listener properly overfits, even if 'tok2vec' is in the annotating components """
+ """Test that a pipeline with a listener properly overfits, even if 'tok2vec' is in the annotating components"""
orig_config = Config().from_str(cfg_string)
nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
train_examples = []
@@ -264,7 +264,7 @@ def test_tok2vec_listener_overfitting():
def test_tok2vec_frozen_not_annotating():
- """ Test that a pipeline with a frozen tok2vec raises an error when the tok2vec is not annotating """
+ """Test that a pipeline with a frozen tok2vec raises an error when the tok2vec is not annotating"""
orig_config = Config().from_str(cfg_string)
nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
train_examples = []
@@ -274,12 +274,16 @@ def test_tok2vec_frozen_not_annotating():
for i in range(2):
losses = {}
- with pytest.raises(ValueError, match=r"the tok2vec embedding layer is not updated"):
- nlp.update(train_examples, sgd=optimizer, losses=losses, exclude=["tok2vec"])
+ with pytest.raises(
+ ValueError, match=r"the tok2vec embedding layer is not updated"
+ ):
+ nlp.update(
+ train_examples, sgd=optimizer, losses=losses, exclude=["tok2vec"]
+ )
def test_tok2vec_frozen_overfitting():
- """ Test that a pipeline with a frozen & annotating tok2vec can still overfit """
+ """Test that a pipeline with a frozen & annotating tok2vec can still overfit"""
orig_config = Config().from_str(cfg_string)
nlp = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
train_examples = []
@@ -289,7 +293,13 @@ def test_tok2vec_frozen_overfitting():
for i in range(100):
losses = {}
- nlp.update(train_examples, sgd=optimizer, losses=losses, exclude=["tok2vec"], annotates=["tok2vec"])
+ nlp.update(
+ train_examples,
+ sgd=optimizer,
+ losses=losses,
+ exclude=["tok2vec"],
+ annotates=["tok2vec"],
+ )
assert losses["tagger"] < 0.0001
# test the trained model
diff --git a/spacy/tests/test_models.py b/spacy/tests/test_models.py
index 2306cabb7..d91ed1201 100644
--- a/spacy/tests/test_models.py
+++ b/spacy/tests/test_models.py
@@ -23,7 +23,7 @@ def get_textcat_bow_kwargs():
def get_textcat_cnn_kwargs():
- return {"tok2vec": test_tok2vec(), "exclusive_classes": False, "nO": 13}
+ return {"tok2vec": make_test_tok2vec(), "exclusive_classes": False, "nO": 13}
def get_all_params(model):
@@ -65,7 +65,7 @@ def get_tok2vec_kwargs():
}
-def test_tok2vec():
+def make_test_tok2vec():
return build_Tok2Vec_model(**get_tok2vec_kwargs())
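
The rename matters because pytest collects any module-level `test_*` function, so the old `test_tok2vec` name ran the model factory as a zero-assertion test case. A minimal sketch of the collection rule (names are illustrative):

```python
def make_tok2vec_sketch():        # helper: ignored by pytest collection
    return object()


def test_uses_helper():           # test: collected and run by pytest
    assert make_tok2vec_sketch() is not None
```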
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index d7d2fd8e6..295f91c28 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -1608,24 +1608,20 @@ cdef class Doc:
Doc.set_extension(attr)
self._.set(attr, doc_json["_"][attr])
- if doc_json.get("underscore_token", {}):
- for token_attr in doc_json["underscore_token"]:
- token_start = doc_json["underscore_token"][token_attr]["token_start"]
- value = doc_json["underscore_token"][token_attr]["value"]
-
- if not Token.has_extension(token_attr):
- Token.set_extension(token_attr)
- self[token_start]._.set(token_attr, value)
+ for token_attr in doc_json.get("underscore_token", {}):
+ if not Token.has_extension(token_attr):
+ Token.set_extension(token_attr)
+ for token_data in doc_json["underscore_token"][token_attr]:
+ start = token_by_char(self.c, self.length, token_data["start"])
+ value = token_data["value"]
+ self[start]._.set(token_attr, value)
- if doc_json.get("underscore_span", {}):
- for span_attr in doc_json["underscore_span"]:
- token_start = doc_json["underscore_span"][span_attr]["token_start"]
- token_end = doc_json["underscore_span"][span_attr]["token_end"]
- value = doc_json["underscore_span"][span_attr]["value"]
-
- if not Span.has_extension(span_attr):
- Span.set_extension(span_attr)
- self[token_start:token_end]._.set(span_attr, value)
+ for span_attr in doc_json.get("underscore_span", {}):
+ if not Span.has_extension(span_attr):
+ Span.set_extension(span_attr)
+ for span_data in doc_json["underscore_span"][span_attr]:
+ value = span_data["value"]
+ self.char_span(span_data["start"], span_data["end"])._.set(span_attr, value)
return self
def to_json(self, underscore=None):
@@ -1673,30 +1669,34 @@ cdef class Doc:
if underscore:
user_keys = set()
if self.user_data:
- data["_"] = {}
- data["underscore_token"] = {}
- data["underscore_span"] = {}
- for data_key in self.user_data:
+ for data_key, value in self.user_data.copy().items():
if type(data_key) == tuple and len(data_key) >= 4 and data_key[0] == "._.":
attr = data_key[1]
start = data_key[2]
end = data_key[3]
if attr in underscore:
user_keys.add(attr)
- value = self.user_data[data_key]
if not srsly.is_json_serializable(value):
raise ValueError(Errors.E107.format(attr=attr, value=repr(value)))
# Check if doc attribute
if start is None:
+ if "_" not in data:
+ data["_"] = {}
data["_"][attr] = value
# Check if token attribute
elif end is None:
+ if "underscore_token" not in data:
+ data["underscore_token"] = {}
if attr not in data["underscore_token"]:
- data["underscore_token"][attr] = {"token_start": start, "value": value}
+ data["underscore_token"][attr] = []
+ data["underscore_token"][attr].append({"start": start, "value": value})
# Else span attribute
else:
+ if "underscore_span" not in data:
+ data["underscore_span"] = {}
if attr not in data["underscore_span"]:
- data["underscore_span"][attr] = {"token_start": start, "token_end": end, "value": value}
+ data["underscore_span"][attr] = []
+ data["underscore_span"][attr].append({"start": start, "end": end, "value": value})
for attr in underscore:
if attr not in user_keys:
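
Taken together, the `from_json`/`to_json` changes replace the single `{"token_start": ..., "value": ...}` entry per attribute with a list of entries keyed by character offsets, so several tokens or spans can carry the same extension. A sketch of the new JSON shape (values echo the updated tests; offsets are illustrative):

```python
json_doc_fragment = {
    "underscore_token": {
        "token_test": [
            {"start": 0, "value": 117},  # character offset of the token
            {"start": 2, "value": 118},
        ]
    },
    "underscore_span": {
        "span_test": [
            {"start": 0, "end": 1, "value": "span_attribute"},  # character offsets
        ]
    },
}
```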
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index e5cd3089b..fc2c46022 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -1482,7 +1482,7 @@ You'll also need to add the assets you want to track with
```cli
-$ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose]
+$ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose] [--quiet]
```
> #### Example
@@ -1499,6 +1499,7 @@ $ python -m spacy project dvc [project_dir] [workflow] [--force] [--verbose]
| `workflow` | Name of workflow defined in `project.yml`. Defaults to first workflow if not set. ~~Optional[str] \(option)~~ |
| `--force`, `-F` | Force-updating config file. ~~bool (flag)~~ |
| `--verbose`, `-V` | Print more output generated by DVC. ~~bool (flag)~~ |
+| `--quiet`, `-q` | Print no output generated by DVC. ~~bool (flag)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
| **CREATES** | A `dvc.yaml` file in the project directory, based on the steps defined in the given workflow. |
diff --git a/website/docs/usage/projects.md b/website/docs/usage/projects.md
index 4797bbfe3..90b612358 100644
--- a/website/docs/usage/projects.md
+++ b/website/docs/usage/projects.md
@@ -243,6 +243,27 @@ pipelines.
> python -m spacy project run test . --vars.foo bar
> ```
+> #### Tip: Environment Variables
+>
+> Commands in a project file are not executed in a shell, so they don't have
+> direct access to environment variables. But you can insert environment
+> variables using the `env` dictionary to make values available for
+> interpolation, just like values in `vars`. Here's an example `env` dict that
+> makes `$PATH` available as `ENV_PATH`:
+>
+> ```yaml
+> env:
+> ENV_PATH: PATH
+> ```
+>
+> This can be used in a project command like so:
+>
+> ```yaml
+> - name: "echo-path"
+> script:
+> - "echo ${env.ENV_PATH}"
+> ```
+
| Section | Description |
| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `title` | An optional project title used in `--help` message and [auto-generated docs](#custom-docs). |
diff --git a/website/meta/languages.json b/website/meta/languages.json
index 0028b4a5f..bd1535c90 100644
--- a/website/meta/languages.json
+++ b/website/meta/languages.json
@@ -4,12 +4,22 @@
"code": "af",
"name": "Afrikaans"
},
+ {
+ "code": "am",
+ "name": "Amharic",
+ "has_examples": true
+ },
{
"code": "ar",
"name": "Arabic",
"example": "هذه جملة",
"has_examples": true
},
+ {
+ "code": "az",
+ "name": "Azerbaijani",
+ "has_examples": true
+ },
{
"code": "bg",
"name": "Bulgarian",
@@ -65,7 +75,7 @@
{
"code": "dsb",
"name": "Lower Sorbian",
- "has_examples": true
+ "has_examples": true
},
{
"code": "el",
@@ -142,6 +152,11 @@
"code": "ga",
"name": "Irish"
},
+ {
+ "code": "grc",
+ "name": "Ancient Greek",
+ "has_examples": true
+ },
{
"code": "gu",
"name": "Gujarati",
@@ -172,7 +187,7 @@
{
"code": "hsb",
"name": "Upper Sorbian",
- "has_examples": true
+ "has_examples": true
},
{
"code": "hu",
@@ -260,6 +275,10 @@
"example": "Адамга эң кыйыны — күн сайын адам болуу",
"has_examples": true
},
+ {
+ "code": "la",
+ "name": "Latin"
+ },
{
"code": "lb",
"name": "Luxembourgish",
@@ -448,6 +467,11 @@
"example": "นี่คือประโยค",
"has_examples": true
},
+ {
+ "code": "ti",
+ "name": "Tigrinya",
+ "has_examples": true
+ },
{
"code": "tl",
"name": "Tagalog"
diff --git a/website/meta/universe.json b/website/meta/universe.json
index a6a1a0fc7..d7c99956b 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,5 +1,46 @@
{
"resources": [
+ {
+ "id": "spacy-cleaner",
+ "title": "spacy-cleaner",
+ "slogan": "Easily clean text with spaCy!",
+ "description": "**spacy-cleaner** utilises spaCy `Language` models to replace, remove, and \n mutate spaCy tokens. Cleaning actions available are:\n\n* Remove/replace stopwords.\n* Remove/replace punctuation.\n* Remove/replace numbers.\n* Remove/replace emails.\n* Remove/replace URLs.\n* Perform lemmatisation.\n\nSee our [docs](https://ce11an.github.io/spacy-cleaner/) for more information.",
+ "github": "Ce11an/spacy-cleaner",
+ "pip": "spacy-cleaner",
+ "code_example": [
+ "import spacy",
+ "import spacy_cleaner",
+ "from spacy_cleaner.processing import removers, replacers, mutators",
+ "",
+ "model = spacy.load(\"en_core_web_sm\")",
+ "pipeline = spacy_cleaner.Pipeline(",
+ " model,",
+ " removers.remove_stopword_token,",
+ " replacers.replace_punctuation_token,",
+ " mutators.mutate_lemma_token,",
+ ")",
+ "",
+ "texts = [\"Hello, my name is Cellan! I love to swim!\"]",
+ "",
+ "pipeline.clean(texts)",
+ "# ['hello _IS_PUNCT_ Cellan _IS_PUNCT_ love swim _IS_PUNCT_']"
+ ],
+ "code_language": "python",
+ "url": "https://ce11an.github.io/spacy-cleaner/",
+ "image": "https://raw.githubusercontent.com/Ce11an/spacy-cleaner/main/docs/assets/images/spacemen.png",
+ "author": "Cellan Hall",
+ "author_links": {
+ "twitter": "Ce11an",
+ "github": "Ce11an",
+ "website": "https://www.linkedin.com/in/cellan-hall/"
+ },
+ "category": [
+ "extension"
+ ],
+ "tags": [
+ "text-processing"
+ ]
+ },
{
"id": "Zshot",
"title": "Zshot",
@@ -2460,20 +2501,20 @@
"import spacy",
"from spacy_wordnet.wordnet_annotator import WordnetAnnotator ",
"",
- "# Load an spacy model (supported models are \"es\" and \"en\") ",
- "nlp = spacy.load('en')",
- "# Spacy 3.x",
- "nlp.add_pipe(\"spacy_wordnet\", after='tagger', config={'lang': nlp.lang})",
- "# Spacy 2.x",
+ "# Load a spaCy model (supported languages are \"es\" and \"en\") ",
+ "nlp = spacy.load('en_core_web_sm')",
+ "# spaCy 3.x",
+ "nlp.add_pipe(\"spacy_wordnet\", after='tagger')",
+ "# spaCy 2.x",
"# nlp.add_pipe(WordnetAnnotator(nlp.lang), after='tagger')",
"token = nlp('prices')[0]",
"",
- "# wordnet object link spacy token with nltk wordnet interface by giving acces to",
+ "# WordNet object links spaCy token with NLTK WordNet interface by giving access to",
"# synsets and lemmas ",
"token._.wordnet.synsets()",
"token._.wordnet.lemmas()",
"",
- "# And automatically tags with wordnet domains",
+ "# And automatically add info about WordNet domains",
"token._.wordnet.wordnet_domains()"
],
"author": "recognai",
diff --git a/website/src/styles/quickstart.module.sass b/website/src/styles/quickstart.module.sass
index 8ad106a78..d0f9db551 100644
--- a/website/src/styles/quickstart.module.sass
+++ b/website/src/styles/quickstart.module.sass
@@ -149,6 +149,9 @@
& > span
display: block
+ a
+ text-decoration: underline
+
.small
font-size: var(--font-size-code)
line-height: 1.65
diff --git a/website/src/widgets/quickstart-install.js b/website/src/widgets/quickstart-install.js
index 0d2186acb..28dd14ecc 100644
--- a/website/src/widgets/quickstart-install.js
+++ b/website/src/widgets/quickstart-install.js
@@ -159,6 +159,9 @@ const QuickstartInstall = ({ id, title }) => {
setters={setters}
showDropdown={showDropdown}
>
+
+ # Note M1 GPU support is experimental, see Thinc issue #792
+
python -m venv .env
@@ -198,7 +201,13 @@ const QuickstartInstall = ({ id, title }) => {
{nightly ? ' --pre' : ''}
conda install -c conda-forge spacy
-
+
+ conda install -c conda-forge cupy
+
+
+ conda install -c conda-forge cupy
+
+
conda install -c conda-forge cupy