diff --git a/.github/workflows/spacy_universe_alert.yml b/.github/workflows/spacy_universe_alert.yml
index f507e0594..837aaeb33 100644
--- a/.github/workflows/spacy_universe_alert.yml
+++ b/.github/workflows/spacy_universe_alert.yml
@@ -19,6 +19,8 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v4
+ with:
+ python-version: '3.10'
- name: Install Bernadette app dependency and send an alert
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py
index ebab7471e..a109c4a5a 100644
--- a/spacy/cli/project/run.py
+++ b/spacy/cli/project/run.py
@@ -53,6 +53,7 @@ def project_run(
force: bool = False,
dry: bool = False,
capture: bool = False,
+ skip_requirements_check: bool = False,
) -> None:
"""Run a named script defined in the project.yml. If the script is part
of the default pipeline (defined in the "run" section), DVC is used to
@@ -69,6 +70,7 @@ def project_run(
sys.exit will be called with the return code. You should use capture=False
when you want to turn over execution to the command, and capture=True
when you want to run the command more like a function.
+ skip_requirements_check (bool): Whether to skip the requirements check.
"""
config = load_project_config(project_dir, overrides=overrides)
commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
@@ -76,9 +78,10 @@ def project_run(
validate_subcommand(list(commands.keys()), list(workflows.keys()), subcommand)
req_path = project_dir / "requirements.txt"
- if config.get("check_requirements", True) and os.path.exists(req_path):
- with req_path.open() as requirements_file:
- _check_requirements([req.replace("\n", "") for req in requirements_file])
+ if not skip_requirements_check:
+ if config.get("check_requirements", True) and os.path.exists(req_path):
+ with req_path.open() as requirements_file:
+ _check_requirements([req.strip() for req in requirements_file])
if subcommand in workflows:
msg.info(f"Running workflow '{subcommand}'")
@@ -90,6 +93,7 @@ def project_run(
force=force,
dry=dry,
capture=capture,
+ skip_requirements_check=True,
)
else:
cmd = commands[subcommand]
@@ -338,6 +342,12 @@ def _check_requirements(requirements: List[str]) -> Tuple[bool, bool]:
failed_pkgs_msgs.append(dnf.report())
except pkg_resources.VersionConflict as vc:
conflicting_pkgs_msgs.append(vc.report())
+ except Exception:
+ msg.warn(
+ f"Unable to check requirement: {req} "
+ "Checks are currently limited to requirement specifiers "
+ "(PEP 508)"
+ )
if len(failed_pkgs_msgs) or len(conflicting_pkgs_msgs):
msg.warn(
diff --git a/spacy/errors.py b/spacy/errors.py
index 2f8a3996f..278e5496a 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -212,8 +212,8 @@ class Warnings(metaclass=ErrorsWithCodes):
W121 = ("Attempting to trace non-existent method '{method}' in pipe '{pipe}'")
W122 = ("Couldn't trace method '{method}' in pipe '{pipe}'. This can happen if the pipe class "
"is a Cython extension type.")
- W123 = ("Argument {arg} with value {arg_value} is used instead of {config_value} as specified in the config. Be "
- "aware that this might affect other components in your pipeline.")
+ W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option "
+ "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.")
class Errors(metaclass=ErrorsWithCodes):
diff --git a/spacy/language.py b/spacy/language.py
index d391f15ab..967af1e62 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1879,31 +1879,22 @@ class Language:
if isinstance(exclude, str):
exclude = [exclude]
- def fetch_pipes_status(value: Iterable[str], key: str) -> Iterable[str]:
- """Fetch value for `enable` or `disable` w.r.t. the specified config and passed arguments passed to
- .load(). If both arguments and config specified values for this field, the passed arguments take precedence
- and a warning is printed.
- value (Iterable[str]): Passed value for `enable` or `disable`.
- key (str): Key for field in config (either "enabled" or "disabled").
- RETURN (Iterable[str]):
- """
- # We assume that no argument was passed if the value is the specified default value.
- if id(value) == id(_DEFAULT_EMPTY_PIPES):
- return config["nlp"].get(key, [])
- else:
- if len(config["nlp"].get(key, [])):
- warnings.warn(
- Warnings.W123.format(
- arg=key[:-1],
- arg_value=value,
- config_value=config["nlp"][key],
- )
+ # `enable` should not be merged with `enabled` (the opposite is true for `disable`/`disabled`). If the config
+ # specifies values for `enabled` not included in `enable`, emit warning.
+ if id(enable) != id(_DEFAULT_EMPTY_PIPES):
+ enabled = config["nlp"].get("enabled", [])
+ if len(enabled) and not set(enabled).issubset(enable):
+ warnings.warn(
+ Warnings.W123.format(
+ enable=enable,
+ enabled=enabled,
)
- return value
+ )
+ # Ensure sets of disabled/enabled pipe names are not contradictory.
disabled_pipes = cls._resolve_component_status(
- fetch_pipes_status(disable, "disabled"),
- fetch_pipes_status(enable, "enabled"),
+ list({*disable, *config["nlp"].get("disabled", [])}),
+ enable,
config["nlp"]["pipeline"],
)
nlp._disabled = set(p for p in disabled_pipes if p not in exclude)
@@ -2084,10 +2075,12 @@ class Language:
if enable:
if isinstance(enable, str):
enable = [enable]
- to_disable = [
- pipe_name for pipe_name in pipe_names if pipe_name not in enable
- ]
- if disable and disable != to_disable:
+ to_disable = {
+ *[pipe_name for pipe_name in pipe_names if pipe_name not in enable],
+ *disable,
+ }
+ # If any pipe to be enabled is in to_disable, the specification is inconsistent.
+ if len(set(enable) & to_disable):
raise ValueError(Errors.E1042.format(enable=enable, disable=disable))
return tuple(to_disable)
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index 14a7a36e5..4dd7bae16 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -615,20 +615,18 @@ def test_enable_disable_conflict_with_config():
with make_tempdir() as tmp_dir:
nlp.to_disk(tmp_dir)
- # Expected to fail, as config and arguments conflict.
- with pytest.raises(ValueError):
- spacy.load(
- tmp_dir, enable=["tagger"], config={"nlp": {"disabled": ["senter"]}}
- )
+ # Expected to succeed, as config and arguments do not conflict.
+ assert spacy.load(
+ tmp_dir, enable=["tagger"], config={"nlp": {"disabled": ["senter"]}}
+ ).disabled == ["senter", "sentencizer"]
# Expected to succeed without warning due to the lack of a conflicting config option.
spacy.load(tmp_dir, enable=["tagger"])
- # Expected to succeed with a warning, as disable=[] should override the config setting.
- with pytest.warns(UserWarning):
+ # Expected to fail due to conflict between enable and disabled.
+ with pytest.raises(ValueError):
spacy.load(
tmp_dir,
- enable=["tagger"],
- disable=[],
- config={"nlp": {"disabled": ["senter"]}},
+ enable=["senter"],
+ config={"nlp": {"disabled": ["senter", "tagger"]}},
)
diff --git a/spacy/tests/serialize/test_serialize_pipeline.py b/spacy/tests/serialize/test_serialize_pipeline.py
index b948bb76c..9fcf18e2d 100644
--- a/spacy/tests/serialize/test_serialize_pipeline.py
+++ b/spacy/tests/serialize/test_serialize_pipeline.py
@@ -404,11 +404,10 @@ def test_serialize_pipeline_disable_enable():
assert nlp3.component_names == ["ner", "tagger"]
with make_tempdir() as d:
nlp3.to_disk(d)
- with pytest.warns(UserWarning):
- nlp4 = spacy.load(d, disable=["ner"])
- assert nlp4.pipe_names == ["tagger"]
+ nlp4 = spacy.load(d, disable=["ner"])
+ assert nlp4.pipe_names == []
assert nlp4.component_names == ["ner", "tagger"]
- assert nlp4.disabled == ["ner"]
+ assert nlp4.disabled == ["ner", "tagger"]
with make_tempdir() as d:
nlp.to_disk(d)
nlp5 = spacy.load(d, exclude=["tagger"])
diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py
index 53709d660..5d24a0047 100644
--- a/spacy/tests/test_cli.py
+++ b/spacy/tests/test_cli.py
@@ -1,5 +1,6 @@
import os
import math
+import pkg_resources
from random import sample
from typing import Counter
@@ -26,6 +27,7 @@ from spacy.cli.init_config import RECOMMENDATIONS, init_config, fill_config
from spacy.cli.package import get_third_party_dependencies
from spacy.cli.package import _is_permitted_package_name
from spacy.cli.project.remote_storage import RemoteStorage
+from spacy.cli.project.run import _check_requirements
from spacy.cli.validate import get_model_pkgs
from spacy.lang.en import English
from spacy.lang.nl import Dutch
@@ -897,3 +899,42 @@ def test_local_remote_storage():
remote.pull(filename)
with loc_file.open(mode="r") as file_:
assert file_.read() == content
+
+
+@pytest.mark.parametrize(
+ "reqs,output",
+ [
+ [
+ """
+ spacy
+
+ # comment
+
+ thinc""",
+ (False, False),
+ ],
+ [
+ """# comment
+ --some-flag
+ spacy""",
+ (False, False),
+ ],
+ [
+ """# comment
+ --some-flag
+ spacy; python_version >= '3.6'""",
+ (False, False),
+ ],
+ [
+ """# comment
+ spacyunknowndoesnotexist12345""",
+ (True, False),
+ ],
+ ],
+)
+def test_project_check_requirements(reqs, output):
+ # only run the assertion if the deliberately bogus package name really is not installed
+ try:
+ pkg_resources.require("spacyunknowndoesnotexist12345")
+ except pkg_resources.DistributionNotFound:
+ assert output == _check_requirements([req.strip() for req in reqs.split("\n")])
diff --git a/website/docs/api/language.md b/website/docs/api/language.md
index 767a7450a..504640d57 100644
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@@ -63,18 +63,18 @@ spaCy loads a model under the hood based on its
> nlp = Language.from_config(config)
> ```
-| Name | Description |
-| ------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
-| _keyword-only_ | |
-| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
-| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
-| `enable` 3.4 | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [`nlp.enable_pipe`](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
-| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
-| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
-| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
-| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
-| **RETURNS** | The initialized object. ~~Language~~ |
+| Name | Description |
+| ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `config` | The loaded config. ~~Union[Dict[str, Any], Config]~~ |
+| _keyword-only_ | |
+| `vocab` | A `Vocab` object. If `True`, a vocab is created using the default language data settings. ~~Vocab~~ |
+| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). The given names are merged with the config entry `nlp.disabled`. ~~Union[str, Iterable[str]]~~ |
+| `enable` 3.4 | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled, but can be enabled again using [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
+| `exclude` | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
+| `meta` | [Meta data](/api/data-formats#meta) overrides. ~~Dict[str, Any]~~ |
+| `auto_fill` | Whether to automatically fill in missing values in the config, based on defaults and function argument annotations. Defaults to `True`. ~~bool~~ |
+| `validate` | Whether to validate the component config and arguments against the types expected by the factory. Defaults to `True`. ~~bool~~ |
+| **RETURNS** | The initialized object. ~~Language~~ |
## Language.component {#component tag="classmethod" new="3"}
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index bc53fc868..c798f2a8d 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -45,16 +45,16 @@ specified separately using the new `exclude` keyword argument.
> nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger"])
> ```
-| Name | Description |
-| ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
-| _keyword-only_ | |
-| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
-| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). ~~Union[str, Iterable[str]]~~ |
-| `enable` 3.4 | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
-| `exclude` 3 | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
-| `config` 3 | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
-| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
+| Name | Description |
+| ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `name` | Pipeline to load, i.e. package name or path. ~~Union[str, Path]~~ |
+| _keyword-only_ | |
+| `vocab` | Optional shared vocab to pass in on initialization. If `True` (default), a new `Vocab` object will be created. ~~Union[Vocab, bool]~~ |
+| `disable` | Name(s) of pipeline component(s) to [disable](/usage/processing-pipelines#disabling). Disabled pipes will be loaded but they won't be run unless you explicitly enable them by calling [nlp.enable_pipe](/api/language#enable_pipe). The given names are merged with the config entry `nlp.disabled`. ~~Union[str, Iterable[str]]~~ |
+| `enable` 3.4 | Name(s) of pipeline component(s) to [enable](/usage/processing-pipelines#disabling). All other pipes will be disabled. ~~Union[str, Iterable[str]]~~ |
+| `exclude` 3 | Name(s) of pipeline component(s) to [exclude](/usage/processing-pipelines#disabling). Excluded components won't be loaded. ~~Union[str, Iterable[str]]~~ |
+| `config` 3 | Optional config overrides, either as nested dict or dict keyed by section value in dot notation, e.g. `"components.name.value"`. ~~Union[Dict[str, Any], Config]~~ |
+| **RETURNS** | A `Language` object with the loaded pipeline. ~~Language~~ |
Essentially, `spacy.load()` is a convenience wrapper that reads the pipeline's
[`config.cfg`](/api/data-formats#config), uses the language and pipeline
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index bd28810ae..0b63cdcb8 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -363,7 +363,8 @@ nlp.enable_pipe("tagger")
```
In addition to `disable`, `spacy.load()` also accepts `enable`. If `enable` is
-set, all components except for those in `enable` are disabled.
+set, all components except for those in `enable` are disabled. If `enable` and
+`disable` conflict (i.e. the same component is included in both), an error is raised.
```python
# Load the complete pipeline, but disable all components except for tok2vec and tagger
diff --git a/website/meta/universe.json b/website/meta/universe.json
index d7c99956b..fa765f640 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -1,5 +1,31 @@
{
"resources": [
+ {
+ "id": "grecy",
+ "title": "greCy",
+ "slogan": "Ancient Greek pipelines for spaCy",
+ "description": "greCy offers state-of-the-art pipelines for ancient Greek NLP. The repository makes language models available in various sizes, some of them containing floret word vectors and a BERT transformer layer.",
+ "github": "jmyerston/greCy",
+ "code_example": [
+ "import spacy",
+ "#After installing the grc_ud_proiel_trf wheel package from the greCy repository",
+ "",
+ "nlp = spacy.load('grc_ud_proiel_trf')",
+ "doc = nlp('δοκῶ μοι περὶ ὧν πυνθάνεσθε οὐκ ἀμελέτητος εἶναι.')",
+ "",
+ "for token in doc:",
+ " print(token.text, token.norm_, token.lemma_, token.pos_, token.tag_)"
+ ],
+ "code_language": "python",
+ "author": "Jacobo Myerston",
+ "author_links": {
+ "twitter": "@jcbmyrstn",
+ "github": "jmyerston",
+ "website": "https://huggingface.co/spaces/Jacobo/syntax"
+ },
+ "category": ["pipeline", "research"],
+ "tags": ["ancient Greek"]
+ },
{
"id": "spacy-cleaner",
"title": "spacy-cleaner",