mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Add dependency check to project step runs (#11226)
* Add dependency check to project step running. * Fix dependency mismatch warning. * Remove newline. * Add types-setuptools to setup.cfg. * Move types-setuptools to test requirements. Move warnings into _validate_requirements(). Handle file reading in project_run(). * Remove newline formatting for output of package conflicts. * Show full version conflict message instead of just package name. * Update spacy/cli/project/run.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Fix typo. * Re-add rephrasing of message for conflicting packages. Remove requirements path redundancy. * Update spacy/cli/project/run.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update spacy/cli/project/run.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Print unified message for requirement conflicts and missing requirements. * Update spacy/cli/project/run.py Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Fix warning message. * Print conflict/missing messages individually. * Print conflict/missing messages individually. * Add check_requirements setting in project.yml to disable requirements check. * Update website/docs/usage/projects.md Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update website/docs/usage/projects.md Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> * Update description of project.yml structure in projects.md. * Update website/docs/usage/projects.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Prettify projects docs. Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
279358be63
commit
af9b01ef97
|
@ -33,6 +33,7 @@ hypothesis>=3.27.0,<7.0.0
|
|||
mypy>=0.910,<0.970; platform_machine!='aarch64'
|
||||
types-dataclasses>=0.1.3; python_version < "3.7"
|
||||
types-mock>=0.1.1
|
||||
types-setuptools>=57.0.0
|
||||
types-requests
|
||||
types-setuptools>=57.0.0
|
||||
black>=22.0,<23.0
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
from typing import Optional, List, Dict, Sequence, Any, Iterable
|
||||
from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
|
||||
import os.path
|
||||
from pathlib import Path
|
||||
|
||||
import pkg_resources
|
||||
from wasabi import msg
|
||||
from wasabi.util import locale_escape
|
||||
import sys
|
||||
|
@ -71,6 +74,12 @@ def project_run(
|
|||
commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
|
||||
workflows = config.get("workflows", {})
|
||||
validate_subcommand(list(commands.keys()), list(workflows.keys()), subcommand)
|
||||
|
||||
req_path = project_dir / "requirements.txt"
|
||||
if config.get("check_requirements", True) and os.path.exists(req_path):
|
||||
with req_path.open() as requirements_file:
|
||||
_check_requirements([req.replace("\n", "") for req in requirements_file])
|
||||
|
||||
if subcommand in workflows:
|
||||
msg.info(f"Running workflow '{subcommand}'")
|
||||
for cmd in workflows[subcommand]:
|
||||
|
@ -310,3 +319,32 @@ def get_fileinfo(project_dir: Path, paths: List[str]) -> List[Dict[str, Optional
|
|||
md5 = get_checksum(file_path) if file_path.exists() else None
|
||||
data.append({"path": path, "md5": md5})
|
||||
return data
|
||||
|
||||
|
||||
def _check_requirements(requirements: List[str]) -> Tuple[bool, bool]:
|
||||
"""Checks whether requirements are installed and free of version conflicts.
|
||||
requirements (List[str]): List of requirements.
|
||||
RETURNS (Tuple[bool, bool]): Whether (1) any packages couldn't be imported, (2) any packages with version conflicts
|
||||
exist.
|
||||
"""
|
||||
|
||||
failed_pkgs_msgs: List[str] = []
|
||||
conflicting_pkgs_msgs: List[str] = []
|
||||
|
||||
for req in requirements:
|
||||
try:
|
||||
pkg_resources.require(req)
|
||||
except pkg_resources.DistributionNotFound as dnf:
|
||||
failed_pkgs_msgs.append(dnf.report())
|
||||
except pkg_resources.VersionConflict as vc:
|
||||
conflicting_pkgs_msgs.append(vc.report())
|
||||
|
||||
if len(failed_pkgs_msgs) or len(conflicting_pkgs_msgs):
|
||||
msg.warn(
|
||||
title="Missing requirements or requirement conflicts detected. Make sure your Python environment is set up "
|
||||
"correctly and you installed all requirements specified in your project's requirements.txt: "
|
||||
)
|
||||
for pgk_msg in failed_pkgs_msgs + conflicting_pkgs_msgs:
|
||||
msg.text(pgk_msg)
|
||||
|
||||
return len(failed_pkgs_msgs) > 0, len(conflicting_pkgs_msgs) > 0
|
||||
|
|
|
@ -148,6 +148,13 @@ skipped. You can also set `--force` to force re-running a command, or `--dry` to
|
|||
perform a "dry run" and see what would happen (without actually running the
|
||||
script).
|
||||
|
||||
Since spaCy v3.4.2, `spacy project run` checks your installed dependencies to
|
||||
verify that your environment is properly set up and aligns with the project's
|
||||
`requirements.txt`, if there is one. If missing or conflicting dependencies are
|
||||
detected, a corresponding warning is displayed. If you'd like to disable the
|
||||
dependency check, set `check_requirements: false` in your project's
|
||||
`project.yml`.
|
||||
|
||||
### 4. Run a workflow {#run-workfow}
|
||||
|
||||
> #### project.yml
|
||||
|
@ -226,26 +233,28 @@ pipelines.
|
|||
```yaml
|
||||
%%GITHUB_PROJECTS/pipelines/tagger_parser_ud/project.yml
|
||||
```
|
||||
|
||||
> #### Tip: Overriding variables on the CLI
|
||||
>
|
||||
> If you want to override one or more variables on the CLI and are not already specifying a
|
||||
> project directory, you need to add `.` as a placeholder:
|
||||
> If you want to override one or more variables on the CLI and are not already
|
||||
> specifying a project directory, you need to add `.` as a placeholder:
|
||||
>
|
||||
> ```
|
||||
> python -m spacy project run test . --vars.foo bar
|
||||
> ```
|
||||
|
||||
| Section | Description |
|
||||
| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `title` | An optional project title used in `--help` message and [auto-generated docs](#custom-docs). |
|
||||
| `description` | An optional project description used in [auto-generated docs](#custom-docs). |
|
||||
| `vars` | A dictionary of variables that can be referenced in paths, URLs and scripts and overridden on the CLI, just like [`config.cfg` variables](/usage/training#config-interpolation). For example, `${vars.name}` will use the value of the variable `name`. Variables need to be defined in the section `vars`, but can be a nested dict, so you're able to reference `${vars.model.name}`. |
|
||||
| `env` | A dictionary of variables, mapped to the names of environment variables that will be read in when running the project. For example, `${env.name}` will use the value of the environment variable defined as `name`. |
|
||||
| `directories` | An optional list of [directories](#project-files) that should be created in the project for assets, training outputs, metrics etc. spaCy will make sure that these directories always exist. |
|
||||
| `assets` | A list of assets that can be fetched with the [`project assets`](/api/cli#project-assets) command. `url` defines a URL or local path, `dest` is the destination file relative to the project directory, and an optional `checksum` ensures that an error is raised if the file's checksum doesn't match. Instead of `url`, you can also provide a `git` block with the keys `repo`, `branch` and `path`, to download from a Git repo. |
|
||||
| `workflows` | A dictionary of workflow names, mapped to a list of command names, to execute in order. Workflows can be run with the [`project run`](/api/cli#project-run) command. |
|
||||
| `commands` | A list of named commands. A command can define an optional help message (shown in the CLI when the user adds `--help`) and the `script`, a list of commands to run. The `deps` and `outputs` let you define the created file the command depends on and produces, respectively. This lets spaCy determine whether a command needs to be re-run because its dependencies or outputs changed. Commands can be run as part of a workflow, or separately with the [`project run`](/api/cli#project-run) command. |
|
||||
| `spacy_version` | Optional spaCy version range like `>=3.0.0,<3.1.0` that the project is compatible with. If it's loaded with an incompatible version, an error is raised when the project is loaded. |
|
||||
| Section | Description |
|
||||
| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `title` | An optional project title used in `--help` message and [auto-generated docs](#custom-docs). |
|
||||
| `description` | An optional project description used in [auto-generated docs](#custom-docs). |
|
||||
| `vars` | A dictionary of variables that can be referenced in paths, URLs and scripts and overridden on the CLI, just like [`config.cfg` variables](/usage/training#config-interpolation). For example, `${vars.name}` will use the value of the variable `name`. Variables need to be defined in the section `vars`, but can be a nested dict, so you're able to reference `${vars.model.name}`. |
|
||||
| `env` | A dictionary of variables, mapped to the names of environment variables that will be read in when running the project. For example, `${env.name}` will use the value of the environment variable defined as `name`. |
|
||||
| `directories` | An optional list of [directories](#project-files) that should be created in the project for assets, training outputs, metrics etc. spaCy will make sure that these directories always exist. |
|
||||
| `assets` | A list of assets that can be fetched with the [`project assets`](/api/cli#project-assets) command. `url` defines a URL or local path, `dest` is the destination file relative to the project directory, and an optional `checksum` ensures that an error is raised if the file's checksum doesn't match. Instead of `url`, you can also provide a `git` block with the keys `repo`, `branch` and `path`, to download from a Git repo. |
|
||||
| `workflows` | A dictionary of workflow names, mapped to a list of command names, to execute in order. Workflows can be run with the [`project run`](/api/cli#project-run) command. |
|
||||
| `commands` | A list of named commands. A command can define an optional help message (shown in the CLI when the user adds `--help`) and the `script`, a list of commands to run. The `deps` and `outputs` let you define the created file the command depends on and produces, respectively. This lets spaCy determine whether a command needs to be re-run because its dependencies or outputs changed. Commands can be run as part of a workflow, or separately with the [`project run`](/api/cli#project-run) command. |
|
||||
| `spacy_version` | Optional spaCy version range like `>=3.0.0,<3.1.0` that the project is compatible with. If it's loaded with an incompatible version, an error is raised when the project is loaded. |
|
||||
| `check_requirements` <Tag variant="new">3.4.2</Tag> | A flag determining whether to verify that the installed dependencies align with the project's `requirements.txt`. Defaults to `true`. |
|
||||
|
||||
### Data assets {#data-assets}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user