diff --git a/spacy/cli/project/__init__.py b/spacy/cli/project/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py
deleted file mode 100644
index 8f35b2d23..000000000
--- a/spacy/cli/project/assets.py
+++ /dev/null
@@ -1,206 +0,0 @@
-from typing import Any, Dict, Optional
-from pathlib import Path
-from wasabi import msg
-import os
-import re
-import shutil
-import requests
-import typer
-
-from ...util import ensure_path, working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
-from .._util import get_checksum, download_file, git_checkout, get_git_version
-from .._util import SimpleFrozenDict, parse_config_overrides
-
-# Whether assets are extra if `extra` is not set.
-EXTRA_DEFAULT = False
-
-
-@project_cli.command(
-    "assets",
-    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
-)
-def project_assets_cli(
-    # fmt: off
-    ctx: typer.Context,  # This is only used to read additional arguments
-    project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
-    sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse checkout for assets provided via Git, to only check out and clone the files needed. Requires Git v2.22+."),
-    extra: bool = Opt(False, "--extra", "-e", help="Download all assets, including those marked as 'extra'.")
-    # fmt: on
-):
-    """Fetch project assets like datasets and pretrained weights. Assets are
-    defined in the "assets" section of the project.yml. If a checksum is
-    provided in the project.yml, the file is only downloaded if no local file
-    with the same checksum exists.
-
-    DOCS: https://spacy.io/api/cli#project-assets
-    """
-    overrides = parse_config_overrides(ctx.args)
-    project_assets(
-        project_dir,
-        overrides=overrides,
-        sparse_checkout=sparse_checkout,
-        extra=extra,
-    )
-
-
-def project_assets(
-    project_dir: Path,
-    *,
-    overrides: Dict[str, Any] = SimpleFrozenDict(),
-    sparse_checkout: bool = False,
-    extra: bool = False,
-) -> None:
-    """Fetch assets for a project using DVC if possible.
-
-    project_dir (Path): Path to project directory.
-    sparse_checkout (bool): Use sparse checkout for assets provided via Git, to
-        only check out and clone the files needed.
-    extra (bool): Whether to download all assets, including those marked as 'extra'.
-    """
-    project_path = ensure_path(project_dir)
-    config = load_project_config(project_path, overrides=overrides)
-    assets = [
-        asset
-        for asset in config.get("assets", [])
-        if extra or not asset.get("extra", EXTRA_DEFAULT)
-    ]
-    if not assets:
-        msg.warn(
-            f"No assets specified in {PROJECT_FILE} (if assets are marked as extra, download them with --extra)",
-            exits=0,
-        )
-    msg.info(f"Fetching {len(assets)} asset(s)")
-
-    for asset in assets:
-        dest = (project_dir / asset["dest"]).resolve()
-        checksum = asset.get("checksum")
-        if "git" in asset:
-            git_err = (
-                f"Cloning spaCy project templates requires Git and the 'git' command. "
-                f"Make sure it's installed and that the executable is available."
-            )
-            get_git_version(error=git_err)
-            if dest.exists():
-                # If there's already a file, check for checksum
-                if checksum and checksum == get_checksum(dest):
-                    msg.good(
-                        f"Skipping download with matching checksum: {asset['dest']}"
-                    )
-                    continue
-                else:
-                    if dest.is_dir():
-                        shutil.rmtree(dest)
-                    else:
-                        dest.unlink()
-            if "repo" not in asset["git"] or asset["git"]["repo"] is None:
-                msg.fail(
-                    "A git asset must include 'repo', the repository address.", exits=1
-                )
-            if "path" not in asset["git"] or asset["git"]["path"] is None:
-                msg.fail(
-                    "A git asset must include 'path' - use \"\" to get the entire repository.",
-                    exits=1,
-                )
-            git_checkout(
-                asset["git"]["repo"],
-                asset["git"]["path"],
-                dest,
-                branch=asset["git"].get("branch"),
-                sparse=sparse_checkout,
-            )
-            msg.good(f"Downloaded asset {dest}")
-        else:
-            url = asset.get("url")
-            if not url:
-                # project.yml defines asset without URL that the user has to place
-                check_private_asset(dest, checksum)
-                continue
-            fetch_asset(project_path, url, dest, checksum)
-
-
-def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None:
-    """Check and validate assets without a URL (private assets that the user
-    has to provide themselves) and give feedback about the checksum.
-
-    dest (Path): Destination path of the asset.
-    checksum (Optional[str]): Optional checksum of the expected file.
-    """
-    if not Path(dest).exists():
-        err = f"No URL provided for asset. You need to add this file yourself: {dest}"
-        msg.warn(err)
-    else:
-        if not checksum:
-            msg.good(f"Asset already exists: {dest}")
-        elif checksum == get_checksum(dest):
-            msg.good(f"Asset exists with matching checksum: {dest}")
-        else:
-            msg.fail(f"Asset available but with incorrect checksum: {dest}")
-
-
-def fetch_asset(
-    project_path: Path, url: str, dest: Path, checksum: Optional[str] = None
-) -> None:
-    """Fetch an asset from a given URL or path. If a checksum is provided and a
-    local file exists, it's only re-downloaded if the checksum doesn't match.
-
-    project_path (Path): Path to project directory.
-    url (str): URL or path to asset.
-    checksum (Optional[str]): Optional expected checksum of local file.
-    RETURNS (Optional[Path]): The path to the fetched asset or None if fetching
-        the asset failed.
-    """
-    dest_path = (project_path / dest).resolve()
-    if dest_path.exists():
-        # If there's already a file, check for checksum
-        if checksum:
-            if checksum == get_checksum(dest_path):
-                msg.good(f"Skipping download with matching checksum: {dest}")
-                return
-        else:
-            # If there's not a checksum, make sure the file is a possibly valid size
-            if os.path.getsize(dest_path) == 0:
-                msg.warn(f"Asset exists but with size of 0 bytes, deleting: {dest}")
-                os.remove(dest_path)
-    # We might as well support the user here and create parent directories in
-    # case the asset dir isn't listed as a dir to create in the project.yml
-    if not dest_path.parent.exists():
-        dest_path.parent.mkdir(parents=True)
-    with working_dir(project_path):
-        url = convert_asset_url(url)
-        try:
-            download_file(url, dest_path)
-            msg.good(f"Downloaded asset {dest}")
-        except requests.exceptions.RequestException as e:
-            if Path(url).exists() and Path(url).is_file():
-                # If it's a local file, copy to destination
-                shutil.copy(url, str(dest_path))
-                msg.good(f"Copied local asset {dest}")
-            else:
-                msg.fail(f"Download failed: {dest}", e)
-    if checksum and checksum != get_checksum(dest_path):
-        msg.fail(f"Checksum doesn't match value defined in {PROJECT_FILE}: {dest}")
-
-
-def convert_asset_url(url: str) -> str:
-    """Check and convert the asset URL if needed.
-
-    url (str): The asset URL.
-    RETURNS (str): The converted URL.
-    """
-    # If the asset URL is a regular GitHub URL it's likely a mistake
-    if (
-        re.match(r"(http(s?)):\/\/github.com", url)
-        and "releases/download" not in url
-        and "/raw/" not in url
-    ):
-        converted = url.replace("github.com", "raw.githubusercontent.com")
-        converted = re.sub(r"/(tree|blob)/", "/", converted)
-        msg.warn(
-            "Downloading from a regular GitHub URL. This will only download "
-            "the source of the page, not the actual file. Converting the URL "
-            "to a raw URL.",
-            converted,
-        )
-        return converted
-    return url
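The URL rewriting in `convert_asset_url` is the one non-obvious step here: a regular github.com link returns the HTML page, not the file, so it is rewritten to raw.githubusercontent.com. A minimal standalone sketch of the same rewrite (the example URL is hypothetical, not part of the deleted module):

import re

def to_raw_github_url(url: str) -> str:
    # Skip release downloads and links that are already raw
    if (
        re.match(r"(http(s?)):\/\/github.com", url)
        and "releases/download" not in url
        and "/raw/" not in url
    ):
        url = url.replace("github.com", "raw.githubusercontent.com")
        url = re.sub(r"/(tree|blob)/", "/", url)  # drop the UI path segment
    return url

print(to_raw_github_url("https://github.com/explosion/projects/blob/v3/README.md"))
# -> https://raw.githubusercontent.com/explosion/projects/v3/README.md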
diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py
deleted file mode 100644
index 14b4ed9b5..000000000
--- a/spacy/cli/project/clone.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from typing import Optional
-from pathlib import Path
-from wasabi import msg
-import subprocess
-import re
-
-from ... import about
-from ...util import ensure_path
-from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
-from .._util import git_checkout, get_git_version, git_repo_branch_exists
-
-DEFAULT_REPO = about.__projects__
-DEFAULT_PROJECTS_BRANCH = about.__projects_branch__
-DEFAULT_BRANCHES = ["main", "master"]
-
-
-@project_cli.command("clone")
-def project_clone_cli(
-    # fmt: off
-    name: str = Arg(..., help="The name of the template to clone"),
-    dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
-    repo: str = Opt(DEFAULT_REPO, "--repo", "-r", help="The repository to clone from"),
-    branch: Optional[str] = Opt(None, "--branch", "-b", help=f"The branch to clone from. If not provided, will attempt {', '.join(DEFAULT_BRANCHES)}"),
-    sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse Git checkout to only check out and clone the files needed. Requires Git v2.22+.")
-    # fmt: on
-):
-    """Clone a project template from a repository. Calls into "git" and will
-    only download the files from the given subdirectory. The GitHub repo
-    defaults to the official spaCy template repo, but can be customized
-    (including using a private repo).
-
-    DOCS: https://spacy.io/api/cli#project-clone
-    """
-    if dest is None:
-        dest = Path.cwd() / Path(name).parts[-1]
-    if repo == DEFAULT_REPO and branch is None:
-        branch = DEFAULT_PROJECTS_BRANCH
-
-    if branch is None:
-        for default_branch in DEFAULT_BRANCHES:
-            if git_repo_branch_exists(repo, default_branch):
-                branch = default_branch
-                break
-        if branch is None:
-            default_branches_msg = ", ".join(f"'{b}'" for b in DEFAULT_BRANCHES)
-            msg.fail(
-                "No branch provided and attempted default "
-                f"branches {default_branches_msg} do not exist.",
-                exits=1,
-            )
-    else:
-        if not git_repo_branch_exists(repo, branch):
-            msg.fail(f"repo: {repo} (branch: {branch}) does not exist.", exits=1)
-    assert isinstance(branch, str)
-    project_clone(name, dest, repo=repo, branch=branch, sparse_checkout=sparse_checkout)
-
-
-def project_clone(
-    name: str,
-    dest: Path,
-    *,
-    repo: str = about.__projects__,
-    branch: str = about.__projects_branch__,
-    sparse_checkout: bool = False,
-) -> None:
-    """Clone a project template from a repository.
-
-    name (str): Name of subdirectory to clone.
-    dest (Path): Destination path of cloned project.
-    repo (str): URL of Git repo containing project templates.
-    branch (str): The branch to clone from.
-    """
-    dest = ensure_path(dest)
-    check_clone(name, dest, repo)
-    project_dir = dest.resolve()
-    repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
-    try:
-        git_checkout(repo, name, dest, branch=branch, sparse=sparse_checkout)
-    except subprocess.CalledProcessError:
-        err = f"Could not clone '{name}' from repo '{repo_name}' (branch '{branch}')"
-        msg.fail(err, exits=1)
-    msg.good(f"Cloned '{name}' from '{repo_name}' (branch '{branch}')", project_dir)
-    if not (project_dir / PROJECT_FILE).exists():
-        msg.warn(f"No {PROJECT_FILE} found in directory")
-    else:
-        msg.good("Your project is now ready!")
-        print(f"To fetch the assets, run:\n{COMMAND} project assets {dest}")
-
-
-def check_clone(name: str, dest: Path, repo: str) -> None:
-    """Check and validate that the destination path can be used to clone. Will
-    check that Git is available and that the destination path is suitable.
-
-    name (str): Name of the directory to clone from the repo.
-    dest (Path): Local destination of cloned directory.
-    repo (str): URL of the repo to clone from.
-    """
-    git_err = (
-        f"Cloning spaCy project templates requires Git and the 'git' command. "
-        f"To clone a project without Git, copy the files from the '{name}' "
-        f"directory in the {repo} to {dest} manually."
-    )
-    get_git_version(error=git_err)
-    if not dest:
-        msg.fail(f"Not a valid directory to clone project: {dest}", exits=1)
-    if dest.exists():
-        # Directory already exists (not allowed, clone needs to create it)
-        msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1)
-    if not dest.parent.exists():
-        # We're not creating parents, parent dir should exist
-        msg.fail(
-            f"Can't clone project, parent directory doesn't exist: {dest.parent}. "
-            f"Create the necessary folder(s) first before continuing.",
-            exits=1,
-        )
diff --git a/spacy/cli/project/document.py b/spacy/cli/project/document.py
deleted file mode 100644
index 1ba43a958..000000000
--- a/spacy/cli/project/document.py
+++ /dev/null
@@ -1,115 +0,0 @@
-from pathlib import Path
-from wasabi import msg, MarkdownRenderer
-
-from ...util import working_dir
-from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config
-
-
-DOCS_URL = "https://spacy.io"
-INTRO_PROJECT = f"""The [`{PROJECT_FILE}`]({PROJECT_FILE}) defines the data assets required by the
-project, as well as the available commands and workflows. For details, see the
-[spaCy projects documentation]({DOCS_URL}/usage/projects)."""
-INTRO_COMMANDS = f"""The following commands are defined by the project. They
-can be executed using [`spacy project run [name]`]({DOCS_URL}/api/cli#project-run).
-Commands are only re-run if their inputs have changed."""
-INTRO_WORKFLOWS = f"""The following workflows are defined by the project. They
-can be executed using [`spacy project run [name]`]({DOCS_URL}/api/cli#project-run)
-and will run the specified commands in order. Commands are only re-run if their
-inputs have changed."""
-INTRO_ASSETS = f"""The following assets are defined by the project. They can
-be fetched by running [`spacy project assets`]({DOCS_URL}/api/cli#project-assets)
-in the project directory."""
-# These markers are added to the Markdown and can be used to update the file in
-# place if it already exists. Only the auto-generated part will be replaced.
-MARKER_START = "<!-- SPACY PROJECT: AUTO-GENERATED DOCS START (do not remove) -->"
-MARKER_END = "<!-- SPACY PROJECT: AUTO-GENERATED DOCS END (do not remove) -->"
-# If this marker is used in an existing README, it's ignored and not replaced
-MARKER_IGNORE = "<!-- SPACY PROJECT: IGNORE -->"
-
-
-@project_cli.command("document")
-def project_document_cli(
-    # fmt: off
-    project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False),
-    output_file: Path = Opt("-", "--output", "-o", help="Path to output Markdown file for output. Defaults to - for standard output"),
-    no_emoji: bool = Opt(False, "--no-emoji", "-NE", help="Don't use emoji")
-    # fmt: on
-):
-    """
-    Auto-generate a README.md for a project. If the content is saved to a file,
-    hidden markers are added so you can add custom content before or after the
-    auto-generated section and only the auto-generated docs will be replaced
-    when you re-run the command.
-
-    DOCS: https://spacy.io/api/cli#project-document
-    """
-    project_document(project_dir, output_file, no_emoji=no_emoji)
-
-
-def project_document(
-    project_dir: Path, output_file: Path, *, no_emoji: bool = False
-) -> None:
-    is_stdout = str(output_file) == "-"
-    config = load_project_config(project_dir)
-    md = MarkdownRenderer(no_emoji=no_emoji)
-    md.add(MARKER_START)
-    title = config.get("title")
-    description = config.get("description")
-    md.add(md.title(1, f"spaCy Project{f': {title}' if title else ''}", "🪐"))
-    if description:
-        md.add(description)
-    md.add(md.title(2, PROJECT_FILE, "📋"))
-    md.add(INTRO_PROJECT)
-    # Commands
-    cmds = config.get("commands", [])
-    data = [(md.code(cmd["name"]), cmd.get("help", "")) for cmd in cmds]
-    if data:
-        md.add(md.title(3, "Commands", "⏯"))
-        md.add(INTRO_COMMANDS)
-        md.add(md.table(data, ["Command", "Description"]))
-    # Workflows
-    wfs = config.get("workflows", {}).items()
-    data = [(md.code(n), " → ".join(md.code(w) for w in stp)) for n, stp in wfs]
-    if data:
-        md.add(md.title(3, "Workflows", "⏭"))
-        md.add(INTRO_WORKFLOWS)
-        md.add(md.table(data, ["Workflow", "Steps"]))
-    # Assets
-    assets = config.get("assets", [])
-    data = []
-    for a in assets:
-        source = "Git" if a.get("git") else "URL" if a.get("url") else "Local"
-        dest_path = a["dest"]
-        dest = md.code(dest_path)
-        if source == "Local":
-            # Only link assets if they're in the repo
-            with working_dir(project_dir) as p:
-                if (p / dest_path).exists():
-                    dest = md.link(dest, dest_path)
-        data.append((dest, source, a.get("description", "")))
-    if data:
-        md.add(md.title(3, "Assets", "🗂"))
-        md.add(INTRO_ASSETS)
-        md.add(md.table(data, ["File", "Source", "Description"]))
-    md.add(MARKER_END)
-    # Output result
-    if is_stdout:
-        print(md.text)
-    else:
-        content = md.text
-        if output_file.exists():
-            with output_file.open("r", encoding="utf8") as f:
-                existing = f.read()
-            if MARKER_IGNORE in existing:
-                msg.warn("Found ignore marker in existing file: skipping", output_file)
-                return
-            if MARKER_START in existing and MARKER_END in existing:
-                msg.info("Found existing file: only replacing auto-generated docs")
-                before = existing.split(MARKER_START)[0]
-                after = existing.split(MARKER_END)[1]
-                content = f"{before}{content}{after}"
-            else:
-                msg.warn("Replacing existing file")
-        with output_file.open("w", encoding="utf8") as f:
-            f.write(content)
-        msg.good("Saved project documentation", output_file)
diff --git a/spacy/cli/project/dvc.py b/spacy/cli/project/dvc.py
deleted file mode 100644
index a15353855..000000000
--- a/spacy/cli/project/dvc.py
+++ /dev/null
@@ -1,207 +0,0 @@
-"""This module contains helpers and subcommands for integrating spaCy projects
-with Data Version Control (DVC). https://dvc.org"""
-from typing import Dict, Any, List, Optional, Iterable
-import subprocess
-from pathlib import Path
-from wasabi import msg
-
-from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
-from .._util import Arg, Opt, NAME, COMMAND
-from ...util import working_dir, split_command, join_command, run_command
-from ...util import SimpleFrozenList
-
-
-DVC_CONFIG = "dvc.yaml"
-DVC_DIR = ".dvc"
-UPDATE_COMMAND = "dvc"
-DVC_CONFIG_COMMENT = f"""# This file is auto-generated by spaCy based on your {PROJECT_FILE}. If you've
-# edited your {PROJECT_FILE}, you can regenerate this file by running:
-# {COMMAND} project {UPDATE_COMMAND}"""
-
-
-@project_cli.command(UPDATE_COMMAND)
-def project_update_dvc_cli(
-    # fmt: off
-    project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
-    workflow: Optional[str] = Arg(None, help=f"Name of workflow defined in {PROJECT_FILE}. Defaults to first workflow if not set."),
-    verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"),
-    quiet: bool = Opt(False, "--quiet", "-q", help="Print less info"),
-    force: bool = Opt(False, "--force", "-F", help="Force update DVC config"),
-    # fmt: on
-):
-    """Auto-generate Data Version Control (DVC) config. A DVC
-    project can only define one pipeline, so you need to specify one workflow
-    defined in the project.yml. If no workflow is specified, the first defined
-    workflow is used. The DVC config will only be updated if the project.yml
-    changed.
-
-    DOCS: https://spacy.io/api/cli#project-dvc
-    """
-    project_update_dvc(project_dir, workflow, verbose=verbose, quiet=quiet, force=force)
-
-
-def project_update_dvc(
-    project_dir: Path,
-    workflow: Optional[str] = None,
-    *,
-    verbose: bool = False,
-    quiet: bool = False,
-    force: bool = False,
-) -> None:
-    """Update the auto-generated Data Version Control (DVC) config file. A DVC
-    project can only define one pipeline, so you need to specify one workflow
-    defined in the project.yml. Will only update the file if the checksum changed.
-
-    project_dir (Path): The project directory.
-    workflow (Optional[str]): Optional name of workflow defined in project.yml.
-        If not set, the first workflow will be used.
-    verbose (bool): Print more info.
-    quiet (bool): Print less info.
-    force (bool): Force update DVC config.
-    """
-    config = load_project_config(project_dir)
-    updated = update_dvc_config(
-        project_dir, config, workflow, verbose=verbose, quiet=quiet, force=force
-    )
-    help_msg = "To execute the workflow with DVC, run: dvc repro"
-    if updated:
-        msg.good(f"Updated DVC config from {PROJECT_FILE}", help_msg)
-    else:
-        msg.info(f"No changes found in {PROJECT_FILE}, no update needed", help_msg)
-
-
-def update_dvc_config(
-    path: Path,
-    config: Dict[str, Any],
-    workflow: Optional[str] = None,
-    verbose: bool = False,
-    quiet: bool = False,
-    force: bool = False,
-) -> bool:
-    """Re-run the DVC commands in dry mode and update dvc.yaml file in the
-    project directory. The file is auto-generated based on the config. The
-    first line of the auto-generated file specifies the hash of the config
-    dict, so if any of the config values change, the DVC config is regenerated.
-
-    path (Path): The path to the project directory.
-    config (Dict[str, Any]): The loaded project.yml.
-    verbose (bool): Whether to print additional info (via DVC).
-    quiet (bool): Don't output anything (via DVC).
-    force (bool): Force update, even if hashes match.
-    RETURNS (bool): Whether the DVC config file was updated.
-    """
-    ensure_dvc(path)
-    workflows = config.get("workflows", {})
-    workflow_names = list(workflows.keys())
-    check_workflows(workflow_names, workflow)
-    if not workflow:
-        workflow = workflow_names[0]
-    config_hash = get_hash(config)
-    path = path.resolve()
-    dvc_config_path = path / DVC_CONFIG
-    if dvc_config_path.exists():
-        # Check if the file was generated using the current config, if not, redo
-        with dvc_config_path.open("r", encoding="utf8") as f:
-            ref_hash = f.readline().strip().replace("# ", "")
-        if ref_hash == config_hash and not force:
-            return False  # Nothing has changed in project.yml, don't need to update
-        dvc_config_path.unlink()
-    dvc_commands = []
-    config_commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
-
-    # some flags that apply to every command
-    flags = []
-    if verbose:
-        flags.append("--verbose")
-    if quiet:
-        flags.append("--quiet")
-
-    for name in workflows[workflow]:
-        command = config_commands[name]
-        deps = command.get("deps", [])
-        outputs = command.get("outputs", [])
-        outputs_no_cache = command.get("outputs_no_cache", [])
-        if not deps and not outputs and not outputs_no_cache:
-            continue
-        # Default to the working dir as the project path since dvc.yaml is auto-generated
-        # and we don't want arbitrary paths in there
-        project_cmd = ["python", "-m", NAME, "project", "run", name]
-        deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl]
-        outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl]
-        outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl]
-
-        dvc_cmd = ["run", *flags, "-n", name, "-w", str(path), "--no-exec"]
-        if command.get("no_skip"):
-            dvc_cmd.append("--always-changed")
-        full_cmd = [*dvc_cmd, *deps_cmd, *outputs_cmd, *outputs_nc_cmd, *project_cmd]
-        dvc_commands.append(join_command(full_cmd))
-
-    if not dvc_commands:
-        # If we don't check for this, then there will be an error when reading the
-        # config, since DVC wouldn't create it.
-        msg.fail(
-            "No usable commands for DVC found. This can happen if none of your "
-            "commands have dependencies or outputs.",
-            exits=1,
-        )
-
-    with working_dir(path):
-        for c in dvc_commands:
-            dvc_command = "dvc " + c
-            run_command(dvc_command)
-    with dvc_config_path.open("r+", encoding="utf8") as f:
-        content = f.read()
-        f.seek(0, 0)
-        f.write(f"# {config_hash}\n{DVC_CONFIG_COMMENT}\n{content}")
-    return True
-
-
-def check_workflows(workflows: List[str], workflow: Optional[str] = None) -> None:
-    """Validate workflows provided in project.yml and check that a given
-    workflow can be used to generate a DVC config.
-
-    workflows (List[str]): Names of the available workflows.
-    workflow (Optional[str]): The name of the workflow to convert.
-    """
-    if not workflows:
-        msg.fail(
-            f"No workflows defined in {PROJECT_FILE}. To generate a DVC config, "
-            f"define at least one list of commands.",
-            exits=1,
-        )
-    if workflow is not None and workflow not in workflows:
-        msg.fail(
-            f"Workflow '{workflow}' not defined in {PROJECT_FILE}. "
-            f"Available workflows: {', '.join(workflows)}",
-            exits=1,
-        )
-    if not workflow:
-        msg.warn(
-            f"No workflow specified for DVC pipeline. Using the first workflow "
-            f"defined in {PROJECT_FILE}: '{workflows[0]}'"
-        )
-
-
-def ensure_dvc(project_dir: Path) -> None:
-    """Ensure that the "dvc" command is available and that the current project
-    directory is an initialized DVC project.
-    """
-    try:
-        subprocess.run(["dvc", "--version"], stdout=subprocess.DEVNULL)
-    except Exception:
-        msg.fail(
-            "To use spaCy projects with DVC (Data Version Control), DVC needs "
-            "to be installed and the 'dvc' command needs to be available",
-            "You can install the Python package from pip (pip install dvc) or "
-            "conda (conda install -c conda-forge dvc). For more details, see the "
-            "documentation: https://dvc.org/doc/install",
-            exits=1,
-        )
-    if not (project_dir / ".dvc").exists():
-        msg.fail(
-            "Project not initialized as a DVC project",
-            "To initialize a DVC project, you can run 'dvc init' in the project "
-            "directory. For more details, see the documentation: "
-            "https://dvc.org/doc/command-reference/init",
-            exits=1,
-        )
- """ - try: - subprocess.run(["dvc", "--version"], stdout=subprocess.DEVNULL) - except Exception: - msg.fail( - "To use spaCy projects with DVC (Data Version Control), DVC needs " - "to be installed and the 'dvc' command needs to be available", - "You can install the Python package from pip (pip install dvc) or " - "conda (conda install -c conda-forge dvc). For more details, see the " - "documentation: https://dvc.org/doc/install", - exits=1, - ) - if not (project_dir / ".dvc").exists(): - msg.fail( - "Project not initialized as a DVC project", - "To initialize a DVC project, you can run 'dvc init' in the project " - "directory. For more details, see the documentation: " - "https://dvc.org/doc/command-reference/init", - exits=1, - ) diff --git a/spacy/cli/project/pull.py b/spacy/cli/project/pull.py deleted file mode 100644 index 8894baa50..000000000 --- a/spacy/cli/project/pull.py +++ /dev/null @@ -1,67 +0,0 @@ -from pathlib import Path -from wasabi import msg -from .remote_storage import RemoteStorage -from .remote_storage import get_command_hash -from .._util import project_cli, Arg, logger -from .._util import load_project_config -from .run import update_lockfile - - -@project_cli.command("pull") -def project_pull_cli( - # fmt: off - remote: str = Arg("default", help="Name or path of remote storage"), - project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), - # fmt: on -): - """Retrieve available precomputed outputs from a remote storage. - You can alias remotes in your project.yml by mapping them to storage paths. - A storage can be anything that the smart-open library can upload to, e.g. - AWS, Google Cloud Storage, SSH, local directories etc. - - DOCS: https://spacy.io/api/cli#project-pull - """ - for url, output_path in project_pull(project_dir, remote): - if url is not None: - msg.good(f"Pulled {output_path} from {url}") - - -def project_pull(project_dir: Path, remote: str, *, verbose: bool = False): - # TODO: We don't have tests for this :(. It would take a bit of mockery to - # set up. I guess see if it breaks first? - config = load_project_config(project_dir) - if remote in config.get("remotes", {}): - remote = config["remotes"][remote] - storage = RemoteStorage(project_dir, remote) - commands = list(config.get("commands", [])) - # We use a while loop here because we don't know how the commands - # will be ordered. A command might need dependencies from one that's later - # in the list. - while commands: - for i, cmd in enumerate(list(commands)): - logger.debug("CMD: %s.", cmd["name"]) - deps = [project_dir / dep for dep in cmd.get("deps", [])] - if all(dep.exists() for dep in deps): - cmd_hash = get_command_hash("", "", deps, cmd["script"]) - for output_path in cmd.get("outputs", []): - url = storage.pull(output_path, command_hash=cmd_hash) - logger.debug( - "URL: %s for %s with command hash %s", - url, - output_path, - cmd_hash, - ) - yield url, output_path - - out_locs = [project_dir / out for out in cmd.get("outputs", [])] - if all(loc.exists() for loc in out_locs): - update_lockfile(project_dir, cmd) - # We remove the command from the list here, and break, so that - # we iterate over the loop again. - commands.pop(i) - break - else: - logger.debug("Dependency missing. Skipping %s outputs.", cmd["name"]) - else: - # If we didn't break the for loop, break the while loop. 
diff --git a/spacy/cli/project/push.py b/spacy/cli/project/push.py
deleted file mode 100644
index a8178de21..000000000
--- a/spacy/cli/project/push.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from pathlib import Path
-from wasabi import msg
-from .remote_storage import RemoteStorage
-from .remote_storage import get_content_hash, get_command_hash
-from .._util import load_project_config
-from .._util import project_cli, Arg, logger
-
-
-@project_cli.command("push")
-def project_push_cli(
-    # fmt: off
-    remote: str = Arg("default", help="Name or path of remote storage"),
-    project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
-    # fmt: on
-):
-    """Persist outputs to a remote storage. You can alias remotes in your
-    project.yml by mapping them to storage paths. A storage can be anything that
-    the smart-open library can upload to, e.g. AWS, Google Cloud Storage, SSH,
-    local directories etc.
-
-    DOCS: https://spacy.io/api/cli#project-push
-    """
-    for output_path, url in project_push(project_dir, remote):
-        if url is None:
-            msg.info(f"Skipping {output_path}")
-        else:
-            msg.good(f"Pushed {output_path} to {url}")
-
-
-def project_push(project_dir: Path, remote: str):
-    """Persist outputs to a remote storage. You can alias remotes in your project.yml
-    by mapping them to storage paths. A storage can be anything that the smart-open
-    library can upload to, e.g. gcs, aws, ssh, local directories etc.
-    """
-    config = load_project_config(project_dir)
-    if remote in config.get("remotes", {}):
-        remote = config["remotes"][remote]
-    storage = RemoteStorage(project_dir, remote)
-    for cmd in config.get("commands", []):
-        logger.debug("CMD: %s", cmd["name"])
-        deps = [project_dir / dep for dep in cmd.get("deps", [])]
-        if any(not dep.exists() for dep in deps):
-            logger.debug("Dependency missing. Skipping %s outputs", cmd["name"])
-            continue
-        cmd_hash = get_command_hash(
-            "", "", [project_dir / dep for dep in cmd.get("deps", [])], cmd["script"]
-        )
-        logger.debug("CMD_HASH: %s", cmd_hash)
-        for output_path in cmd.get("outputs", []):
-            output_loc = project_dir / output_path
-            if output_loc.exists() and _is_not_empty_dir(output_loc):
-                url = storage.push(
-                    output_path,
-                    command_hash=cmd_hash,
-                    content_hash=get_content_hash(output_loc),
-                )
-                logger.debug(
-                    "URL: %s for output %s with cmd_hash %s", url, output_path, cmd_hash
-                )
-                yield output_path, url
-
-
-def _is_not_empty_dir(loc: Path):
-    if not loc.is_dir():
-        return True
-    elif any(_is_not_empty_dir(child) for child in loc.iterdir()):
-        return True
-    else:
-        return False
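The recursion in `_is_not_empty_dir` treats a path as "not empty" only if some descendant is a plain file, so a tree of empty directories is skipped rather than pushed. The same predicate, restated positively as a sketch:

from pathlib import Path

def has_any_file(loc: Path) -> bool:
    if not loc.is_dir():
        return True  # a plain file counts as content
    # any() over an empty directory iterator is False, so empty trees fall through
    return any(has_any_file(child) for child in loc.iterdir())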
diff --git a/spacy/cli/project/remote_storage.py b/spacy/cli/project/remote_storage.py
deleted file mode 100644
index 076541580..000000000
--- a/spacy/cli/project/remote_storage.py
+++ /dev/null
@@ -1,205 +0,0 @@
-from typing import Optional, List, Dict, TYPE_CHECKING
-import os
-import site
-import hashlib
-import urllib.parse
-import tarfile
-from pathlib import Path
-from wasabi import msg
-
-from .._util import get_hash, get_checksum, upload_file, download_file
-from .._util import ensure_pathy, make_tempdir
-from ...util import get_minor_version, ENV_VARS, check_bool_env_var
-from ...git_info import GIT_VERSION
-from ... import about
-from ...errors import Errors
-
-if TYPE_CHECKING:
-    from pathy import FluidPath  # noqa: F401
-
-
-class RemoteStorage:
-    """Push and pull outputs to and from a remote file storage.
-
-    Remotes can be anything that `smart-open` can support: AWS, GCS, file system,
-    ssh, etc.
-    """
-
-    def __init__(self, project_root: Path, url: str, *, compression="gz"):
-        self.root = project_root
-        self.url = ensure_pathy(url)
-        self.compression = compression
-
-    def push(self, path: Path, command_hash: str, content_hash: str) -> "FluidPath":
-        """Compress a file or directory within a project and upload it to a remote
-        storage. If an object exists at the full URL, nothing is done.
-
-        Within the remote storage, files are addressed by their project path
-        (url encoded) and two user-supplied hashes, representing their creation
-        context and their file contents. If the URL already exists, the data is
-        not uploaded. Paths are archived and compressed prior to upload.
-        """
-        loc = self.root / path
-        if not loc.exists():
-            raise IOError(f"Cannot push {loc}: does not exist.")
-        url = self.make_url(path, command_hash, content_hash)
-        if url.exists():
-            return url
-        tmp: Path
-        with make_tempdir() as tmp:
-            tar_loc = tmp / self.encode_name(str(path))
-            mode_string = f"w:{self.compression}" if self.compression else "w"
-            with tarfile.open(tar_loc, mode=mode_string) as tar_file:
-                tar_file.add(str(loc), arcname=str(path))
-            upload_file(tar_loc, url)
-        return url
-
-    def pull(
-        self,
-        path: Path,
-        *,
-        command_hash: Optional[str] = None,
-        content_hash: Optional[str] = None,
-    ) -> Optional["FluidPath"]:
-        """Retrieve a file from the remote cache. If the file already exists,
-        nothing is done.
-
-        If the command_hash and/or content_hash are specified, only matching
-        results are returned. If no results are available, an error is raised.
-        """
-        dest = self.root / path
-        if dest.exists():
-            return None
-        url = self.find(path, command_hash=command_hash, content_hash=content_hash)
-        if url is None:
-            return url
-        else:
-            # Make sure the destination exists
-            if not dest.parent.exists():
-                dest.parent.mkdir(parents=True)
-            tmp: Path
-            with make_tempdir() as tmp:
-                tar_loc = tmp / url.parts[-1]
-                download_file(url, tar_loc)
-                mode_string = f"r:{self.compression}" if self.compression else "r"
-                with tarfile.open(tar_loc, mode=mode_string) as tar_file:
-                    # This requires that the path is added correctly, relative
-                    # to root. This is how we set things up in push()
-
-                    # Disallow paths outside the current directory for the tar
-                    # file (CVE-2007-4559, directory traversal vulnerability)
-                    def is_within_directory(directory, target):
-                        abs_directory = os.path.abspath(directory)
-                        abs_target = os.path.abspath(target)
-                        prefix = os.path.commonprefix([abs_directory, abs_target])
-                        return prefix == abs_directory
-
-                    def safe_extract(tar, path):
-                        for member in tar.getmembers():
-                            member_path = os.path.join(path, member.name)
-                            if not is_within_directory(path, member_path):
-                                raise ValueError(Errors.E852)
-                        tar.extractall(path)
-
-                    safe_extract(tar_file, self.root)
-        return url
-
-    def find(
-        self,
-        path: Path,
-        *,
-        command_hash: Optional[str] = None,
-        content_hash: Optional[str] = None,
-    ) -> Optional["FluidPath"]:
-        """Find the best matching version of a file within the storage,
-        or `None` if no match can be found. If both the creation and content hash
-        are specified, only exact matches will be returned. Otherwise, the most
-        recent matching file is preferred.
-        """
-        name = self.encode_name(str(path))
-        urls = []
-        if command_hash is not None and content_hash is not None:
-            url = self.url / name / command_hash / content_hash
-            urls = [url] if url.exists() else []
-        elif command_hash is not None:
-            if (self.url / name / command_hash).exists():
-                urls = list((self.url / name / command_hash).iterdir())
-        else:
-            if (self.url / name).exists():
-                for sub_dir in (self.url / name).iterdir():
-                    urls.extend(sub_dir.iterdir())
-            if content_hash is not None:
-                urls = [url for url in urls if url.parts[-1] == content_hash]
-        if len(urls) >= 2:
-            try:
-                urls.sort(key=lambda x: x.stat().last_modified)  # type: ignore
-            except Exception:
-                msg.warn(
-                    "Unable to sort remote files by last modified. The file(s) "
-                    "pulled from the cache may not be the most recent."
-                )
-        return urls[-1] if urls else None
-
-    def make_url(self, path: Path, command_hash: str, content_hash: str) -> "FluidPath":
-        """Construct a URL from a subpath, a creation hash and a content hash."""
-        return self.url / self.encode_name(str(path)) / command_hash / content_hash
-
-    def encode_name(self, name: str) -> str:
-        """Encode a subpath into a URL-safe name."""
-        return urllib.parse.quote_plus(name)
-
-
-def get_content_hash(loc: Path) -> str:
-    return get_checksum(loc)
-
-
-def get_command_hash(
-    site_hash: str, env_hash: str, deps: List[Path], cmd: List[str]
-) -> str:
-    """Create a hash representing the execution of a command. This includes the
-    currently installed packages, whatever environment variables have been marked
-    as relevant, and the command.
-    """
-    if check_bool_env_var(ENV_VARS.PROJECT_USE_GIT_VERSION):
-        spacy_v = GIT_VERSION
-    else:
-        spacy_v = str(get_minor_version(about.__version__) or "")
-    dep_checksums = [get_checksum(dep) for dep in sorted(deps)]
-    hashes = [spacy_v, site_hash, env_hash] + dep_checksums
-    hashes.extend(cmd)
-    creation_bytes = "".join(hashes).encode("utf8")
-    return hashlib.md5(creation_bytes).hexdigest()
-
-
-def get_site_hash():
-    """Hash the current Python environment's site-packages contents, including
-    the name and version of the libraries. The list we're hashing is what
-    `pip freeze` would output.
-    """
-    site_dirs = site.getsitepackages()
-    if site.ENABLE_USER_SITE:
-        site_dirs.append(site.getusersitepackages())
-    packages = set()
-    for site_dir in site_dirs:
-        site_dir = Path(site_dir)
-        for subpath in site_dir.iterdir():
-            if subpath.parts[-1].endswith("dist-info"):
-                packages.add(subpath.parts[-1].replace(".dist-info", ""))
-    package_bytes = "".join(sorted(packages)).encode("utf8")
-    return hashlib.md5(package_bytes).hexdigest()
-
-
-def get_env_hash(env: Dict[str, str]) -> str:
-    """Construct a hash of the environment variables that will be passed into
-    the commands.
-
-    Values in the env dict may be references to the current os.environ, using
-    the syntax $ENV_VAR to mean os.environ[ENV_VAR]
-    """
-    env_vars = {}
-    for key, value in env.items():
-        if value.startswith("$"):
-            env_vars[key] = os.environ.get(value[1:], "")
-        else:
-            env_vars[key] = value
-    return get_hash(env_vars)
diff --git a/spacy/cli/project/run.py b/spacy/cli/project/run.py
deleted file mode 100644
index 0f4858a99..000000000
--- a/spacy/cli/project/run.py
+++ /dev/null
@@ -1,360 +0,0 @@
-from typing import Optional, List, Dict, Sequence, Any, Iterable, Tuple
-import os.path
-from pathlib import Path
-
-from wasabi import msg
-from wasabi.util import locale_escape
-import sys
-import srsly
-import typer
-
-from ... import about
-from ...git_info import GIT_VERSION
-from ...util import working_dir, run_command, split_command, is_cwd, join_command
-from ...util import SimpleFrozenList, is_minor_version_match, ENV_VARS
-from ...util import check_bool_env_var, SimpleFrozenDict
-from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
-from .._util import get_checksum, project_cli, Arg, Opt, COMMAND, parse_config_overrides
-
-
-@project_cli.command(
-    "run", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
-)
-def project_run_cli(
-    # fmt: off
-    ctx: typer.Context,  # This is only used to read additional arguments
-    subcommand: str = Arg(None, help=f"Name of command defined in the {PROJECT_FILE}"),
-    project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
-    force: bool = Opt(False, "--force", "-F", help="Force re-running steps, even if nothing changed"),
-    dry: bool = Opt(False, "--dry", "-D", help="Perform a dry run and don't execute scripts"),
-    show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
-    # fmt: on
-):
-    """Run a named command or workflow defined in the project.yml. If a workflow
-    name is specified, all commands in the workflow are run, in order. If
-    commands define dependencies and/or outputs, they will only be re-run if
-    state has changed.
-
-    DOCS: https://spacy.io/api/cli#project-run
-    """
-    if show_help or not subcommand:
-        print_run_help(project_dir, subcommand)
-    else:
-        overrides = parse_config_overrides(ctx.args)
-        project_run(project_dir, subcommand, overrides=overrides, force=force, dry=dry)
-
-
-def project_run(
-    project_dir: Path,
-    subcommand: str,
-    *,
-    overrides: Dict[str, Any] = SimpleFrozenDict(),
-    force: bool = False,
-    dry: bool = False,
-    capture: bool = False,
-    skip_requirements_check: bool = False,
-) -> None:
-    """Run a named command or workflow defined in the project.yml. If a
-    workflow name is specified, all commands in the workflow are run, in
-    order. Commands are only re-run if their inputs or outputs changed.
-
-    project_dir (Path): Path to project directory.
-    subcommand (str): Name of command to run.
-    overrides (Dict[str, Any]): Optional config overrides.
-    force (bool): Force re-running, even if nothing changed.
-    dry (bool): Perform a dry run and don't execute commands.
-    capture (bool): Whether to capture the output and errors of individual commands.
-        If False, the stdout and stderr will not be redirected, and if there's an error,
-        sys.exit will be called with the return code. You should use capture=False
-        when you want to turn over execution to the command, and capture=True
-        when you want to run the command more like a function.
-    skip_requirements_check (bool): Whether to skip the requirements check.
-    """
-    config = load_project_config(project_dir, overrides=overrides)
-    commands = {cmd["name"]: cmd for cmd in config.get("commands", [])}
-    workflows = config.get("workflows", {})
-    validate_subcommand(list(commands.keys()), list(workflows.keys()), subcommand)
-
-    req_path = project_dir / "requirements.txt"
-    if not skip_requirements_check:
-        if config.get("check_requirements", True) and os.path.exists(req_path):
-            with req_path.open() as requirements_file:
-                _check_requirements([req.strip() for req in requirements_file])
-
-    if subcommand in workflows:
-        msg.info(f"Running workflow '{subcommand}'")
-        for cmd in workflows[subcommand]:
-            project_run(
-                project_dir,
-                cmd,
-                overrides=overrides,
-                force=force,
-                dry=dry,
-                capture=capture,
-                skip_requirements_check=True,
-            )
-    else:
-        cmd = commands[subcommand]
-        for dep in cmd.get("deps", []):
-            if not (project_dir / dep).exists():
-                err = f"Missing dependency specified by command '{subcommand}': {dep}"
-                err_help = "Maybe you forgot to run the 'project assets' command or a previous step?"
-                err_exits = 1 if not dry else None
-                msg.fail(err, err_help, exits=err_exits)
-        check_spacy_commit = check_bool_env_var(ENV_VARS.PROJECT_USE_GIT_VERSION)
-        with working_dir(project_dir) as current_dir:
-            msg.divider(subcommand)
-            rerun = check_rerun(current_dir, cmd, check_spacy_commit=check_spacy_commit)
-            if not rerun and not force:
-                msg.info(f"Skipping '{cmd['name']}': nothing changed")
-            else:
-                run_commands(cmd["script"], dry=dry, capture=capture)
-                if not dry:
-                    update_lockfile(current_dir, cmd)
-
-
-def print_run_help(project_dir: Path, subcommand: Optional[str] = None) -> None:
-    """Simulate a CLI help prompt using the info available in the project.yml.
-
-    project_dir (Path): The project directory.
-    subcommand (Optional[str]): The subcommand or None. If a subcommand is
-        provided, the subcommand help is shown. Otherwise, the top-level help
-        and a list of available commands is printed.
-    """
-    config = load_project_config(project_dir)
-    config_commands = config.get("commands", [])
-    commands = {cmd["name"]: cmd for cmd in config_commands}
-    workflows = config.get("workflows", {})
-    project_loc = "" if is_cwd(project_dir) else project_dir
-    if subcommand:
-        validate_subcommand(list(commands.keys()), list(workflows.keys()), subcommand)
-        print(f"Usage: {COMMAND} project run {subcommand} {project_loc}")
-        if subcommand in commands:
-            help_text = commands[subcommand].get("help")
-            if help_text:
-                print(f"\n{help_text}\n")
-        elif subcommand in workflows:
-            steps = workflows[subcommand]
-            print(f"\nWorkflow consisting of {len(steps)} commands:")
-            steps_data = [
-                (f"{i + 1}. {step}", commands[step].get("help", ""))
-                for i, step in enumerate(steps)
-            ]
-            msg.table(steps_data)
-            help_cmd = f"{COMMAND} project run [COMMAND] {project_loc} --help"
-            print(f"For command details, run: {help_cmd}")
-    else:
-        print("")
-        title = config.get("title")
-        if title:
-            print(f"{locale_escape(title)}\n")
-        if config_commands:
-            print(f"Available commands in {PROJECT_FILE}")
-            print(f"Usage: {COMMAND} project run [COMMAND] {project_loc}")
-            msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
-        if workflows:
-            print(f"Available workflows in {PROJECT_FILE}")
-            print(f"Usage: {COMMAND} project run [WORKFLOW] {project_loc}")
-            msg.table([(name, " -> ".join(steps)) for name, steps in workflows.items()])
-
-
-def run_commands(
-    commands: Iterable[str] = SimpleFrozenList(),
-    silent: bool = False,
-    dry: bool = False,
-    capture: bool = False,
-) -> None:
-    """Run a sequence of commands in a subprocess, in order.
-
-    commands (List[str]): The string commands.
-    silent (bool): Don't print the commands.
-    dry (bool): Perform a dry run and don't execute anything.
-    capture (bool): Whether to capture the output and errors of individual commands.
-        If False, the stdout and stderr will not be redirected, and if there's an error,
-        sys.exit will be called with the return code. You should use capture=False
-        when you want to turn over execution to the command, and capture=True
-        when you want to run the command more like a function.
-    """
-    for c in commands:
-        command = split_command(c)
-        # Not sure if this is needed or a good idea. Motivation: users may often
-        # use commands in their config that reference "python" and we want to
-        # make sure that it's always executing the same Python that spaCy is
-        # executed with and the pip in the same env, not some other Python/pip.
-        # Also ensures cross-compatibility if user 1 writes "python3" (because
-        # that's how it's set up on their system), and user 2 without the
-        # shortcut tries to re-run the command.
-        if len(command) and command[0] in ("python", "python3"):
-            command[0] = sys.executable
-        elif len(command) and command[0] in ("pip", "pip3"):
-            command = [sys.executable, "-m", "pip", *command[1:]]
-        if not silent:
-            print(f"Running command: {join_command(command)}")
-        if not dry:
-            run_command(command, capture=capture)
-
-
-def validate_subcommand(
-    commands: Sequence[str], workflows: Sequence[str], subcommand: str
-) -> None:
-    """Check that a subcommand is valid and defined. Raises an error otherwise.
-
-    commands (Sequence[str]): The available commands.
-    subcommand (str): The subcommand.
-    """
-    if not commands and not workflows:
-        msg.fail(f"No commands or workflows defined in {PROJECT_FILE}", exits=1)
-    if subcommand not in commands and subcommand not in workflows:
-        help_msg = []
-        if subcommand in ["assets", "asset"]:
-            help_msg.append("Did you mean to run: python -m spacy project assets?")
-        if commands:
-            help_msg.append(f"Available commands: {', '.join(commands)}")
-        if workflows:
-            help_msg.append(f"Available workflows: {', '.join(workflows)}")
-        msg.fail(
-            f"Can't find command or workflow '{subcommand}' in {PROJECT_FILE}",
-            ". ".join(help_msg),
-            exits=1,
-        )
-
-
-def check_rerun(
-    project_dir: Path,
-    command: Dict[str, Any],
-    *,
-    check_spacy_version: bool = True,
-    check_spacy_commit: bool = False,
-) -> bool:
-    """Check if a command should be rerun because its settings or inputs/outputs
-    changed.
-
-    project_dir (Path): The current project directory.
-    command (Dict[str, Any]): The command, as defined in the project.yml.
-    check_spacy_version (bool): Whether to rerun if the spaCy minor version changed.
-    check_spacy_commit (bool): Whether to rerun if the spaCy commit hash changed.
-    RETURNS (bool): Whether to re-run the command.
-    """
-    # Always rerun if no-skip is set
-    if command.get("no_skip", False):
-        return True
-    lock_path = project_dir / PROJECT_LOCK
-    if not lock_path.exists():  # We don't have a lockfile, run command
-        return True
-    data = srsly.read_yaml(lock_path)
-    if command["name"] not in data:  # We don't have info about this command
-        return True
-    entry = data[command["name"]]
-    # Always run commands with no outputs (otherwise they'd always be skipped)
-    if not entry.get("outs", []):
-        return True
-    # Always rerun if spaCy version or commit hash changed
-    spacy_v = entry.get("spacy_version")
-    commit = entry.get("spacy_git_version")
-    if check_spacy_version and not is_minor_version_match(spacy_v, about.__version__):
-        info = f"({spacy_v} in {PROJECT_LOCK}, {about.__version__} current)"
-        msg.info(f"Re-running '{command['name']}': spaCy minor version changed {info}")
-        return True
-    if check_spacy_commit and commit != GIT_VERSION:
-        info = f"({commit} in {PROJECT_LOCK}, {GIT_VERSION} current)"
-        msg.info(f"Re-running '{command['name']}': spaCy commit changed {info}")
-        return True
-    # If the entry in the lockfile matches the lockfile entry that would be
-    # generated from the current command, we don't rerun because it means that
-    # all inputs/outputs, hashes and scripts are the same and nothing changed
-    lock_entry = get_lock_entry(project_dir, command)
-    exclude = ["spacy_version", "spacy_git_version"]
-    return get_hash(lock_entry, exclude=exclude) != get_hash(entry, exclude=exclude)
-
-
-def update_lockfile(project_dir: Path, command: Dict[str, Any]) -> None:
-    """Update the lockfile after running a command. Will create a lockfile if
-    it doesn't yet exist and will add an entry for the current command, its
-    script and dependencies/outputs.
-
-    project_dir (Path): The current project directory.
-    command (Dict[str, Any]): The command, as defined in the project.yml.
-    """
-    lock_path = project_dir / PROJECT_LOCK
-    if not lock_path.exists():
-        srsly.write_yaml(lock_path, {})
-        data = {}
-    else:
-        data = srsly.read_yaml(lock_path)
-    data[command["name"]] = get_lock_entry(project_dir, command)
-    srsly.write_yaml(lock_path, data)
-
-
-def get_lock_entry(project_dir: Path, command: Dict[str, Any]) -> Dict[str, Any]:
-    """Get a lockfile entry for a given command. An entry includes the command,
-    the script (command steps) and a list of dependencies and outputs with
-    their paths and file hashes, if available. The format is based on the
-    dvc.lock files, to keep things consistent.
-
-    project_dir (Path): The current project directory.
-    command (Dict[str, Any]): The command, as defined in the project.yml.
-    RETURNS (Dict[str, Any]): The lockfile entry.
-    """
-    deps = get_fileinfo(project_dir, command.get("deps", []))
-    outs = get_fileinfo(project_dir, command.get("outputs", []))
-    outs_nc = get_fileinfo(project_dir, command.get("outputs_no_cache", []))
-    return {
-        "cmd": f"{COMMAND} run {command['name']}",
-        "script": command["script"],
-        "deps": deps,
-        "outs": [*outs, *outs_nc],
-        "spacy_version": about.__version__,
-        "spacy_git_version": GIT_VERSION,
-    }
-
-
-def get_fileinfo(project_dir: Path, paths: List[str]) -> List[Dict[str, Optional[str]]]:
-    """Generate the file information for a list of paths (dependencies, outputs).
-    Includes the file path and the file's checksum.
-
-    project_dir (Path): The current project directory.
-    paths (List[str]): The file paths.
-    RETURNS (List[Dict[str, str]]): The lockfile entry for a file.
-    """
-    data = []
-    for path in paths:
-        file_path = project_dir / path
-        md5 = get_checksum(file_path) if file_path.exists() else None
-        data.append({"path": path, "md5": md5})
-    return data
-
-
-def _check_requirements(requirements: List[str]) -> Tuple[bool, bool]:
-    """Checks whether requirements are installed and free of version conflicts.
-
-    requirements (List[str]): List of requirements.
-    RETURNS (Tuple[bool, bool]): Whether (1) any packages couldn't be imported,
-        (2) any packages with version conflicts exist.
-    """
-    import pkg_resources
-
-    failed_pkgs_msgs: List[str] = []
-    conflicting_pkgs_msgs: List[str] = []
-
-    for req in requirements:
-        try:
-            pkg_resources.require(req)
-        except pkg_resources.DistributionNotFound as dnf:
-            failed_pkgs_msgs.append(dnf.report())
-        except pkg_resources.VersionConflict as vc:
-            conflicting_pkgs_msgs.append(vc.report())
-        except Exception:
-            msg.warn(
-                f"Unable to check requirement: {req} "
-                "Checks are currently limited to requirement specifiers "
-                "(PEP 508)"
-            )
-
-    if len(failed_pkgs_msgs) or len(conflicting_pkgs_msgs):
-        msg.warn(
-            title="Missing requirements or requirement conflicts detected. Make sure your Python environment is set up "
-            "correctly and you installed all requirements specified in your project's requirements.txt: "
-        )
-        for pkg_msg in failed_pkgs_msgs + conflicting_pkgs_msgs:
-            msg.text(pkg_msg)
-
-    return len(failed_pkgs_msgs) > 0, len(conflicting_pkgs_msgs) > 0
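The skip decision in `check_rerun` comes down to comparing a freshly computed lock entry against the stored one while ignoring the spaCy version fields. A sketch with toy entries; `get_hash` here is a local stand-in for spaCy's `_util.get_hash` (an MD5 over the sorted, JSON-serialized dict):

import hashlib
import json

def get_hash(data, exclude=()):
    data = {k: v for k, v in data.items() if k not in exclude}
    return hashlib.md5(json.dumps(data, sort_keys=True).encode("utf8")).hexdigest()

stored = {"cmd": "spacy run train", "deps": [], "spacy_version": "3.4.0"}
fresh = {"cmd": "spacy run train", "deps": [], "spacy_version": "3.5.0"}
exclude = ["spacy_version", "spacy_git_version"]
rerun = get_hash(fresh, exclude=exclude) != get_hash(stored, exclude=exclude)
assert rerun is False  # only the excluded version field changed, so skip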