mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 11:36:24 +03:00
e559867605
* Add utils for working with remote storage * WIP add remote_cache for project * WIP add push and pull commands * Use pathy in remote_cache * Updarte util * Update remote_cache * Update util * Update project assets * Update pull script * Update push script * Fix type annotation in util * Work on remote storage * Remove site and env hash * Fix imports * Fix type annotation * Require pathy * Require pathy * Fix import * Add a util to handle project variable substitution * Import push and pull commands * Fix pull command * Fix push command * Fix tarfile in remote_storage * Improve printing * Fiddle with status messages * Set version to v3.0.0a9 * Draft docs for spacy project remote storages * Update docs [ci skip] * Use Thinc config to simplify and unify template variables * Auto-format * Don't import Pathy globally for now Causes slow and annoying Google Cloud warning * Tidy up test * Tidy up and update tests * Update to latest Thinc * Update docs * variables -> vars * Update docs [ci skip] * Update docs [ci skip] Co-authored-by: Ines Montani <ines@ines.io>
37 lines
1.5 KiB
Python
37 lines
1.5 KiB
Python
from pathlib import Path
|
|
from wasabi import msg
|
|
from .remote_storage import RemoteStorage
|
|
from .remote_storage import get_command_hash
|
|
from .._util import project_cli, Arg
|
|
from .._util import load_project_config
|
|
|
|
|
|
@project_cli.command("pull")
def project_pull_cli(
    # fmt: off
    remote: str = Arg("default", help="Name or path of remote storage"),
    project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
    # fmt: on
):
    """Retrieve any precomputed outputs from a remote storage that are available.
    You can alias remotes in your project.yml by mapping them to storage paths.
    A storage can be anything that the smart-open library can upload to, e.g.
    gcs, aws, ssh, local directories etc
    """
    # NOTE(review): the docstring above is presumably surfaced as the help
    # text of the "pull" command by the CLI framework behind `project_cli`,
    # so its wording is kept unchanged -- confirm before editing it.
    #
    # project_pull() is a generator: each output is pulled as the loop
    # advances, so success messages are printed one by one as pulls finish.
    for url, output_path in project_pull(project_dir, remote):
        if url is None:
            # Nothing was reported as pulled for this output; stay silent.
            continue
        msg.good(f"Pulled {output_path} from {url}")
|
|
|
|
|
|
def project_pull(project_dir: Path, remote: str, *, verbose: bool = False):
    """Pull the outputs of every command in a project from a remote storage.

    Loads the project config found in `project_dir`, resolves `remote`
    through the config's "remotes" mapping when it is a key there (otherwise
    the value is used as given), and asks a RemoteStorage for each path listed
    under each command's "outputs".

    This is a generator: nothing is loaded or pulled until it is iterated.

    project_dir (Path): The project directory containing the project config.
    remote (str): Name of a remote defined in the config, or a storage path.
    verbose (bool): Accepted for interface compatibility; not used here.
    YIELDS (tuple): A (url, output_path) pair for every listed output, where
        url is whatever RemoteStorage.pull returned for that output. The
        "pull" CLI command in this file only reports pairs whose url is not
        None.
    """
    config = load_project_config(project_dir)
    # A remote may be given by alias; swap the alias for its configured value.
    known_remotes = config.get("remotes", {})
    if remote in known_remotes:
        remote = known_remotes[remote]
    storage = RemoteStorage(project_dir, remote)
    for command in config.get("commands", []):
        dep_paths = [project_dir / name for name in command.get("deps", [])]
        # The first two hash components are passed as empty strings
        # (presumably the removed site/env hashes -- confirm against
        # get_command_hash). The hash is computed once per command and
        # shared by all of its outputs.
        command_hash = get_command_hash("", "", dep_paths, command["script"])
        for output_path in command.get("outputs", []):
            yield storage.pull(output_path, command_hash=command_hash), output_path
|