mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			43 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			43 lines
		
	
	
		
			1.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from pathlib import Path
 | 
						|
from wasabi import msg
 | 
						|
from .remote_storage import RemoteStorage
 | 
						|
from .remote_storage import get_command_hash
 | 
						|
from .._util import project_cli, Arg
 | 
						|
from .._util import load_project_config
 | 
						|
from .run import update_lockfile
 | 
						|
 | 
						|
 | 
						|
@project_cli.command("pull")
 | 
						|
def project_pull_cli(
 | 
						|
    # fmt: off
 | 
						|
    remote: str = Arg("default", help="Name or path of remote storage"),
 | 
						|
    project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
 | 
						|
    # fmt: on
 | 
						|
):
 | 
						|
    """Retrieve available precomputed outputs from a remote storage.
 | 
						|
    You can alias remotes in your project.yml by mapping them to storage paths.
 | 
						|
    A storage can be anything that the smart-open library can upload to, e.g.
 | 
						|
    gcs, aws, ssh, local directories etc
 | 
						|
    """
 | 
						|
    for url, output_path in project_pull(project_dir, remote):
 | 
						|
        if url is not None:
 | 
						|
            msg.good(f"Pulled {output_path} from {url}")
 | 
						|
 | 
						|
 | 
						|
def project_pull(project_dir: Path, remote: str, *, verbose: bool = False):
 | 
						|
    config = load_project_config(project_dir)
 | 
						|
    if remote in config.get("remotes", {}):
 | 
						|
        remote = config["remotes"][remote]
 | 
						|
    storage = RemoteStorage(project_dir, remote)
 | 
						|
    for cmd in config.get("commands", []):
 | 
						|
        deps = [project_dir / dep for dep in cmd.get("deps", [])]
 | 
						|
        if any(not dep.exists() for dep in deps):
 | 
						|
            continue
 | 
						|
        cmd_hash = get_command_hash("", "", deps, cmd["script"])
 | 
						|
        for output_path in cmd.get("outputs", []):
 | 
						|
            url = storage.pull(output_path, command_hash=cmd_hash)
 | 
						|
            yield url, output_path
 | 
						|
 | 
						|
        if cmd.get("outptus") and all(loc.exists() for loc in cmd["outputs"]):
 | 
						|
            update_lockfile(project_dir, cmd)
 |