mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Make project command a submodule * Update with WIP * Add helper for joining commands * Update docstrins, formatting and types * Update assets and add support for copying local files * Fix type * Update success messages
		
			
				
	
	
		
			111 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			111 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from pathlib import Path
 | 
						|
from wasabi import msg
 | 
						|
import subprocess
 | 
						|
import shutil
 | 
						|
 | 
						|
from ... import about
 | 
						|
from ...util import ensure_path, run_command, make_tempdir
 | 
						|
from .._app import project_cli, Arg, Opt, COMMAND
 | 
						|
 | 
						|
 | 
						|
DIRS = [
 | 
						|
    "assets",
 | 
						|
    "metas",
 | 
						|
    "configs",
 | 
						|
    "packages",
 | 
						|
    "metrics",
 | 
						|
    "scripts",
 | 
						|
    "notebooks",
 | 
						|
    "training",
 | 
						|
    "corpus",
 | 
						|
]
 | 
						|
 | 
						|
 | 
						|
@project_cli.command("clone")
 | 
						|
def project_clone_cli(
 | 
						|
    # fmt: off
 | 
						|
    name: str = Arg(..., help="The name of the template to fetch"),
 | 
						|
    dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=False),
 | 
						|
    repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."),
 | 
						|
    # fmt: on
 | 
						|
):
 | 
						|
    """Clone a project template from a repository. Calls into "git" and will
 | 
						|
    only download the files from the given subdirectory. The GitHub repo
 | 
						|
    defaults to the official spaCy template repo, but can be customized
 | 
						|
    (including using a private repo). Setting the --git flag will also
 | 
						|
    initialize the project directory as a Git repo. If the project is intended
 | 
						|
    to be a Git repo, it should be initialized with Git first, before
 | 
						|
    initializing DVC (Data Version Control). This allows DVC to integrate with
 | 
						|
    Git.
 | 
						|
    """
 | 
						|
    if dest == Path.cwd():
 | 
						|
        dest = dest / name
 | 
						|
    project_clone(name, dest, repo=repo)
 | 
						|
 | 
						|
 | 
						|
def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> None:
 | 
						|
    """Clone a project template from a repository.
 | 
						|
 | 
						|
    name (str): Name of subdirectory to clone.
 | 
						|
    dest (Path): Destination path of cloned project.
 | 
						|
    repo (str): URL of Git repo containing project templates.
 | 
						|
    """
 | 
						|
    dest = ensure_path(dest)
 | 
						|
    check_clone(name, dest, repo)
 | 
						|
    project_dir = dest.resolve()
 | 
						|
    # We're using Git and sparse checkout to only clone the files we need
 | 
						|
    with make_tempdir() as tmp_dir:
 | 
						|
        cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
 | 
						|
        try:
 | 
						|
            run_command(cmd)
 | 
						|
        except subprocess.CalledProcessError:
 | 
						|
            err = f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'."
 | 
						|
            msg.fail(err)
 | 
						|
        with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
 | 
						|
            f.write(name)
 | 
						|
        try:
 | 
						|
            run_command(["git", "-C", str(tmp_dir), "fetch"])
 | 
						|
            run_command(["git", "-C", str(tmp_dir), "checkout"])
 | 
						|
        except subprocess.CalledProcessError:
 | 
						|
            err = f"Could not clone '{name}' in the repo '{repo}'."
 | 
						|
            msg.fail(err)
 | 
						|
        shutil.move(str(tmp_dir / Path(name).name), str(project_dir))
 | 
						|
    msg.good(f"Cloned project '{name}' from {repo} into {project_dir}")
 | 
						|
    for sub_dir in DIRS:
 | 
						|
        dir_path = project_dir / sub_dir
 | 
						|
        if not dir_path.exists():
 | 
						|
            dir_path.mkdir(parents=True)
 | 
						|
    msg.good(f"Your project is now ready!", dest)
 | 
						|
    print(f"To fetch the assets, run:\n{COMMAND} project assets {dest}")
 | 
						|
 | 
						|
 | 
						|
def check_clone(name: str, dest: Path, repo: str) -> None:
 | 
						|
    """Check and validate that the destination path can be used to clone. Will
 | 
						|
    check that Git is available and that the destination path is suitable.
 | 
						|
 | 
						|
    name (str): Name of the directory to clone from the repo.
 | 
						|
    dest (Path): Local destination of cloned directory.
 | 
						|
    repo (str): URL of the repo to clone from.
 | 
						|
    """
 | 
						|
    try:
 | 
						|
        subprocess.run(["git", "--version"], stdout=subprocess.DEVNULL)
 | 
						|
    except Exception:
 | 
						|
        msg.fail(
 | 
						|
            f"Cloning spaCy project templates requires Git and the 'git' command. ",
 | 
						|
            f"To clone a project without Git, copy the files from the '{name}' "
 | 
						|
            f"directory in the {repo} to {dest} manually and then run:",
 | 
						|
            f"{COMMAND} project init {dest}",
 | 
						|
            exits=1,
 | 
						|
        )
 | 
						|
    if not dest:
 | 
						|
        msg.fail(f"Not a valid directory to clone project: {dest}", exits=1)
 | 
						|
    if dest.exists():
 | 
						|
        # Directory already exists (not allowed, clone needs to create it)
 | 
						|
        msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1)
 | 
						|
    if not dest.parent.exists():
 | 
						|
        # We're not creating parents, parent dir should exist
 | 
						|
        msg.fail(
 | 
						|
            f"Can't clone project, parent directory doesn't exist: {dest.parent}",
 | 
						|
            exits=1,
 | 
						|
        )
 |