spaCy/spacy/cli/project/clone.py
2020-07-09 19:44:28 +02:00

108 lines
4.0 KiB
Python

from typing import Optional
from pathlib import Path
from wasabi import msg
import subprocess
import shutil
from ... import about
from ...util import ensure_path, run_command, make_tempdir
from .._app import project_cli, Arg, Opt, COMMAND
DIRS = [
"assets",
"metas",
"configs",
"packages",
"metrics",
"scripts",
"notebooks",
"training",
"corpus",
]
@project_cli.command("clone")
def project_clone_cli(
# fmt: off
name: str = Arg(..., help="The name of the template to clone"),
dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"),
# fmt: on
):
"""Clone a project template from a repository. Calls into "git" and will
only download the files from the given subdirectory. The GitHub repo
defaults to the official spaCy template repo, but can be customized
(including using a private repo).
"""
if dest is None:
dest = Path.cwd() / name
project_clone(name, dest, repo=repo)
def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> None:
"""Clone a project template from a repository.
name (str): Name of subdirectory to clone.
dest (Path): Destination path of cloned project.
repo (str): URL of Git repo containing project templates.
"""
dest = ensure_path(dest)
check_clone(name, dest, repo)
project_dir = dest.resolve()
# We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir:
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
try:
run_command(cmd)
except subprocess.CalledProcessError:
err = f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'."
msg.fail(err)
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
f.write(name)
try:
run_command(["git", "-C", str(tmp_dir), "fetch"])
run_command(["git", "-C", str(tmp_dir), "checkout"])
except subprocess.CalledProcessError:
err = f"Could not clone '{name}' in the repo '{repo}'."
msg.fail(err)
shutil.move(str(tmp_dir / Path(name).name), str(project_dir))
msg.good(f"Cloned project '{name}' from {repo} into {project_dir}")
for sub_dir in DIRS:
dir_path = project_dir / sub_dir
if not dir_path.exists():
dir_path.mkdir(parents=True)
msg.good(f"Your project is now ready!", dest)
print(f"To fetch the assets, run:\n{COMMAND} project assets {dest}")
def check_clone(name: str, dest: Path, repo: str) -> None:
"""Check and validate that the destination path can be used to clone. Will
check that Git is available and that the destination path is suitable.
name (str): Name of the directory to clone from the repo.
dest (Path): Local destination of cloned directory.
repo (str): URL of the repo to clone from.
"""
try:
subprocess.run(["git", "--version"], stdout=subprocess.DEVNULL)
except Exception:
msg.fail(
f"Cloning spaCy project templates requires Git and the 'git' command. ",
f"To clone a project without Git, copy the files from the '{name}' "
f"directory in the {repo} to {dest} manually and then run:",
f"{COMMAND} project init {dest}",
exits=1,
)
if not dest:
msg.fail(f"Not a valid directory to clone project: {dest}", exits=1)
if dest.exists():
# Directory already exists (not allowed, clone needs to create it)
msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1)
if not dest.parent.exists():
# We're not creating parents, parent dir should exist
msg.fail(
f"Can't clone project, parent directory doesn't exist: {dest.parent}",
exits=1,
)