mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-06 14:40:34 +03:00
Support git_sparse_checkout for assets
This commit is contained in:
parent
99d31e7662
commit
e1457d5806
|
@ -1,5 +1,6 @@
|
|||
from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
|
||||
import sys
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from wasabi import msg
|
||||
import srsly
|
||||
|
@ -11,7 +12,7 @@ from thinc.config import Config, ConfigValidationError
|
|||
from configparser import InterpolationError
|
||||
|
||||
from ..schemas import ProjectConfigSchema, validate
|
||||
from ..util import import_file
|
||||
from ..util import import_file, run_command, make_tempdir
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pathy import Pathy # noqa: F401
|
||||
|
@ -311,3 +312,24 @@ def ensure_pathy(path):
|
|||
from pathy import Pathy # noqa: F811
|
||||
|
||||
return Pathy(path)
|
||||
|
||||
|
||||
def git_sparse_checkout(repo: str, subpath: str, dest: Path) -> None:
    """Fetch a single file or directory from a Git repo via sparse checkout.

    The repo is cloned into a temporary directory with --no-checkout and
    --depth 1, `subpath` is written to .git/info/sparse-checkout, and after
    fetch + checkout the resulting path is moved to `dest`.

    repo (str): Repository location passed to `git clone`.
    subpath (str): Path inside the repo to check out (file or directory).
    dest (Path): Target location. Must not exist yet; its parent must exist.

    RAISES:
        IOError: If `dest` already exists or its parent directory is missing.
        Failures of the git commands surface as whatever `run_command`
        raises (callers in this file catch subprocess.CalledProcessError).
    """
    if dest.exists():
        raise IOError("Destination of checkout must not exist")
    if not dest.parent.exists():
        raise IOError("Parent of destination of checkout must exist")
    # We're using Git and sparse checkout to only clone the files we need
    with make_tempdir() as tmp_dir:
        # Pass the command as a list (like the fetch/checkout calls below) so
        # a repo URL or temp path containing whitespace isn't re-split.
        clone_cmd = [
            "git",
            "clone",
            repo,
            str(tmp_dir),
            "--no-checkout",
            "--depth",
            "1",
            "--config",
            "core.sparseCheckout=true",
        ]
        run_command(clone_cmd)
        with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
            f.write(subpath)
        run_command(["git", "-C", str(tmp_dir), "fetch"])
        run_command(["git", "-C", str(tmp_dir), "checkout"])
        # We need Path(subpath) to make sure we also support subdirectories
        shutil.move(str(tmp_dir / Path(subpath)), str(dest))
        # NOTE: no debug output here. `dest` may be a single file, in which
        # case listing it with iterdir() would raise NotADirectoryError.
|
||||
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ import requests
|
|||
|
||||
from ...util import ensure_path, working_dir
|
||||
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
||||
from .._util import download_file
|
||||
from .._util import download_file, git_sparse_checkout
|
||||
|
||||
|
||||
# TODO: find a solution for caches
|
||||
|
@ -45,14 +45,18 @@ def project_assets(project_dir: Path) -> None:
|
|||
msg.warn(f"No assets specified in {PROJECT_FILE}", exits=0)
|
||||
msg.info(f"Fetching {len(assets)} asset(s)")
|
||||
for asset in assets:
|
||||
dest = asset["dest"]
|
||||
url = asset.get("url")
|
||||
dest = Path(asset["dest"])
|
||||
checksum = asset.get("checksum")
|
||||
if not url:
|
||||
# project.yml defines asset without URL that the user has to place
|
||||
check_private_asset(dest, checksum)
|
||||
continue
|
||||
fetch_asset(project_path, url, dest, checksum)
|
||||
if "git" in asset:
|
||||
print(dest)
|
||||
git_sparse_checkout(asset["git"]["repo"], asset["git"]["path"], dest)
|
||||
else:
|
||||
url = asset.get("url")
|
||||
if not url:
|
||||
# project.yml defines asset without URL that the user has to place
|
||||
check_private_asset(dest, checksum)
|
||||
continue
|
||||
fetch_asset(project_path, url, dest, checksum)
|
||||
|
||||
|
||||
def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None:
|
||||
|
|
|
@ -6,8 +6,9 @@ import shutil
|
|||
import re
|
||||
|
||||
from ... import about
|
||||
from ...util import ensure_path, run_command, make_tempdir
|
||||
from ...util import ensure_path
|
||||
from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
|
||||
from .._util import git_sparse_checkout
|
||||
|
||||
|
||||
@project_cli.command("clone")
|
||||
|
@ -39,24 +40,11 @@ def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> N
|
|||
check_clone(name, dest, repo)
|
||||
project_dir = dest.resolve()
|
||||
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
|
||||
# We're using Git and sparse checkout to only clone the files we need
|
||||
with make_tempdir() as tmp_dir:
|
||||
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true"
|
||||
try:
|
||||
run_command(cmd)
|
||||
except subprocess.CalledProcessError:
|
||||
err = f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'."
|
||||
msg.fail(err)
|
||||
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
|
||||
f.write(name)
|
||||
try:
|
||||
run_command(["git", "-C", str(tmp_dir), "fetch"])
|
||||
run_command(["git", "-C", str(tmp_dir), "checkout"])
|
||||
except subprocess.CalledProcessError:
|
||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||
msg.fail(err)
|
||||
# We need Path(name) to make sure we also support subdirectories
|
||||
shutil.move(str(tmp_dir / Path(name)), str(project_dir))
|
||||
try:
|
||||
git_sparse_checkout(repo, name, dest)
|
||||
except subprocess.CalledProcessError:
|
||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||
msg.fail(err)
|
||||
msg.good(f"Cloned '{name}' from {repo_name}", project_dir)
|
||||
if not (project_dir / PROJECT_FILE).exists():
|
||||
msg.warn(f"No {PROJECT_FILE} found in directory")
|
||||
|
|
Loading…
Reference in New Issue
Block a user