mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-24 20:51:30 +03:00 
			
		
		
		
	Merge pull request #6064 from explosion/fix/sparse-checkout-ux
Fix sparse checkout and error handling
This commit is contained in:
		
						commit
						9cc304c194
					
				|  | @ -300,7 +300,9 @@ def ensure_pathy(path): | |||
|     return Pathy(path) | ||||
| 
 | ||||
| 
 | ||||
| def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"): | ||||
| def git_checkout( | ||||
|     repo: str, subpath: str, dest: Path, *, branch: str = "master", sparse: bool = False | ||||
| ): | ||||
|     git_version = get_git_version() | ||||
|     if dest.exists(): | ||||
|         msg.fail("Destination of checkout must not exist", exits=1) | ||||
|  | @ -323,11 +325,14 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m | |||
|     # We're using Git and sparse checkout to only clone the files we need | ||||
|     with make_tempdir() as tmp_dir: | ||||
|         supports_sparse = git_version >= (2, 22) | ||||
|         use_sparse = supports_sparse and sparse | ||||
|         # This is the "clone, but don't download anything" part. | ||||
|         cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} " | ||||
|         if supports_sparse: | ||||
|         if use_sparse: | ||||
|             cmd += f"--filter=blob:none"  # <-- The key bit | ||||
|         else: | ||||
|         # Only show warnings if the user explicitly wants sparse checkout but | ||||
|         # the Git version doesn't support it | ||||
|         elif sparse: | ||||
|             err_old = ( | ||||
|                 f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) " | ||||
|                 f"that doesn't fully support sparse checkout yet." | ||||
|  | @ -342,19 +347,19 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m | |||
|         try_run_command(cmd) | ||||
|         # Now we need to find the missing filenames for the subpath we want. | ||||
|         # Looking for this 'rev-list' command in the git --help? Hah. | ||||
|         cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}" | ||||
|         cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}" | ||||
|         ret = try_run_command(cmd) | ||||
|         git_repo = _from_http_to_git(repo) | ||||
|         # Now pass those missings into another bit of git internals | ||||
|         missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) | ||||
|         if supports_sparse and not missings: | ||||
|         if use_sparse and not missings: | ||||
|             err = ( | ||||
|                 f"Could not find any relevant files for '{subpath}'. " | ||||
|                 f"Did you specify a correct and complete path within repo '{repo}' " | ||||
|                 f"and branch {branch}?" | ||||
|             ) | ||||
|             msg.fail(err, exits=1) | ||||
|         if supports_sparse: | ||||
|         if use_sparse: | ||||
|             cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" | ||||
|             try_run_command(cmd) | ||||
|         # And finally, we can checkout our subpath | ||||
|  |  | |||
|  | @ -6,14 +6,15 @@ import shutil | |||
| import requests | ||||
| 
 | ||||
| from ...util import ensure_path, working_dir | ||||
| from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum | ||||
| from .._util import download_file, git_sparse_checkout, get_git_version | ||||
| from .._util import project_cli, Arg, Opt, PROJECT_FILE, load_project_config | ||||
| from .._util import get_checksum, download_file, git_checkout, get_git_version | ||||
| 
 | ||||
| 
 | ||||
| @project_cli.command("assets") | ||||
| def project_assets_cli( | ||||
|     # fmt: off | ||||
|     project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False), | ||||
|     sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse checkout for assets provided via Git, to only check out and clone the files needed. Requires Git v22.2+.") | ||||
|     # fmt: on | ||||
| ): | ||||
|     """Fetch project assets like datasets and pretrained weights. Assets are | ||||
|  | @ -23,10 +24,10 @@ def project_assets_cli( | |||
| 
 | ||||
|     DOCS: https://nightly.spacy.io/api/cli#project-assets | ||||
|     """ | ||||
|     project_assets(project_dir) | ||||
|     project_assets(project_dir, sparse_checkout=sparse_checkout) | ||||
| 
 | ||||
| 
 | ||||
| def project_assets(project_dir: Path) -> None: | ||||
| def project_assets(project_dir: Path, *, sparse_checkout: bool = False) -> None: | ||||
|     """Fetch assets for a project using DVC if possible. | ||||
| 
 | ||||
|     project_dir (Path): Path to project directory. | ||||
|  | @ -58,11 +59,12 @@ def project_assets(project_dir: Path) -> None: | |||
|                         shutil.rmtree(dest) | ||||
|                     else: | ||||
|                         dest.unlink() | ||||
|             git_sparse_checkout( | ||||
|             git_checkout( | ||||
|                 asset["git"]["repo"], | ||||
|                 asset["git"]["path"], | ||||
|                 dest, | ||||
|                 branch=asset["git"].get("branch"), | ||||
|                 sparse=sparse_checkout, | ||||
|             ) | ||||
|         else: | ||||
|             url = asset.get("url") | ||||
|  |  | |||
|  | @ -7,7 +7,7 @@ import re | |||
| from ... import about | ||||
| from ...util import ensure_path | ||||
| from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE | ||||
| from .._util import git_sparse_checkout, get_git_version | ||||
| from .._util import git_checkout, get_git_version | ||||
| 
 | ||||
| 
 | ||||
| @project_cli.command("clone") | ||||
|  | @ -16,7 +16,8 @@ def project_clone_cli( | |||
|     name: str = Arg(..., help="The name of the template to clone"), | ||||
|     dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False), | ||||
|     repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"), | ||||
|     branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from") | ||||
|     branch: str = Opt(about.__projects_branch__, "--branch", "-b", help="The branch to clone from"), | ||||
|     sparse_checkout: bool = Opt(False, "--sparse", "-S", help="Use sparse Git checkout to only check out and clone the files needed. Requires Git v22.2+.") | ||||
|     # fmt: on | ||||
| ): | ||||
|     """Clone a project template from a repository. Calls into "git" and will | ||||
|  | @ -28,7 +29,7 @@ def project_clone_cli( | |||
|     """ | ||||
|     if dest is None: | ||||
|         dest = Path.cwd() / Path(name).parts[-1] | ||||
|     project_clone(name, dest, repo=repo, branch=branch) | ||||
|     project_clone(name, dest, repo=repo, branch=branch, sparse_checkout=sparse_checkout) | ||||
| 
 | ||||
| 
 | ||||
| def project_clone( | ||||
|  | @ -37,6 +38,7 @@ def project_clone( | |||
|     *, | ||||
|     repo: str = about.__projects__, | ||||
|     branch: str = about.__projects_branch__, | ||||
|     sparse_checkout: bool = False, | ||||
| ) -> None: | ||||
|     """Clone a project template from a repository. | ||||
| 
 | ||||
|  | @ -50,7 +52,7 @@ def project_clone( | |||
|     project_dir = dest.resolve() | ||||
|     repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo) | ||||
|     try: | ||||
|         git_sparse_checkout(repo, name, dest, branch=branch) | ||||
|         git_checkout(repo, name, dest, branch=branch, sparse=sparse_checkout) | ||||
|     except subprocess.CalledProcessError: | ||||
|         err = f"Could not clone '{name}' from repo '{repo_name}'" | ||||
|         msg.fail(err, exits=1) | ||||
|  |  | |||
|  | @ -680,7 +680,10 @@ def run_command( | |||
|             Errors.E970.format(str_command=" ".join(command), tool=command[0]) | ||||
|         ) from None | ||||
|     except subprocess.CalledProcessError as e: | ||||
|         # We don't want a duplicate traceback here | ||||
|         # We don't want a duplicate traceback here so we're making sure the | ||||
|         # CalledProcessError isn't re-raised. We also print both the string | ||||
|         # message and the stderr, in case the error only has one of them. | ||||
|         print(e.stderr) | ||||
|         print(e) | ||||
|         sys.exit(1) | ||||
|     if ret.returncode != 0: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user