Use simple git clone call if not sparse

This commit is contained in:
Matthew Honnibal 2020-09-20 16:22:04 +02:00
parent 2c24d633d0
commit a0fb5e50db

View File

@ -308,6 +308,31 @@ def git_checkout(
msg.fail("Destination of checkout must not exist", exits=1) msg.fail("Destination of checkout must not exist", exits=1)
if not dest.parent.exists(): if not dest.parent.exists():
raise IOError("Parent of destination of checkout must exist") raise IOError("Parent of destination of checkout must exist")
if sparse and git_version >= (2, 22):
return git_sparse_checkout(repo, subpath, dest, branch)
elif sparse:
# Only show warnings if the user explicitly wants sparse checkout but
# the Git version doesn't support it
err_old = (
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet."
)
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
msg.warn(
f"{err_unk if git_version == (0, 0) else err_old} "
f"This means that more files than necessary may be downloaded "
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
with make_tempdir() as tmp_dir:
cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
ret = run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories
shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
def git_sparse_checkout(repo, subpath, dest, branch):
# We're using Git, partial clone and sparse checkout to # We're using Git, partial clone and sparse checkout to
# only clone the files we need # only clone the files we need
# This ends up being RIDICULOUS. omg. # This ends up being RIDICULOUS. omg.
@ -324,47 +349,28 @@ def git_checkout(
# *that* we can do by path. # *that* we can do by path.
# We're using Git and sparse checkout to only clone the files we need # We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir: with make_tempdir() as tmp_dir:
supports_sparse = git_version >= (2, 22)
use_sparse = supports_sparse and sparse
# This is the "clone, but don't download anything" part. # This is the "clone, but don't download anything" part.
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} " cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} --filter=blob:none"
if use_sparse: run_command(cmd)
cmd += f"--filter=blob:none" # <-- The key bit
# Only show warnings if the user explicitly wants sparse checkout but
# the Git version doesn't support it
elif sparse:
err_old = (
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet."
)
err_unk = "You're running an unknown version of Git, so sparse checkout has been disabled."
msg.warn(
f"{err_unk if git_version == (0, 0) else err_old} "
f"This means that more files than necessary may be downloaded "
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
try_run_command(cmd)
# Now we need to find the missing filenames for the subpath we want. # Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah. # Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}" cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if use_sparse else ''} -- {subpath}"
ret = try_run_command(cmd) ret = run_command(cmd, capture=True)
git_repo = _from_http_to_git(repo) git_repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals # Now pass those missings into another bit of git internals
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
if use_sparse and not missings: if not missings:
err = ( err = (
f"Could not find any relevant files for '{subpath}'. " f"Could not find any relevant files for '{subpath}'. "
f"Did you specify a correct and complete path within repo '{repo}' " f"Did you specify a correct and complete path within repo '{repo}' "
f"and branch {branch}?" f"and branch {branch}?"
) )
msg.fail(err, exits=1) msg.fail(err, exits=1)
if use_sparse:
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
try_run_command(cmd) run_command(cmd, capture=True)
# And finally, we can checkout our subpath # And finally, we can checkout our subpath
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
try_run_command(cmd) run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories # We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest)) shutil.move(str(tmp_dir / Path(subpath)), str(dest))
@ -378,7 +384,7 @@ def get_git_version(
RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
(0, 0) if the version couldn't be determined. (0, 0) if the version couldn't be determined.
""" """
ret = try_run_command(["git", "--version"], error=error) ret = run_command("git --version", capture=True)
stdout = ret.stdout.strip() stdout = ret.stdout.strip()
if not stdout or not stdout.startswith("git version"): if not stdout or not stdout.startswith("git version"):
return (0, 0) return (0, 0)
@ -386,23 +392,6 @@ def get_git_version(
return (int(version[0]), int(version[1])) return (int(version[0]), int(version[1]))
def try_run_command(
cmd: Union[str, List[str]], error: str = "Could not run command"
) -> subprocess.CompletedProcess:
"""Try running a command and raise an error if it fails.
cmd (Union[str, List[str]]): The command to run.
error (str): The error message.
RETURNS (CompletedProcess): The completed process if the command ran.
"""
try:
return run_command(cmd, capture=True)
except subprocess.CalledProcessError as e:
msg.fail(error)
print(cmd)
sys.exit(1)
def _from_http_to_git(repo: str) -> str: def _from_http_to_git(repo: str) -> str:
if repo.startswith("http://"): if repo.startswith("http://"):
repo = repo.replace(r"http://", r"https://") repo = repo.replace(r"http://", r"https://")