Fix the git "sparse checkout" functionality (#5973)

* Fix the git sparse checkout functionality

* Format
This commit is contained in:
Matthew Honnibal 2020-08-26 04:00:14 +02:00 committed by GitHub
parent fdcaf86c54
commit 2771e4f2b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 44 additions and 16 deletions

View File

@ -322,21 +322,41 @@ def git_sparse_checkout(
if dest.exists():
msg.fail("Destination of checkout must not exist", exits=1)
if not dest.parent.exists():
msg.fail("Parent of destination of checkout must exist", exits=1)
raise IOError("Parent of destination of checkout must exist")
# We're using Git, partial clone and sparse checkout to
# only clone the files we need
# This ends up being RIDICULOUS. omg.
# So, every tutorial and SO post talks about 'sparse checkout'...But they
# go and *clone* the whole repo. Worthless. And cloning part of a repo
# turns out to be completely broken. The only way to specify a "path" is..
# a path *on the server*? The contents of which, specifies the paths. Wat.
# Obviously this is hopelessly broken and insecure, because you can query
# arbitrary paths on the server! So nobody enables this.
# What we have to do is disable *all* files. We could then just checkout
# the path, and it'd "work", but be hopelessly slow...Because it goes and
# transfers every missing object one-by-one. So the final piece is that we
# need to use some weird git internals to fetch the missings in bulk, and
# *that* we can do by path.
# We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir:
# This is the "clone, but don't download anything" part.
cmd = (
f"git clone {repo} {tmp_dir} --no-checkout "
"--depth 1 --config core.sparseCheckout=true"
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
"--filter=blob:none" # <-- The key bit
)
if branch is not None:
cmd = f"{cmd} -b {branch}"
run_command(cmd)
with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f:
f.write(subpath)
run_command(["git", "-C", str(tmp_dir), "fetch"])
run_command(["git", "-C", str(tmp_dir), "checkout"])
run_command(cmd, capture=True)
# Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
ret = run_command(cmd, capture=True)
missings = "\n".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
# Now pass those missings into another bit of git internals
run_command(
f"git -C {tmp_dir} fetch-pack --stdin {repo}", capture=True, stdin=missings
)
# And finally, we can checkout our subpath
run_command(f"git -C {tmp_dir} checkout {branch} {subpath}")
# We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
print(dest)
print(list(dest.iterdir()))

View File

@ -30,11 +30,11 @@ class PRFScore:
@property
def precision(self) -> float:
return (self.tp / (self.tp + self.fp + 1e-100))
return self.tp / (self.tp + self.fp + 1e-100)
@property
def recall(self) -> float:
return (self.tp / (self.tp + self.fn + 1e-100))
return self.tp / (self.tp + self.fn + 1e-100)
@property
def fscore(self) -> float:

View File

@ -572,7 +572,7 @@ def join_command(command: List[str]) -> str:
return " ".join(shlex.quote(cmd) for cmd in command)
def run_command(command: Union[str, List[str]]) -> None:
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None:
"""Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed.
@ -582,13 +582,21 @@ def run_command(command: Union[str, List[str]]) -> None:
if isinstance(command, str):
command = split_command(command)
try:
status = subprocess.call(command, env=os.environ.copy())
ret = subprocess.run(
command,
env=os.environ.copy(),
capture_output=capture,
input=stdin,
text=True,
check=True,
)
except FileNotFoundError:
raise FileNotFoundError(
Errors.E970.format(str_command=" ".join(command), tool=command[0])
) from None
if status != 0:
sys.exit(status)
if ret.returncode != 0:
sys.exit(ret.returncode)
return ret
@contextmanager