Merge pull request #6047 from svlandeg/feature/doc-fixes

Fix branch for spacy clone + UX
This commit is contained in:
Ines Montani 2020-09-10 13:05:41 +02:00 committed by GitHub
commit 9f08ea80b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 34 additions and 18 deletions

View File

@ -6,6 +6,7 @@ from wasabi import msg
import srsly
import hashlib
import typer
import subprocess
from click import NoSuchOption
from typer.main import get_command
from contextlib import contextmanager
@ -326,22 +327,33 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
f"--filter=blob:none " # <-- The key bit
f"-b {branch}"
)
run_command(cmd, capture=True)
_attempt_run_command(cmd)
# Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
ret = run_command(cmd, capture=True)
repo = _from_http_to_git(repo)
ret = _attempt_run_command(cmd)
git_repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
run_command(cmd, capture=True)
if not missings:
err = f"Could not find any relevant files for '{subpath}'. " \
f"Did you specify a correct and complete path within repo '{repo}' " \
f"and branch {branch}?"
msg.fail(err, exits=1)
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
_attempt_run_command(cmd)
# And finally, we can checkout our subpath
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
run_command(cmd)
_attempt_run_command(cmd)
# We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
def _attempt_run_command(cmd):
    """Run a command with captured output, exiting cleanly on failure.

    cmd: The command to pass on to run_command.
    RETURNS: Whatever run_command returns when the command succeeds.

    If the command raises subprocess.CalledProcessError, a failure message
    is printed via msg.fail and the process exits with status 1.
    """
    try:
        return run_command(cmd, capture=True)
    except subprocess.CalledProcessError as e:
        err = f"Could not run command: {cmd}."
        # Report the exit status as detail text so the failure is diagnosable;
        # previously the caught exception was bound but never used.
        msg.fail(err, f"Exit code: {e.returncode}", exits=1)
def _from_http_to_git(repo):
if repo.startswith("http://"):

View File

@ -28,7 +28,7 @@ def project_clone_cli(
"""
if dest is None:
dest = Path.cwd() / name
project_clone(name, dest, repo=repo)
project_clone(name, dest, repo=repo, branch=branch)
def project_clone(
@ -43,13 +43,14 @@ def project_clone(
name (str): Name of subdirectory to clone.
dest (Path): Destination path of cloned project.
repo (str): URL of Git repo containing project templates.
branch (str): The branch to clone from
"""
dest = ensure_path(dest)
check_clone(name, dest, repo)
project_dir = dest.resolve()
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
try:
git_sparse_checkout(repo, name, dest)
git_sparse_checkout(repo, name, dest, branch=branch)
except subprocess.CalledProcessError:
err = f"Could not clone '{name}' from repo '{repo_name}'"
msg.fail(err, exits=1)

View File

@ -297,7 +297,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
provide a **representative data sample** to the
[`begin_training`](#begin_training) method. In this case, all labels found in
the sample will be automatically added to the model, and the output dimension
will be [inferred](/usage/layers-architectures#shape-inference) automatically.
will be [inferred](/usage/layers-architectures#thinc-shape-inference)
automatically.
> #### Example
>

View File

@ -285,7 +285,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
provide a **representative data sample** to the
[`begin_training`](#begin_training) method. In this case, all labels found in
the sample will be automatically added to the model, and the output dimension
will be [inferred](/usage/layers-architectures#shape-inference) automatically.
will be [inferred](/usage/layers-architectures#thinc-shape-inference)
automatically.
> #### Example
>

View File

@ -205,8 +205,8 @@ examples can either be the full training data or a representative sample. They
are used to **initialize the models** of trainable pipeline components and are
passed each component's [`begin_training`](/api/pipe#begin_training) method, if
available. Initialization includes validating the network,
[inferring missing shapes](/usage/layers-architectures#shape-inference) and
setting up the label scheme based on the data.
[inferring missing shapes](/usage/layers-architectures#thinc-shape-inference)
and setting up the label scheme based on the data.
If no `get_examples` function is provided when calling `nlp.begin_training`, the
pipeline components will be initialized with generic data. In this case, it is

View File

@ -263,7 +263,8 @@ already been fully [initialized](#begin_training). Note that you don't have to
call this method if you provide a **representative data sample** to the
[`begin_training`](#begin_training) method. In this case, all labels found in
the sample will be automatically added to the model, and the output dimension
will be [inferred](/usage/layers-architectures#shape-inference) automatically.
will be [inferred](/usage/layers-architectures#thinc-shape-inference)
automatically.
> #### Example
>

View File

@ -317,7 +317,7 @@ Note that in general, you don't have to call `pipe.add_label` if you provide a
representative data sample to the [`begin_training`](#begin_training) method. In
this case, all labels found in the sample will be automatically added to the
model, and the output dimension will be
[inferred](/usage/layers-architectures#shape-inference) automatically.
[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
## Pipe.is_resizable {#is_resizable tag="method"}

View File

@ -293,8 +293,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
that you don't have to call this method if you provide a **representative data
sample** to the [`begin_training`](#begin_training) method. In this case, all
labels found in the sample will be automatically added to the model, and the
output dimension will be [inferred](/usage/layers-architectures#shape-inference)
automatically.
output dimension will be
[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
> #### Example
>

View File

@ -302,8 +302,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
that you don't have to call this method if you provide a **representative data
sample** to the [`begin_training`](#begin_training) method. In this case, all
labels found in the sample will be automatically added to the model, and the
output dimension will be [inferred](/usage/layers-architectures#shape-inference)
automatically.
output dimension will be
[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
> #### Example
>