Merge pull request #6047 from svlandeg/feature/doc-fixes

Fix branch for spacy clone + UX
This commit is contained in:
Ines Montani 2020-09-10 13:05:41 +02:00 committed by GitHub
commit 9f08ea80b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 34 additions and 18 deletions

View File

@ -6,6 +6,7 @@ from wasabi import msg
import srsly import srsly
import hashlib import hashlib
import typer import typer
import subprocess
from click import NoSuchOption from click import NoSuchOption
from typer.main import get_command from typer.main import get_command
from contextlib import contextmanager from contextlib import contextmanager
@ -326,22 +327,33 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
f"--filter=blob:none " # <-- The key bit f"--filter=blob:none " # <-- The key bit
f"-b {branch}" f"-b {branch}"
) )
run_command(cmd, capture=True) _attempt_run_command(cmd)
# Now we need to find the missing filenames for the subpath we want. # Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah. # Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
ret = run_command(cmd, capture=True) ret = _attempt_run_command(cmd)
repo = _from_http_to_git(repo) git_repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals # Now pass those missings into another bit of git internals
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}" if not missings:
run_command(cmd, capture=True) err = f"Could not find any relevant files for '{subpath}'. " \
f"Did you specify a correct and complete path within repo '{repo}' " \
f"and branch {branch}?"
msg.fail(err, exits=1)
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
_attempt_run_command(cmd)
# And finally, we can checkout our subpath # And finally, we can checkout our subpath
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
run_command(cmd) _attempt_run_command(cmd)
# We need Path(name) to make sure we also support subdirectories # We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest)) shutil.move(str(tmp_dir / Path(subpath)), str(dest))
def _attempt_run_command(cmd):
    """Run a command with captured output and exit cleanly if it fails.

    Thin wrapper around run_command(cmd, capture=True) that converts a
    subprocess.CalledProcessError into a printed failure message followed by
    a process exit (via msg.fail(..., exits=1)) instead of a traceback.

    cmd (str): The command to run.
    RETURNS: Whatever run_command returns on success; callers in this file
        read the captured output from its `stdout` attribute.
    """
    try:
        return run_command(cmd, capture=True)
    except subprocess.CalledProcessError as e:
        err = f"Could not run command: {cmd}."
        # Output is captured rather than streamed, so without this the user
        # would never see why the command failed. Surface the exit code and,
        # when available, whatever the command wrote to stderr.
        details = f"Exit code: {e.returncode}"
        stderr = e.stderr
        if stderr:
            if isinstance(stderr, bytes):
                stderr = stderr.decode("utf8", errors="replace")
            details = f"{details}\n{stderr.strip()}"
        msg.fail(err, details, exits=1)
def _from_http_to_git(repo): def _from_http_to_git(repo):
if repo.startswith("http://"): if repo.startswith("http://"):

View File

@ -28,7 +28,7 @@ def project_clone_cli(
""" """
if dest is None: if dest is None:
dest = Path.cwd() / name dest = Path.cwd() / name
project_clone(name, dest, repo=repo) project_clone(name, dest, repo=repo, branch=branch)
def project_clone( def project_clone(
@ -43,13 +43,14 @@ def project_clone(
name (str): Name of subdirectory to clone. name (str): Name of subdirectory to clone.
dest (Path): Destination path of cloned project. dest (Path): Destination path of cloned project.
repo (str): URL of Git repo containing project templates. repo (str): URL of Git repo containing project templates.
branch (str): The branch to clone from
""" """
dest = ensure_path(dest) dest = ensure_path(dest)
check_clone(name, dest, repo) check_clone(name, dest, repo)
project_dir = dest.resolve() project_dir = dest.resolve()
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo) repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
try: try:
git_sparse_checkout(repo, name, dest) git_sparse_checkout(repo, name, dest, branch=branch)
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
err = f"Could not clone '{name}' from repo '{repo_name}'" err = f"Could not clone '{name}' from repo '{repo_name}'"
msg.fail(err, exits=1) msg.fail(err, exits=1)

View File

@ -297,7 +297,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
provide a **representative data sample** to the provide a **representative data sample** to the
[`begin_training`](#begin_training) method. In this case, all labels found in [`begin_training`](#begin_training) method. In this case, all labels found in
the sample will be automatically added to the model, and the output dimension the sample will be automatically added to the model, and the output dimension
will be [inferred](/usage/layers-architectures#shape-inference) automatically. will be [inferred](/usage/layers-architectures#thinc-shape-inference)
automatically.
> #### Example > #### Example
> >

View File

@ -285,7 +285,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
provide a **representative data sample** to the provide a **representative data sample** to the
[`begin_training`](#begin_training) method. In this case, all labels found in [`begin_training`](#begin_training) method. In this case, all labels found in
the sample will be automatically added to the model, and the output dimension the sample will be automatically added to the model, and the output dimension
will be [inferred](/usage/layers-architectures#shape-inference) automatically. will be [inferred](/usage/layers-architectures#thinc-shape-inference)
automatically.
> #### Example > #### Example
> >

View File

@ -205,8 +205,8 @@ examples can either be the full training data or a representative sample. They
are used to **initialize the models** of trainable pipeline components and are are used to **initialize the models** of trainable pipeline components and are
passed each component's [`begin_training`](/api/pipe#begin_training) method, if passed each component's [`begin_training`](/api/pipe#begin_training) method, if
available. Initialization includes validating the network, available. Initialization includes validating the network,
[inferring missing shapes](/usage/layers-architectures#shape-inference) and [inferring missing shapes](/usage/layers-architectures#thinc-shape-inference)
setting up the label scheme based on the data. and setting up the label scheme based on the data.
If no `get_examples` function is provided when calling `nlp.begin_training`, the If no `get_examples` function is provided when calling `nlp.begin_training`, the
pipeline components will be initialized with generic data. In this case, it is pipeline components will be initialized with generic data. In this case, it is

View File

@ -263,7 +263,8 @@ already been fully [initialized](#begin_training). Note that you don't have to
call this method if you provide a **representative data sample** to the call this method if you provide a **representative data sample** to the
[`begin_training`](#begin_training) method. In this case, all labels found in [`begin_training`](#begin_training) method. In this case, all labels found in
the sample will be automatically added to the model, and the output dimension the sample will be automatically added to the model, and the output dimension
will be [inferred](/usage/layers-architectures#shape-inference) automatically. will be [inferred](/usage/layers-architectures#thinc-shape-inference)
automatically.
> #### Example > #### Example
> >

View File

@ -317,7 +317,7 @@ Note that in general, you don't have to call `pipe.add_label` if you provide a
representative data sample to the [`begin_training`](#begin_training) method. In representative data sample to the [`begin_training`](#begin_training) method. In
this case, all labels found in the sample will be automatically added to the this case, all labels found in the sample will be automatically added to the
model, and the output dimension will be model, and the output dimension will be
[inferred](/usage/layers-architectures#shape-inference) automatically. [inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
## Pipe.is_resizable {#is_resizable tag="method"} ## Pipe.is_resizable {#is_resizable tag="method"}

View File

@ -293,8 +293,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
that you don't have to call this method if you provide a **representative data that you don't have to call this method if you provide a **representative data
sample** to the [`begin_training`](#begin_training) method. In this case, all sample** to the [`begin_training`](#begin_training) method. In this case, all
labels found in the sample will be automatically added to the model, and the labels found in the sample will be automatically added to the model, and the
output dimension will be [inferred](/usage/layers-architectures#shape-inference) output dimension will be
automatically. [inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
> #### Example > #### Example
> >

View File

@ -302,8 +302,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
that you don't have to call this method if you provide a **representative data that you don't have to call this method if you provide a **representative data
sample** to the [`begin_training`](#begin_training) method. In this case, all sample** to the [`begin_training`](#begin_training) method. In this case, all
labels found in the sample will be automatically added to the model, and the labels found in the sample will be automatically added to the model, and the
output dimension will be [inferred](/usage/layers-architectures#shape-inference) output dimension will be
automatically. [inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
> #### Example > #### Example
> >