mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-27 20:33:42 +03:00
Merge pull request #6047 from svlandeg/feature/doc-fixes
Fix branch for spacy clone + UX
This commit is contained in:
commit
9f08ea80b4
|
@ -6,6 +6,7 @@ from wasabi import msg
|
||||||
import srsly
|
import srsly
|
||||||
import hashlib
|
import hashlib
|
||||||
import typer
|
import typer
|
||||||
|
import subprocess
|
||||||
from click import NoSuchOption
|
from click import NoSuchOption
|
||||||
from typer.main import get_command
|
from typer.main import get_command
|
||||||
from contextlib import contextmanager
|
from contextlib import contextmanager
|
||||||
|
@ -326,22 +327,33 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
||||||
f"--filter=blob:none " # <-- The key bit
|
f"--filter=blob:none " # <-- The key bit
|
||||||
f"-b {branch}"
|
f"-b {branch}"
|
||||||
)
|
)
|
||||||
run_command(cmd, capture=True)
|
_attempt_run_command(cmd)
|
||||||
# Now we need to find the missing filenames for the subpath we want.
|
# Now we need to find the missing filenames for the subpath we want.
|
||||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||||
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
||||||
ret = run_command(cmd, capture=True)
|
ret = _attempt_run_command(cmd)
|
||||||
repo = _from_http_to_git(repo)
|
git_repo = _from_http_to_git(repo)
|
||||||
# Now pass those missings into another bit of git internals
|
# Now pass those missings into another bit of git internals
|
||||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||||
cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
|
if not missings:
|
||||||
run_command(cmd, capture=True)
|
err = f"Could not find any relevant files for '{subpath}'. " \
|
||||||
|
f"Did you specify a correct and complete path within repo '{repo}' " \
|
||||||
|
f"and branch {branch}?"
|
||||||
|
msg.fail(err, exits=1)
|
||||||
|
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||||
|
_attempt_run_command(cmd)
|
||||||
# And finally, we can checkout our subpath
|
# And finally, we can checkout our subpath
|
||||||
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
|
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
|
||||||
run_command(cmd)
|
_attempt_run_command(cmd)
|
||||||
# We need Path(name) to make sure we also support subdirectories
|
# We need Path(name) to make sure we also support subdirectories
|
||||||
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
||||||
|
|
||||||
|
def _attempt_run_command(cmd):
    """Run a command through `run_command` and report a failure via `msg.fail`.

    cmd (str): The command to run. It is passed unchanged to `run_command`
        with `capture=True`.
    RETURNS: Whatever `run_command` returns for the call. The sparse-checkout
        code in this module reads `.stdout` from that result.
    """
    try:
        return run_command(cmd, capture=True)
    except subprocess.CalledProcessError:
        # The exception instance was never used, so it is not bound to a name.
        # Instead of propagating it, a failure message is handed to
        # `msg.fail` with `exits=1`.
        err = f"Could not run command: {cmd}."
        msg.fail(err, exits=1)
|
||||||
|
|
||||||
def _from_http_to_git(repo):
|
def _from_http_to_git(repo):
|
||||||
if repo.startswith("http://"):
|
if repo.startswith("http://"):
|
||||||
|
|
|
@ -28,7 +28,7 @@ def project_clone_cli(
|
||||||
"""
|
"""
|
||||||
if dest is None:
|
if dest is None:
|
||||||
dest = Path.cwd() / name
|
dest = Path.cwd() / name
|
||||||
project_clone(name, dest, repo=repo)
|
project_clone(name, dest, repo=repo, branch=branch)
|
||||||
|
|
||||||
|
|
||||||
def project_clone(
|
def project_clone(
|
||||||
|
@ -43,13 +43,14 @@ def project_clone(
|
||||||
name (str): Name of subdirectory to clone.
|
name (str): Name of subdirectory to clone.
|
||||||
dest (Path): Destination path of cloned project.
|
dest (Path): Destination path of cloned project.
|
||||||
repo (str): URL of Git repo containing project templates.
|
repo (str): URL of Git repo containing project templates.
|
||||||
|
branch (str): The branch to clone from
|
||||||
"""
|
"""
|
||||||
dest = ensure_path(dest)
|
dest = ensure_path(dest)
|
||||||
check_clone(name, dest, repo)
|
check_clone(name, dest, repo)
|
||||||
project_dir = dest.resolve()
|
project_dir = dest.resolve()
|
||||||
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
|
repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
|
||||||
try:
|
try:
|
||||||
git_sparse_checkout(repo, name, dest)
|
git_sparse_checkout(repo, name, dest, branch=branch)
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||||
msg.fail(err, exits=1)
|
msg.fail(err, exits=1)
|
||||||
|
|
|
@ -297,7 +297,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
|
||||||
provide a **representative data sample** to the
|
provide a **representative data sample** to the
|
||||||
[`begin_training`](#begin_training) method. In this case, all labels found in
|
[`begin_training`](#begin_training) method. In this case, all labels found in
|
||||||
the sample will be automatically added to the model, and the output dimension
|
the sample will be automatically added to the model, and the output dimension
|
||||||
will be [inferred](/usage/layers-architectures#shape-inference) automatically.
|
will be [inferred](/usage/layers-architectures#thinc-shape-inference)
|
||||||
|
automatically.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
|
|
@ -285,7 +285,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
|
||||||
provide a **representative data sample** to the
|
provide a **representative data sample** to the
|
||||||
[`begin_training`](#begin_training) method. In this case, all labels found in
|
[`begin_training`](#begin_training) method. In this case, all labels found in
|
||||||
the sample will be automatically added to the model, and the output dimension
|
the sample will be automatically added to the model, and the output dimension
|
||||||
will be [inferred](/usage/layers-architectures#shape-inference) automatically.
|
will be [inferred](/usage/layers-architectures#thinc-shape-inference)
|
||||||
|
automatically.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
|
|
@ -205,8 +205,8 @@ examples can either be the full training data or a representative sample. They
|
||||||
are used to **initialize the models** of trainable pipeline components and are
|
are used to **initialize the models** of trainable pipeline components and are
|
||||||
passed each component's [`begin_training`](/api/pipe#begin_training) method, if
|
passed each component's [`begin_training`](/api/pipe#begin_training) method, if
|
||||||
available. Initialization includes validating the network,
|
available. Initialization includes validating the network,
|
||||||
[inferring missing shapes](/usage/layers-architectures#shape-inference) and
|
[inferring missing shapes](/usage/layers-architectures#thinc-shape-inference)
|
||||||
setting up the label scheme based on the data.
|
and setting up the label scheme based on the data.
|
||||||
|
|
||||||
If no `get_examples` function is provided when calling `nlp.begin_training`, the
|
If no `get_examples` function is provided when calling `nlp.begin_training`, the
|
||||||
pipeline components will be initialized with generic data. In this case, it is
|
pipeline components will be initialized with generic data. In this case, it is
|
||||||
|
|
|
@ -263,7 +263,8 @@ already been fully [initialized](#begin_training). Note that you don't have to
|
||||||
call this method if you provide a **representative data sample** to the
|
call this method if you provide a **representative data sample** to the
|
||||||
[`begin_training`](#begin_training) method. In this case, all labels found in
|
[`begin_training`](#begin_training) method. In this case, all labels found in
|
||||||
the sample will be automatically added to the model, and the output dimension
|
the sample will be automatically added to the model, and the output dimension
|
||||||
will be [inferred](/usage/layers-architectures#shape-inference) automatically.
|
will be [inferred](/usage/layers-architectures#thinc-shape-inference)
|
||||||
|
automatically.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
|
|
@ -317,7 +317,7 @@ Note that in general, you don't have to call `pipe.add_label` if you provide a
|
||||||
representative data sample to the [`begin_training`](#begin_training) method. In
|
representative data sample to the [`begin_training`](#begin_training) method. In
|
||||||
this case, all labels found in the sample will be automatically added to the
|
this case, all labels found in the sample will be automatically added to the
|
||||||
model, and the output dimension will be
|
model, and the output dimension will be
|
||||||
[inferred](/usage/layers-architectures#shape-inference) automatically.
|
[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
|
||||||
|
|
||||||
## Pipe.is_resizable {#is_resizable tag="method"}
|
## Pipe.is_resizable {#is_resizable tag="method"}
|
||||||
|
|
||||||
|
|
|
@ -293,8 +293,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
|
||||||
that you don't have to call this method if you provide a **representative data
|
that you don't have to call this method if you provide a **representative data
|
||||||
sample** to the [`begin_training`](#begin_training) method. In this case, all
|
sample** to the [`begin_training`](#begin_training) method. In this case, all
|
||||||
labels found in the sample will be automatically added to the model, and the
|
labels found in the sample will be automatically added to the model, and the
|
||||||
output dimension will be [inferred](/usage/layers-architectures#shape-inference)
|
output dimension will be
|
||||||
automatically.
|
[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
|
|
@ -302,8 +302,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
|
||||||
that you don't have to call this method if you provide a **representative data
|
that you don't have to call this method if you provide a **representative data
|
||||||
sample** to the [`begin_training`](#begin_training) method. In this case, all
|
sample** to the [`begin_training`](#begin_training) method. In this case, all
|
||||||
labels found in the sample will be automatically added to the model, and the
|
labels found in the sample will be automatically added to the model, and the
|
||||||
output dimension will be [inferred](/usage/layers-architectures#shape-inference)
|
output dimension will be
|
||||||
automatically.
|
[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.
|
||||||
|
|
||||||
> #### Example
|
> #### Example
|
||||||
>
|
>
|
||||||
|
|
Loading…
Reference in New Issue
Block a user