Merge pull request #6047 from svlandeg/feature/doc-fixes

Fix branch for spacy clone + UX
2025-08-09 06:34:54 +03:00 · 2020-09-10 13:05:41 +02:00 · 2020-09-10 13:05:41 +02:00 · 9f08ea80b4
commit 9f08ea80b4
parent 763e302dcc a36766d153
9 changed files with 34 additions and 18 deletions
--- a/spacy/cli/_util.py
+++ b/spacy/cli/_util.py
@ -6,6 +6,7 @@ from wasabi import msg
 import srsly
 import hashlib
 import typer
+import subprocess
 from click import NoSuchOption
 from typer.main import get_command
 from contextlib import contextmanager
@ -326,22 +327,33 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
            f"--filter=blob:none "  # <-- The key bit
            f"-b {branch}"
        )
-        run_command(cmd, capture=True)
+        _attempt_run_command(cmd)
        # Now we need to find the missing filenames for the subpath we want.
        # Looking for this 'rev-list' command in the git --help? Hah.
        cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
-        ret = run_command(cmd, capture=True)
-        repo = _from_http_to_git(repo)
+        ret = _attempt_run_command(cmd)
+        git_repo = _from_http_to_git(repo)
        # Now pass those missings into another bit of git internals
        missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
-        cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
-        run_command(cmd, capture=True)
+        if not missings:
+           err = f"Could not find any relevant files for '{subpath}'. " \
+                 f"Did you specify a correct and complete path within repo '{repo}' " \
+                 f"and branch {branch}?"
+           msg.fail(err, exits=1)
+        cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
+        _attempt_run_command(cmd)
        # And finally, we can checkout our subpath
        cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
-        run_command(cmd)
+        _attempt_run_command(cmd)
        # We need Path(name) to make sure we also support subdirectories
        shutil.move(str(tmp_dir / Path(subpath)), str(dest))

+def _attempt_run_command(cmd):
+    try:
+        return run_command(cmd, capture=True)
+    except subprocess.CalledProcessError as e:
+        err = f"Could not run command: {cmd}."
+        msg.fail(err, exits=1)

 def _from_http_to_git(repo):
    if repo.startswith("http://"):
--- a/spacy/cli/project/clone.py
+++ b/spacy/cli/project/clone.py
@ -28,7 +28,7 @@ def project_clone_cli(
    """
    if dest is None:
        dest = Path.cwd() / name
-    project_clone(name, dest, repo=repo)
+    project_clone(name, dest, repo=repo, branch=branch)


 def project_clone(
@ -43,13 +43,14 @@ def project_clone(
    name (str): Name of subdirectory to clone.
    dest (Path): Destination path of cloned project.
    repo (str): URL of Git repo containing project templates.
+    branch (str): The branch to clone from.
    """
    dest = ensure_path(dest)
    check_clone(name, dest, repo)
    project_dir = dest.resolve()
    repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo)
    try:
-        git_sparse_checkout(repo, name, dest)
+        git_sparse_checkout(repo, name, dest, branch=branch)
    except subprocess.CalledProcessError:
        err = f"Could not clone '{name}' from repo '{repo_name}'"
        msg.fail(err, exits=1)
--- a/website/docs/api/dependencyparser.md
+++ b/website/docs/api/dependencyparser.md
@ -297,7 +297,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
 provide a **representative data sample** to the
 [`begin_training`](#begin_training) method. In this case, all labels found in
 the sample will be automatically added to the model, and the output dimension
-will be [inferred](/usage/layers-architectures#shape-inference) automatically.
+will be [inferred](/usage/layers-architectures#thinc-shape-inference)
+automatically.

 > #### Example
 >
--- a/website/docs/api/entityrecognizer.md
+++ b/website/docs/api/entityrecognizer.md
@ -285,7 +285,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you
 provide a **representative data sample** to the
 [`begin_training`](#begin_training) method. In this case, all labels found in
 the sample will be automatically added to the model, and the output dimension
-will be [inferred](/usage/layers-architectures#shape-inference) automatically.
+will be [inferred](/usage/layers-architectures#thinc-shape-inference)
+automatically.

 > #### Example
 >
--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@ -205,8 +205,8 @@ examples can either be the full training data or a representative sample. They
 are used to **initialize the models** of trainable pipeline components and are
 passed each component's [`begin_training`](/api/pipe#begin_training) method, if
 available. Initialization includes validating the network,
-[inferring missing shapes](/usage/layers-architectures#shape-inference) and
-setting up the label scheme based on the data.
+[inferring missing shapes](/usage/layers-architectures#thinc-shape-inference)
+and setting up the label scheme based on the data.

 If no `get_examples` function is provided when calling `nlp.begin_training`, the
 pipeline components will be initialized with generic data. In this case, it is
--- a/website/docs/api/morphologizer.md
+++ b/website/docs/api/morphologizer.md
@ -263,7 +263,8 @@ already been fully [initialized](#begin_training). Note that you don't have to
 call this method if you provide a **representative data sample** to the
 [`begin_training`](#begin_training) method. In this case, all labels found in
 the sample will be automatically added to the model, and the output dimension
-will be [inferred](/usage/layers-architectures#shape-inference) automatically.
+will be [inferred](/usage/layers-architectures#thinc-shape-inference)
+automatically.

 > #### Example
 >
--- a/website/docs/api/pipe.md
+++ b/website/docs/api/pipe.md
@ -317,7 +317,7 @@ Note that in general, you don't have to call `pipe.add_label` if you provide a
 representative data sample to the [`begin_training`](#begin_training) method. In
 this case, all labels found in the sample will be automatically added to the
 model, and the output dimension will be
-[inferred](/usage/layers-architectures#shape-inference) automatically.
+[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.

 ## Pipe.is_resizable {#is_resizable tag="method"}

--- a/website/docs/api/tagger.md
+++ b/website/docs/api/tagger.md
@ -293,8 +293,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
 that you don't have to call this method if you provide a **representative data
 sample** to the [`begin_training`](#begin_training) method. In this case, all
 labels found in the sample will be automatically added to the model, and the
-output dimension will be [inferred](/usage/layers-architectures#shape-inference)
-automatically.
+output dimension will be
+[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.

 > #### Example
 >
--- a/website/docs/api/textcategorizer.md
+++ b/website/docs/api/textcategorizer.md
@ -302,8 +302,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note
 that you don't have to call this method if you provide a **representative data
 sample** to the [`begin_training`](#begin_training) method. In this case, all
 labels found in the sample will be automatically added to the model, and the
-output dimension will be [inferred](/usage/layers-architectures#shape-inference)
-automatically.
+output dimension will be
+[inferred](/usage/layers-architectures#thinc-shape-inference) automatically.

 > #### Example
 >