From 9073d99fc92efa325c3a994ea54bb8603c78d6ee Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 10:22:59 +0200 Subject: [PATCH 1/4] fix link to shape inference section --- website/docs/api/dependencyparser.md | 3 ++- website/docs/api/entityrecognizer.md | 3 ++- website/docs/api/language.md | 4 ++-- website/docs/api/morphologizer.md | 3 ++- website/docs/api/pipe.md | 2 +- website/docs/api/tagger.md | 4 ++-- website/docs/api/textcategorizer.md | 4 ++-- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/website/docs/api/dependencyparser.md b/website/docs/api/dependencyparser.md index ed5e8bdb2..5bd2ea8ad 100644 --- a/website/docs/api/dependencyparser.md +++ b/website/docs/api/dependencyparser.md @@ -297,7 +297,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you provide a **representative data sample** to the [`begin_training`](#begin_training) method. In this case, all labels found in the sample will be automatically added to the model, and the output dimension -will be [inferred](/usage/layers-architectures#shape-inference) automatically. +will be [inferred](/usage/layers-architectures#thinc-shape-inference) +automatically. > #### Example > diff --git a/website/docs/api/entityrecognizer.md b/website/docs/api/entityrecognizer.md index fc6904824..9189fe763 100644 --- a/website/docs/api/entityrecognizer.md +++ b/website/docs/api/entityrecognizer.md @@ -285,7 +285,8 @@ Add a new label to the pipe. Note that you don't have to call this method if you provide a **representative data sample** to the [`begin_training`](#begin_training) method. In this case, all labels found in the sample will be automatically added to the model, and the output dimension -will be [inferred](/usage/layers-architectures#shape-inference) automatically. +will be [inferred](/usage/layers-architectures#thinc-shape-inference) +automatically. 
> #### Example > diff --git a/website/docs/api/language.md b/website/docs/api/language.md index 9c9ccb6cf..530f7740d 100644 --- a/website/docs/api/language.md +++ b/website/docs/api/language.md @@ -205,8 +205,8 @@ examples can either be the full training data or a representative sample. They are used to **initialize the models** of trainable pipeline components and are passed each component's [`begin_training`](/api/pipe#begin_training) method, if available. Initialization includes validating the network, -[inferring missing shapes](/usage/layers-architectures#shape-inference) and -setting up the label scheme based on the data. +[inferring missing shapes](/usage/layers-architectures#thinc-shape-inference) +and setting up the label scheme based on the data. If no `get_examples` function is provided when calling `nlp.begin_training`, the pipeline components will be initialized with generic data. In this case, it is diff --git a/website/docs/api/morphologizer.md b/website/docs/api/morphologizer.md index c83d3d9fd..c4787c050 100644 --- a/website/docs/api/morphologizer.md +++ b/website/docs/api/morphologizer.md @@ -263,7 +263,8 @@ already been fully [initialized](#begin_training). Note that you don't have to call this method if you provide a **representative data sample** to the [`begin_training`](#begin_training) method. In this case, all labels found in the sample will be automatically added to the model, and the output dimension -will be [inferred](/usage/layers-architectures#shape-inference) automatically. +will be [inferred](/usage/layers-architectures#thinc-shape-inference) +automatically. > #### Example > diff --git a/website/docs/api/pipe.md b/website/docs/api/pipe.md index be1279553..ec4b0ff1b 100644 --- a/website/docs/api/pipe.md +++ b/website/docs/api/pipe.md @@ -317,7 +317,7 @@ Note that in general, you don't have to call `pipe.add_label` if you provide a representative data sample to the [`begin_training`](#begin_training) method. 
In this case, all labels found in the sample will be automatically added to the model, and the output dimension will be -[inferred](/usage/layers-architectures#shape-inference) automatically. +[inferred](/usage/layers-architectures#thinc-shape-inference) automatically. ## Pipe.is_resizable {#is_resizable tag="method"} diff --git a/website/docs/api/tagger.md b/website/docs/api/tagger.md index eceb28b19..06def58d5 100644 --- a/website/docs/api/tagger.md +++ b/website/docs/api/tagger.md @@ -293,8 +293,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note that you don't have to call this method if you provide a **representative data sample** to the [`begin_training`](#begin_training) method. In this case, all labels found in the sample will be automatically added to the model, and the -output dimension will be [inferred](/usage/layers-architectures#shape-inference) -automatically. +output dimension will be +[inferred](/usage/layers-architectures#thinc-shape-inference) automatically. > #### Example > diff --git a/website/docs/api/textcategorizer.md b/website/docs/api/textcategorizer.md index 0d71655c6..b296c95ca 100644 --- a/website/docs/api/textcategorizer.md +++ b/website/docs/api/textcategorizer.md @@ -302,8 +302,8 @@ set, or if the model has already been fully [initialized](#begin_training). Note that you don't have to call this method if you provide a **representative data sample** to the [`begin_training`](#begin_training) method. In this case, all labels found in the sample will be automatically added to the model, and the -output dimension will be [inferred](/usage/layers-architectures#shape-inference) -automatically. +output dimension will be +[inferred](/usage/layers-architectures#thinc-shape-inference) automatically. 
> #### Example > From 1fc54867921c9e5e110e45bd7168fb1cd9337ec1 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 11:31:32 +0200 Subject: [PATCH 2/4] more fine-grained errors for git_sparse_checkout --- spacy/cli/_util.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index ee83ec75e..ea3ff77a8 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -6,6 +6,7 @@ from wasabi import msg import srsly import hashlib import typer +import subprocess from click import NoSuchOption from typer.main import get_command from contextlib import contextmanager @@ -326,22 +327,31 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m f"--filter=blob:none " # <-- The key bit f"-b {branch}" ) - run_command(cmd, capture=True) + _attempt_run_command(cmd) # Now we need to find the missing filenames for the subpath we want. # Looking for this 'rev-list' command in the git --help? Hah. cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" - ret = run_command(cmd, capture=True) + ret = _attempt_run_command(cmd) repo = _from_http_to_git(repo) # Now pass those missings into another bit of git internals missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) + if not missings: + err = f"Could not find any relevant files for '{subpath}'. Did you specify a correct and complete Git path?" 
+ msg.fail(err, exits=1) cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}" - run_command(cmd, capture=True) + _attempt_run_command(cmd, capture=True) # And finally, we can checkout our subpath cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" - run_command(cmd) + _attempt_run_command(cmd) # We need Path(name) to make sure we also support subdirectories shutil.move(str(tmp_dir / Path(subpath)), str(dest)) +def _attempt_run_command(cmd): + try: + return run_command(cmd, capture=True) + except subprocess.CalledProcessError as e: + err = f"Could not run command: {cmd}." + msg.fail(err, exits=1) def _from_http_to_git(repo): if repo.startswith("http://"): From 92f9d2f406540afa021fe85c77f178c2c9e3cb57 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 11:35:50 +0200 Subject: [PATCH 3/4] small UX fixes --- spacy/cli/_util.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index ea3ff77a8..ae39f7ecb 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -332,14 +332,15 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m # Looking for this 'rev-list' command in the git --help? Hah. cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" ret = _attempt_run_command(cmd) - repo = _from_http_to_git(repo) + git_repo = _from_http_to_git(repo) # Now pass those missings into another bit of git internals missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) if not missings: - err = f"Could not find any relevant files for '{subpath}'. Did you specify a correct and complete Git path?" + err = f"Could not find any relevant files for '{subpath}'. " \ + f"Did you specify a correct and complete path within repo '{repo}'?" 
msg.fail(err, exits=1) - cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}" - _attempt_run_command(cmd, capture=True) + cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" + _attempt_run_command(cmd) # And finally, we can checkout our subpath cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" _attempt_run_command(cmd) From a36766d153ebaddae9e2d085621ef32f548c3986 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 12:00:34 +0200 Subject: [PATCH 4/4] hookup branch --- spacy/cli/_util.py | 3 ++- spacy/cli/project/clone.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index ae39f7ecb..b03f3eb69 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -337,7 +337,8 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) if not missings: err = f"Could not find any relevant files for '{subpath}'. " \ - f"Did you specify a correct and complete path within repo '{repo}'?" + f"Did you specify a correct and complete path within repo '{repo}' " \ + f"and branch {branch}?" msg.fail(err, exits=1) cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" _attempt_run_command(cmd) diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py index c6d261097..427df490f 100644 --- a/spacy/cli/project/clone.py +++ b/spacy/cli/project/clone.py @@ -28,7 +28,7 @@ def project_clone_cli( """ if dest is None: dest = Path.cwd() / name - project_clone(name, dest, repo=repo) + project_clone(name, dest, repo=repo, branch=branch) def project_clone( @@ -43,13 +43,14 @@ def project_clone( name (str): Name of subdirectory to clone. dest (Path): Destination path of cloned project. repo (str): URL of Git repo containing project templates. 
+ branch (str): The branch to clone from. """ dest = ensure_path(dest) check_clone(name, dest, repo) project_dir = dest.resolve() repo_name = re.sub(r"(http(s?)):\/\/github.com/", "", repo) try: - git_sparse_checkout(repo, name, dest) + git_sparse_checkout(repo, name, dest, branch=branch) except subprocess.CalledProcessError: err = f"Could not clone '{name}' from repo '{repo_name}'" msg.fail(err, exits=1)