Merge remote-tracking branch 'upstream/develop' into feature/cli-config

This commit is contained in:
svlandeg 2020-09-12 14:44:40 +02:00
commit a75cfe0da6
7 changed files with 43 additions and 19 deletions

View File

@@ -1,4 +1,4 @@
from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
import sys import sys
import shutil import shutil
from pathlib import Path from pathlib import Path
@@ -321,29 +321,37 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
# *that* we can do by path. # *that* we can do by path.
# We're using Git and sparse checkout to only clone the files we need # We're using Git and sparse checkout to only clone the files we need
with make_tempdir() as tmp_dir: with make_tempdir() as tmp_dir:
git_version = get_git_version()
supports_sparse = git_version >= (2, 22)
# This is the "clone, but don't download anything" part. # This is the "clone, but don't download anything" part.
cmd = ( cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " if supports_sparse:
f"--filter=blob:none " # <-- The key bit cmd += f"--filter=blob:none" # <-- The key bit
f"-b {branch}" else:
) msg.warn(
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
f"that doesn't fully support sparse checkout yet. This means that "
f"more files than necessary may be downloaded temporarily. To "
f"only download the files needed, upgrade to Git v2.22 or above."
)
_attempt_run_command(cmd) _attempt_run_command(cmd)
# Now we need to find the missing filenames for the subpath we want. # Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah. # Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
ret = _attempt_run_command(cmd) ret = _attempt_run_command(cmd)
git_repo = _from_http_to_git(repo) git_repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals # Now pass those missings into another bit of git internals
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
if not missings: if supports_sparse and not missings:
err = ( err = (
f"Could not find any relevant files for '{subpath}'. " f"Could not find any relevant files for '{subpath}'. "
f"Did you specify a correct and complete path within repo '{repo}' " f"Did you specify a correct and complete path within repo '{repo}' "
f"and branch {branch}?" f"and branch {branch}?"
) )
msg.fail(err, exits=1) msg.fail(err, exits=1)
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" if supports_sparse:
_attempt_run_command(cmd) cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
_attempt_run_command(cmd)
# And finally, we can checkout our subpath # And finally, we can checkout our subpath
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
_attempt_run_command(cmd) _attempt_run_command(cmd)
@@ -351,15 +359,24 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
shutil.move(str(tmp_dir / Path(subpath)), str(dest)) shutil.move(str(tmp_dir / Path(subpath)), str(dest))
def _attempt_run_command(cmd): def get_git_version() -> Tuple[int, int]:
ret = _attempt_run_command(["git", "--version"])
# TODO: this seems kinda brittle?
version = ret.stdout[11:].strip().split(".")
return (int(version[0]), int(version[1]))
def _attempt_run_command(cmd: Union[str, List[str]]):
try: try:
return run_command(cmd, capture=True) return run_command(cmd, capture=True)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
err = f"Could not run command: {cmd}." err = f"Could not run command"
msg.fail(err, exits=1) msg.fail(err)
print(cmd)
sys.exit(1)
def _from_http_to_git(repo): def _from_http_to_git(repo: str) -> str:
if repo.startswith("http://"): if repo.startswith("http://"):
repo = repo.replace(r"http://", r"https://") repo = repo.replace(r"http://", r"https://")
if repo.startswith(r"https://"): if repo.startswith(r"https://"):

View File

@@ -18,6 +18,7 @@ def package_cli(
output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"), create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"),
name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"), version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"), no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"),
force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"), force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
@@ -38,6 +39,7 @@ def package_cli(
input_dir, input_dir,
output_dir, output_dir,
meta_path=meta_path, meta_path=meta_path,
name=name,
version=version, version=version,
create_meta=create_meta, create_meta=create_meta,
create_sdist=not no_sdist, create_sdist=not no_sdist,
@@ -50,6 +52,7 @@ def package(
input_dir: Path, input_dir: Path,
output_dir: Path, output_dir: Path,
meta_path: Optional[Path] = None, meta_path: Optional[Path] = None,
name: Optional[str] = None,
version: Optional[str] = None, version: Optional[str] = None,
create_meta: bool = False, create_meta: bool = False,
create_sdist: bool = True, create_sdist: bool = True,
@@ -71,6 +74,8 @@ def package(
msg.fail("Can't load pipeline meta.json", meta_path, exits=1) msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
meta = srsly.read_json(meta_path) meta = srsly.read_json(meta_path)
meta = get_meta(input_dir, meta) meta = get_meta(input_dir, meta)
if name is not None:
meta["name"] = name
if version is not None: if version is not None:
meta["version"] = version meta["version"] = version
if not create_meta: # only print if user doesn't want to overwrite if not create_meta: # only print if user doesn't want to overwrite

View File

@@ -27,7 +27,7 @@ def project_clone_cli(
DOCS: https://nightly.spacy.io/api/cli#project-clone DOCS: https://nightly.spacy.io/api/cli#project-clone
""" """
if dest is None: if dest is None:
dest = Path.cwd() / name dest = Path.cwd() / Path(name).parts[-1]
project_clone(name, dest, repo=repo, branch=branch) project_clone(name, dest, repo=repo, branch=branch)

View File

@@ -243,7 +243,8 @@ class Language:
self._config["nlp"]["pipeline"] = list(self.component_names) self._config["nlp"]["pipeline"] = list(self.component_names)
self._config["nlp"]["disabled"] = list(self.disabled) self._config["nlp"]["disabled"] = list(self.disabled)
self._config["components"] = pipeline self._config["components"] = pipeline
self._config["training"]["score_weights"] = combine_score_weights(score_weights) if not self._config["training"].get("score_weights"):
self._config["training"]["score_weights"] = combine_score_weights(score_weights)
if not srsly.is_json_serializable(self._config): if not srsly.is_json_serializable(self._config):
raise ValueError(Errors.E961.format(config=self._config)) raise ValueError(Errors.E961.format(config=self._config))
return self._config return self._config

View File

@@ -180,7 +180,7 @@ class ModelMetaSchema(BaseModel):
url: StrictStr = Field("", title="Model author URL") url: StrictStr = Field("", title="Model author URL")
sources: Optional[Union[List[StrictStr], List[Dict[str, str]]]] = Field(None, title="Training data sources") sources: Optional[Union[List[StrictStr], List[Dict[str, str]]]] = Field(None, title="Training data sources")
vectors: Dict[str, Any] = Field({}, title="Included word vectors") vectors: Dict[str, Any] = Field({}, title="Included word vectors")
labels: Dict[str, Dict[str, List[str]]] = Field({}, title="Component labels, keyed by component name") labels: Dict[str, List[str]] = Field({}, title="Component labels, keyed by component name")
accuracy: Dict[str, Union[float, Dict[str, float]]] = Field({}, title="Accuracy numbers") accuracy: Dict[str, Union[float, Dict[str, float]]] = Field({}, title="Accuracy numbers")
speed: Dict[str, Union[float, int]] = Field({}, title="Speed evaluation numbers") speed: Dict[str, Union[float, int]] = Field({}, title="Speed evaluation numbers")
spacy_git_version: StrictStr = Field("", title="Commit of spaCy version used") spacy_git_version: StrictStr = Field("", title="Commit of spaCy version used")

View File

@@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str:
return " ".join(shlex.quote(cmd) for cmd in command) return " ".join(shlex.quote(cmd) for cmd in command)
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None: def run_command(command: Union[str, List[str]], *, capture=False, stdin=None):
"""Run a command on the command line as a subprocess. If the subprocess """Run a command on the command line as a subprocess. If the subprocess
returns a non-zero exit code, a system exit is performed. returns a non-zero exit code, a system exit is performed.

View File

@@ -852,7 +852,7 @@ this, you can set the `--no-sdist` flag.
</Infobox> </Infobox>
```cli ```cli
$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--version] [--force] $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--name] [--version] [--force]
``` ```
> #### Example > #### Example
@@ -870,6 +870,7 @@ $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta]
| `--meta-path`, `-m` <Tag variant="new">2</Tag> | Path to [`meta.json`](/api/data-formats#meta) file (optional). ~~Optional[Path] \(option)~~ | | `--meta-path`, `-m` <Tag variant="new">2</Tag> | Path to [`meta.json`](/api/data-formats#meta) file (optional). ~~Optional[Path] \(option)~~ |
| `--create-meta`, `-C` <Tag variant="new">2</Tag> | Create a `meta.json` file on the command line, even if one already exists in the directory. If an existing file is found, its entries will be shown as the defaults in the command line prompt. ~~bool (flag)~~ | | `--create-meta`, `-C` <Tag variant="new">2</Tag> | Create a `meta.json` file on the command line, even if one already exists in the directory. If an existing file is found, its entries will be shown as the defaults in the command line prompt. ~~bool (flag)~~ |
| `--no-sdist`, `-NS` | Don't build the `.tar.gz` sdist automatically. Can be set if you want to run this step manually. ~~bool (flag)~~ | | `--no-sdist`, `-NS` | Don't build the `.tar.gz` sdist automatically. Can be set if you want to run this step manually. ~~bool (flag)~~ |
| `--name`, `-n` <Tag variant="new">3</Tag> | Package name to override in meta. ~~Optional[str] \(option)~~ |
| `--version`, `-v` <Tag variant="new">3</Tag> | Package version to override in meta. Useful when training new versions, as it doesn't require editing the meta template. ~~Optional[str] \(option)~~ | | `--version`, `-v` <Tag variant="new">3</Tag> | Package version to override in meta. Useful when training new versions, as it doesn't require editing the meta template. ~~Optional[str] \(option)~~ |
| `--force`, `-f` | Force overwriting of existing folder in output directory. ~~bool (flag)~~ | | `--force`, `-f` | Force overwriting of existing folder in output directory. ~~bool (flag)~~ |
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ | | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |