mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Update project CLI
This commit is contained in:
parent
3f2f5f9cb3
commit
a6b76440b7
|
@ -7,15 +7,26 @@ import subprocess
|
|||
import shlex
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
|
||||
from ._app import app, Arg, Opt
|
||||
from ._app import app, Arg, Opt, COMMAND
|
||||
from .. import about
|
||||
from ..schemas import ProjectConfigSchema, validate
|
||||
from ..util import ensure_path, run_command
|
||||
from ..util import ensure_path, run_command, make_tempdir, working_dir
|
||||
|
||||
|
||||
CONFIG_FILE = "project.yml"
|
||||
DIRS = ["assets", "configs", "packages", "metrics", "scripts", "notebooks", "training"]
|
||||
DIRS = [
|
||||
"assets",
|
||||
"metas",
|
||||
"configs",
|
||||
"packages",
|
||||
"metrics",
|
||||
"scripts",
|
||||
"notebooks",
|
||||
"training",
|
||||
"corpus",
|
||||
]
|
||||
CACHES = [
|
||||
Path.home() / ".torch",
|
||||
Path.home() / ".caches" / "torch",
|
||||
|
@ -45,28 +56,37 @@ def callback():
|
|||
def project_clone_cli(
|
||||
# fmt: off
|
||||
name: str = Arg(..., help="The name of the template to fetch"),
|
||||
dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=True, file_okay=False),
|
||||
dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=False),
|
||||
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."),
|
||||
verbose: bool = Opt(False, "--verbose", "-V", help="Show detailed information")
|
||||
# fmt: on
|
||||
):
|
||||
"""Clone a project template from a repository."""
|
||||
project_clone(name, dest, repo=repo)
|
||||
project_clone(name, dest, repo=repo, verbose=verbose)
|
||||
|
||||
|
||||
def project_clone(name: str, dest: Path, repo: str = about.__projects__) -> None:
|
||||
def project_clone(
|
||||
name: str, dest: Path, *, repo: str = about.__projects__, verbose: bool = False
|
||||
) -> None:
|
||||
dest = ensure_path(dest)
|
||||
if not dest or not dest.exists() or not dest.is_dir():
|
||||
msg.fail("Not a valid directory to clone project", dest, exits=1)
|
||||
cmd = ["dvc", "get", repo, name, "-o", str(dest)]
|
||||
msg.info(" ".join(cmd))
|
||||
run_command(cmd)
|
||||
check_clone_dest(dest)
|
||||
# When cloning a subdirectory with DVC, it will create a folder of that name
|
||||
# within the destination dir, so we use a tempdir and then copy it into the
|
||||
# parent directory to create the cloned directory
|
||||
with make_tempdir() as tmp_dir:
|
||||
cmd = ["dvc", "get", repo, name, "-o", str(tmp_dir)]
|
||||
if verbose:
|
||||
cmd.append("-v")
|
||||
print(" ".join(cmd))
|
||||
run_command(cmd)
|
||||
shutil.move(str(tmp_dir / Path(name).name), str(dest))
|
||||
msg.good(f"Cloned project '{name}' from {repo}")
|
||||
with msg.loading("Setting up directories..."):
|
||||
for sub_dir in DIRS:
|
||||
dir_path = dest / sub_dir
|
||||
if not dir_path.exists():
|
||||
dir_path.mkdir(parents=True)
|
||||
for sub_dir in DIRS:
|
||||
dir_path = dest / sub_dir
|
||||
if not dir_path.exists():
|
||||
dir_path.mkdir(parents=True)
|
||||
msg.good(f"Your project is now ready!", dest.resolve())
|
||||
print(f"To get the assets, run:\npython -m spacy project get-assets {dest}")
|
||||
|
||||
|
||||
@project_cli.command("get-assets")
|
||||
|
@ -91,7 +111,6 @@ def project_get_assets(project_path: Path) -> None:
|
|||
dest_path = project_path / dest
|
||||
check_asset(url)
|
||||
cmd = ["dvc", "get-url", url, str(dest_path)]
|
||||
msg.info(" ".join(cmd))
|
||||
run_command(cmd)
|
||||
msg.good(f"Got asset {dest}")
|
||||
|
||||
|
@ -100,11 +119,33 @@ def project_get_assets(project_path: Path) -> None:
|
|||
def project_run_cli(
|
||||
# fmt: off
|
||||
project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False),
|
||||
subcommand: str = Arg(None, help="Name of command defined in project config")
|
||||
subcommand: str = Arg(None, help="Name of command defined in project config"),
|
||||
show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
|
||||
# fmt: on
|
||||
):
|
||||
"""Run scripts defined in the project."""
|
||||
project_run(project_dir, subcommand)
|
||||
if show_help:
|
||||
print_run_help(project_dir, subcommand)
|
||||
else:
|
||||
project_run(project_dir, subcommand)
|
||||
|
||||
|
||||
def print_run_help(project_dir: Path, subcommand: str) -> None:
    """Print CLI-style help text built from the project config.

    With a subcommand, print the usage line for that command followed by its
    "help" text if the config defines one. Without a subcommand, print a
    generic usage line and a table of all configured commands.

    project_dir (Path): The project directory whose config is loaded.
    subcommand (str): Name of a command in the config, or a falsy value to
        list every available command.
    """
    config = load_project_config(project_dir)
    command_list = config.get("commands", [])
    by_name = {entry["name"]: entry for entry in command_list}
    if not subcommand:
        # No specific command requested: show the overview table
        print(f"\nAvailable commands in {CONFIG_FILE}")
        print(f"Usage: {COMMAND} project run {project_dir} [COMMAND]")
        msg.table([(entry["name"], entry.get("help", "")) for entry in command_list])
        return
    if subcommand not in by_name:
        # NOTE(review): exits=1 presumably terminates the process here
        msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
    print(f"Usage: {COMMAND} project run {project_dir} {subcommand}")
    help_text = by_name[subcommand].get("help")
    if help_text:
        msg.text(f"\n{help_text}\n")
|
||||
|
||||
|
||||
def project_run(project_dir: Path, subcommand: str) -> None:
|
||||
|
@ -112,20 +153,23 @@ def project_run(project_dir: Path, subcommand: str) -> None:
|
|||
config_commands = config.get("commands", [])
|
||||
variables = config.get("variables", {})
|
||||
commands = {cmd["name"]: cmd for cmd in config_commands}
|
||||
if subcommand is None:
|
||||
all_commands = config.get("run", [])
|
||||
if not all_commands:
|
||||
msg.warn("No run commands defined in project config", exits=0)
|
||||
msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
|
||||
for command in all_commands:
|
||||
if command not in commands:
|
||||
msg.fail(f"Can't find command '{command}' in project config", exits=1)
|
||||
msg.divider(command)
|
||||
run_commands(commands[command]["script"], variables)
|
||||
return
|
||||
if subcommand not in commands:
|
||||
if subcommand and subcommand not in commands:
|
||||
msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
|
||||
run_commands(commands[subcommand]["script"], variables)
|
||||
with working_dir(project_dir):
|
||||
if subcommand is None:
|
||||
all_commands = config.get("run", [])
|
||||
if not all_commands:
|
||||
msg.warn("No run commands defined in project config", exits=0)
|
||||
msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
|
||||
for command in all_commands:
|
||||
if command not in commands:
|
||||
msg.fail(
|
||||
f"Can't find command '{command}' in project config", exits=1
|
||||
)
|
||||
msg.divider(command)
|
||||
run_commands(commands[command]["script"], variables)
|
||||
else:
|
||||
run_commands(commands[subcommand]["script"], variables)
|
||||
|
||||
|
||||
app.add_typer(project_cli, name="project")
|
||||
|
@ -146,7 +190,7 @@ def run_commands(commands: List[str] = tuple(), variables: Dict[str, str] = {})
|
|||
for command in commands:
|
||||
# Substitute variables, e.g. "./{NAME}.json"
|
||||
command = command.format(**variables)
|
||||
msg.info(command)
|
||||
print(command)
|
||||
run_command(shlex.split(command))
|
||||
|
||||
|
||||
|
@ -160,3 +204,19 @@ def check_asset(url: str) -> None:
|
|||
"download the raw file, click on 'Download' on the GitHub page "
|
||||
"and copy the raw.githubusercontent.com URL instead."
|
||||
)
|
||||
# url.replace("github.com", "raw.githubusercontent.com").replace("/blob/", "/").replace("/tree/", "/")
|
||||
|
||||
|
||||
def check_clone_dest(dest: Path) -> None:
    """Validate that a path can serve as the destination of a project clone.

    Three checks run in order, each reported through msg.fail with exits=1:
    the path must be truthy, it must not exist yet, and its parent directory
    must already exist.

    dest (Path): The directory the clone is expected to create.
    """
    invalid_msg = f"Not a valid directory to clone project: {dest}"
    if not dest:
        msg.fail(invalid_msg, exits=1)
    if dest.exists():
        # The clone step has to create the directory itself, so an existing
        # one is rejected
        exists_msg = f"Can't clone project, directory already exists: {dest}"
        msg.fail(exists_msg, exits=1)
    parent_dir = dest.parent
    if not parent_dir.exists():
        # Parents are never created on the user's behalf
        no_parent_msg = (
            f"Can't clone project, parent directory doesn't exist: {parent_dir}"
        )
        msg.fail(no_parent_msg, exits=1)
|
||||
|
|
|
@ -229,11 +229,15 @@ class ProjectConfigCommand(BaseModel):
|
|||
name: StrictStr = Field(..., title="Name of command")
|
||||
help: Optional[StrictStr] = Field(None, title="Command description")
|
||||
script: List[StrictStr] = Field([], title="List of CLI commands to run, in order")
|
||||
dvc_deps: List[StrictStr] = Field([], title="Data Version Control dependencies")
|
||||
dvc_outputs: List[StrictStr] = Field([], title="Data Version Control outputs")
|
||||
dvc_outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)")
|
||||
deps: List[StrictStr] = Field([], title="Data Version Control dependencies")
|
||||
outputs: List[StrictStr] = Field([], title="Data Version Control outputs")
|
||||
outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)")
|
||||
# fmt: on
|
||||
|
||||
class Config:
|
||||
title = "A single named command specified in a project config"
|
||||
extra = "forbid"
|
||||
|
||||
|
||||
class ProjectConfigSchema(BaseModel):
|
||||
# fmt: off
|
||||
|
|
|
@ -8,8 +8,8 @@ from pathlib import Path
|
|||
from spacy import Errors
|
||||
from spacy.tokens import Doc, Span
|
||||
from spacy.attrs import POS, TAG, HEAD, DEP, LEMMA
|
||||
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.util import make_tempdir
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
|
@ -19,13 +19,6 @@ def make_tempfile(mode="r"):
|
|||
f.close()
|
||||
|
||||
|
||||
@contextlib.contextmanager
def make_tempdir():
    """Create a temporary directory and remove it when the context exits.

    YIELDS (Path): The path of the freshly created temporary directory.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Clean up even if the body of the with-block raised, so a failing
        # test doesn't leave the directory behind
        shutil.rmtree(str(d))
|
||||
|
||||
|
||||
def get_doc(
|
||||
vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None
|
||||
):
|
||||
|
|
|
@ -19,6 +19,8 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
|||
from packaging.version import Version, InvalidVersion
|
||||
import subprocess
|
||||
from contextlib import contextmanager
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
|
||||
try:
|
||||
|
@ -453,6 +455,13 @@ def working_dir(path: Union[str, Path]) -> None:
|
|||
os.chdir(prev_cwd)
|
||||
|
||||
|
||||
@contextmanager
def make_tempdir():
    """Create a temporary directory and remove it when the context exits.

    The directory and everything in it are deleted on exit, including when
    the body of the with-block raises; the exception still propagates.

    YIELDS (Path): The path of the freshly created temporary directory.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Without the finally, an exception inside the with-block would skip
        # the cleanup and leak the directory
        shutil.rmtree(str(d))
|
||||
|
||||
|
||||
def is_in_jupyter():
|
||||
"""Check if user is running spaCy from a Jupyter notebook by detecting the
|
||||
IPython kernel. Mainly used for the displaCy visualizer.
|
||||
|
|
Loading…
Reference in New Issue
Block a user