mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Update project CLI
This commit is contained in:
		
							parent
							
								
									3f2f5f9cb3
								
							
						
					
					
						commit
						a6b76440b7
					
				| 
						 | 
					@ -7,15 +7,26 @@ import subprocess
 | 
				
			||||||
import shlex
 | 
					import shlex
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
 | 
					import shutil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ._app import app, Arg, Opt
 | 
					from ._app import app, Arg, Opt, COMMAND
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
from ..schemas import ProjectConfigSchema, validate
 | 
					from ..schemas import ProjectConfigSchema, validate
 | 
				
			||||||
from ..util import ensure_path, run_command
 | 
					from ..util import ensure_path, run_command, make_tempdir, working_dir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CONFIG_FILE = "project.yml"
 | 
					CONFIG_FILE = "project.yml"
 | 
				
			||||||
DIRS = ["assets", "configs", "packages", "metrics", "scripts", "notebooks", "training"]
 | 
					DIRS = [
 | 
				
			||||||
 | 
					    "assets",
 | 
				
			||||||
 | 
					    "metas",
 | 
				
			||||||
 | 
					    "configs",
 | 
				
			||||||
 | 
					    "packages",
 | 
				
			||||||
 | 
					    "metrics",
 | 
				
			||||||
 | 
					    "scripts",
 | 
				
			||||||
 | 
					    "notebooks",
 | 
				
			||||||
 | 
					    "training",
 | 
				
			||||||
 | 
					    "corpus",
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
CACHES = [
 | 
					CACHES = [
 | 
				
			||||||
    Path.home() / ".torch",
 | 
					    Path.home() / ".torch",
 | 
				
			||||||
    Path.home() / ".caches" / "torch",
 | 
					    Path.home() / ".caches" / "torch",
 | 
				
			||||||
| 
						 | 
					@ -45,28 +56,37 @@ def callback():
 | 
				
			||||||
def project_clone_cli(
 | 
					def project_clone_cli(
 | 
				
			||||||
    # fmt: off
 | 
					    # fmt: off
 | 
				
			||||||
    name: str = Arg(..., help="The name of the template to fetch"),
 | 
					    name: str = Arg(..., help="The name of the template to fetch"),
 | 
				
			||||||
    dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=True, file_okay=False),
 | 
					    dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=False),
 | 
				
			||||||
    repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."),
 | 
					    repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."),
 | 
				
			||||||
 | 
					    verbose: bool = Opt(False, "--verbose", "-V", help="Show detailed information")
 | 
				
			||||||
    # fmt: on
 | 
					    # fmt: on
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    """Clone a project template from a repository."""
 | 
					    """Clone a project template from a repository."""
 | 
				
			||||||
    project_clone(name, dest, repo=repo)
 | 
					    project_clone(name, dest, repo=repo, verbose=verbose)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def project_clone(name: str, dest: Path, repo: str = about.__projects__) -> None:
 | 
					def project_clone(
 | 
				
			||||||
 | 
					    name: str, dest: Path, *, repo: str = about.__projects__, verbose: bool = False
 | 
				
			||||||
 | 
					) -> None:
 | 
				
			||||||
    dest = ensure_path(dest)
 | 
					    dest = ensure_path(dest)
 | 
				
			||||||
    if not dest or not dest.exists() or not dest.is_dir():
 | 
					    check_clone_dest(dest)
 | 
				
			||||||
        msg.fail("Not a valid directory to clone project", dest, exits=1)
 | 
					    # When cloning a subdirectory with DVC, it will create a folder of that name
 | 
				
			||||||
    cmd = ["dvc", "get", repo, name, "-o", str(dest)]
 | 
					    # within the destination dir, so we use a tempdir and then copy it into the
 | 
				
			||||||
    msg.info(" ".join(cmd))
 | 
					    # parent directory to create the cloned directory
 | 
				
			||||||
 | 
					    with make_tempdir() as tmp_dir:
 | 
				
			||||||
 | 
					        cmd = ["dvc", "get", repo, name, "-o", str(tmp_dir)]
 | 
				
			||||||
 | 
					        if verbose:
 | 
				
			||||||
 | 
					            cmd.append("-v")
 | 
				
			||||||
 | 
					        print(" ".join(cmd))
 | 
				
			||||||
        run_command(cmd)
 | 
					        run_command(cmd)
 | 
				
			||||||
 | 
					        shutil.move(str(tmp_dir / Path(name).name), str(dest))
 | 
				
			||||||
    msg.good(f"Cloned project '{name}' from {repo}")
 | 
					    msg.good(f"Cloned project '{name}' from {repo}")
 | 
				
			||||||
    with msg.loading("Setting up directories..."):
 | 
					 | 
				
			||||||
    for sub_dir in DIRS:
 | 
					    for sub_dir in DIRS:
 | 
				
			||||||
        dir_path = dest / sub_dir
 | 
					        dir_path = dest / sub_dir
 | 
				
			||||||
        if not dir_path.exists():
 | 
					        if not dir_path.exists():
 | 
				
			||||||
            dir_path.mkdir(parents=True)
 | 
					            dir_path.mkdir(parents=True)
 | 
				
			||||||
    msg.good(f"Your project is now ready!", dest.resolve())
 | 
					    msg.good(f"Your project is now ready!", dest.resolve())
 | 
				
			||||||
 | 
					    print(f"To get the assets, run:\npython -m spacy project get-assets {dest}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@project_cli.command("get-assets")
 | 
					@project_cli.command("get-assets")
 | 
				
			||||||
| 
						 | 
					@ -91,7 +111,6 @@ def project_get_assets(project_path: Path) -> None:
 | 
				
			||||||
        dest_path = project_path / dest
 | 
					        dest_path = project_path / dest
 | 
				
			||||||
        check_asset(url)
 | 
					        check_asset(url)
 | 
				
			||||||
        cmd = ["dvc", "get-url", url, str(dest_path)]
 | 
					        cmd = ["dvc", "get-url", url, str(dest_path)]
 | 
				
			||||||
        msg.info(" ".join(cmd))
 | 
					 | 
				
			||||||
        run_command(cmd)
 | 
					        run_command(cmd)
 | 
				
			||||||
        msg.good(f"Got asset {dest}")
 | 
					        msg.good(f"Got asset {dest}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -100,18 +119,43 @@ def project_get_assets(project_path: Path) -> None:
 | 
				
			||||||
def project_run_cli(
 | 
					def project_run_cli(
 | 
				
			||||||
    # fmt: off
 | 
					    # fmt: off
 | 
				
			||||||
    project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False),
 | 
					    project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False),
 | 
				
			||||||
    subcommand: str = Arg(None, help="Name of command defined in project config")
 | 
					    subcommand: str = Arg(None, help="Name of command defined in project config"),
 | 
				
			||||||
 | 
					    show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
 | 
				
			||||||
    # fmt: on
 | 
					    # fmt: on
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    """Run scripts defined in the project."""
 | 
					    """Run scripts defined in the project."""
 | 
				
			||||||
 | 
					    if show_help:
 | 
				
			||||||
 | 
					        print_run_help(project_dir, subcommand)
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
        project_run(project_dir, subcommand)
 | 
					        project_run(project_dir, subcommand)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def print_run_help(project_dir: Path, subcommand: str) -> None:
 | 
				
			||||||
 | 
					    """Simulate a CLI help prompt using the info available in the project config."""
 | 
				
			||||||
 | 
					    config = load_project_config(project_dir)
 | 
				
			||||||
 | 
					    config_commands = config.get("commands", [])
 | 
				
			||||||
 | 
					    commands = {cmd["name"]: cmd for cmd in config_commands}
 | 
				
			||||||
 | 
					    if subcommand:
 | 
				
			||||||
 | 
					        if subcommand not in commands:
 | 
				
			||||||
 | 
					            msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
 | 
				
			||||||
 | 
					        print(f"Usage: {COMMAND} project run {project_dir} {subcommand}")
 | 
				
			||||||
 | 
					        help_text = commands[subcommand].get("help")
 | 
				
			||||||
 | 
					        if help_text:
 | 
				
			||||||
 | 
					            msg.text(f"\n{help_text}\n")
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        print(f"\nAvailable commands in {CONFIG_FILE}")
 | 
				
			||||||
 | 
					        print(f"Usage: {COMMAND} project run {project_dir} [COMMAND]")
 | 
				
			||||||
 | 
					        msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def project_run(project_dir: Path, subcommand: str) -> None:
 | 
					def project_run(project_dir: Path, subcommand: str) -> None:
 | 
				
			||||||
    config = load_project_config(project_dir)
 | 
					    config = load_project_config(project_dir)
 | 
				
			||||||
    config_commands = config.get("commands", [])
 | 
					    config_commands = config.get("commands", [])
 | 
				
			||||||
    variables = config.get("variables", {})
 | 
					    variables = config.get("variables", {})
 | 
				
			||||||
    commands = {cmd["name"]: cmd for cmd in config_commands}
 | 
					    commands = {cmd["name"]: cmd for cmd in config_commands}
 | 
				
			||||||
 | 
					    if subcommand and subcommand not in commands:
 | 
				
			||||||
 | 
					        msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
 | 
				
			||||||
 | 
					    with working_dir(project_dir):
 | 
				
			||||||
        if subcommand is None:
 | 
					        if subcommand is None:
 | 
				
			||||||
            all_commands = config.get("run", [])
 | 
					            all_commands = config.get("run", [])
 | 
				
			||||||
            if not all_commands:
 | 
					            if not all_commands:
 | 
				
			||||||
| 
						 | 
					@ -119,12 +163,12 @@ def project_run(project_dir: Path, subcommand: str) -> None:
 | 
				
			||||||
            msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
 | 
					            msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
 | 
				
			||||||
            for command in all_commands:
 | 
					            for command in all_commands:
 | 
				
			||||||
                if command not in commands:
 | 
					                if command not in commands:
 | 
				
			||||||
                msg.fail(f"Can't find command '{command}' in project config", exits=1)
 | 
					                    msg.fail(
 | 
				
			||||||
 | 
					                        f"Can't find command '{command}' in project config", exits=1
 | 
				
			||||||
 | 
					                    )
 | 
				
			||||||
                msg.divider(command)
 | 
					                msg.divider(command)
 | 
				
			||||||
                run_commands(commands[command]["script"], variables)
 | 
					                run_commands(commands[command]["script"], variables)
 | 
				
			||||||
        return
 | 
					        else:
 | 
				
			||||||
    if subcommand not in commands:
 | 
					 | 
				
			||||||
        msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
 | 
					 | 
				
			||||||
            run_commands(commands[subcommand]["script"], variables)
 | 
					            run_commands(commands[subcommand]["script"], variables)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -146,7 +190,7 @@ def run_commands(commands: List[str] = tuple(), variables: Dict[str, str] = {})
 | 
				
			||||||
    for command in commands:
 | 
					    for command in commands:
 | 
				
			||||||
        # Substitute variables, e.g. "./{NAME}.json"
 | 
					        # Substitute variables, e.g. "./{NAME}.json"
 | 
				
			||||||
        command = command.format(**variables)
 | 
					        command = command.format(**variables)
 | 
				
			||||||
        msg.info(command)
 | 
					        print(command)
 | 
				
			||||||
        run_command(shlex.split(command))
 | 
					        run_command(shlex.split(command))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -160,3 +204,19 @@ def check_asset(url: str) -> None:
 | 
				
			||||||
            "download the raw file, click on 'Download' on the GitHub page "
 | 
					            "download the raw file, click on 'Download' on the GitHub page "
 | 
				
			||||||
            "and copy the raw.githubusercontent.com URL instead."
 | 
					            "and copy the raw.githubusercontent.com URL instead."
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					        # url.replace("github.com", "raw.githubusercontent.com").replace("/blob/", "/").replace("/tree/", "/")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_clone_dest(dest: Path) -> None:
 | 
				
			||||||
 | 
					    """Check and validate that the destination path can be used to clone."""
 | 
				
			||||||
 | 
					    if not dest:
 | 
				
			||||||
 | 
					        msg.fail(f"Not a valid directory to clone project: {dest}", exits=1)
 | 
				
			||||||
 | 
					    if dest.exists():
 | 
				
			||||||
 | 
					        # Directory already exists (not allowed, clone needs to create it)
 | 
				
			||||||
 | 
					        msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1)
 | 
				
			||||||
 | 
					    if not dest.parent.exists():
 | 
				
			||||||
 | 
					        # We're not creating parents, parent dir should exist
 | 
				
			||||||
 | 
					        msg.fail(
 | 
				
			||||||
 | 
					            f"Can't clone project, parent directory doesn't exist: {dest.parent}",
 | 
				
			||||||
 | 
					            exits=1,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -229,11 +229,15 @@ class ProjectConfigCommand(BaseModel):
 | 
				
			||||||
    name: StrictStr = Field(..., title="Name of command")
 | 
					    name: StrictStr = Field(..., title="Name of command")
 | 
				
			||||||
    help: Optional[StrictStr] = Field(None, title="Command description")
 | 
					    help: Optional[StrictStr] = Field(None, title="Command description")
 | 
				
			||||||
    script: List[StrictStr] = Field([], title="List of CLI commands to run, in order")
 | 
					    script: List[StrictStr] = Field([], title="List of CLI commands to run, in order")
 | 
				
			||||||
    dvc_deps: List[StrictStr] = Field([], title="Data Version Control dependencies")
 | 
					    deps: List[StrictStr] = Field([], title="Data Version Control dependencies")
 | 
				
			||||||
    dvc_outputs: List[StrictStr] = Field([], title="Data Version Control outputs")
 | 
					    outputs: List[StrictStr] = Field([], title="Data Version Control outputs")
 | 
				
			||||||
    dvc_outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)")
 | 
					    outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)")
 | 
				
			||||||
    # fmt: on
 | 
					    # fmt: on
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    class Config:
 | 
				
			||||||
 | 
					        title = "A single named command specified in a project config"
 | 
				
			||||||
 | 
					        extra = "forbid"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ProjectConfigSchema(BaseModel):
 | 
					class ProjectConfigSchema(BaseModel):
 | 
				
			||||||
    # fmt: off
 | 
					    # fmt: off
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,8 +8,8 @@ from pathlib import Path
 | 
				
			||||||
from spacy import Errors
 | 
					from spacy import Errors
 | 
				
			||||||
from spacy.tokens import Doc, Span
 | 
					from spacy.tokens import Doc, Span
 | 
				
			||||||
from spacy.attrs import POS, TAG, HEAD, DEP, LEMMA
 | 
					from spacy.attrs import POS, TAG, HEAD, DEP, LEMMA
 | 
				
			||||||
 | 
					 | 
				
			||||||
from spacy.vocab import Vocab
 | 
					from spacy.vocab import Vocab
 | 
				
			||||||
 | 
					from spacy.util import make_tempdir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@contextlib.contextmanager
 | 
					@contextlib.contextmanager
 | 
				
			||||||
| 
						 | 
					@ -19,13 +19,6 @@ def make_tempfile(mode="r"):
 | 
				
			||||||
    f.close()
 | 
					    f.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@contextlib.contextmanager
 | 
					 | 
				
			||||||
def make_tempdir():
 | 
					 | 
				
			||||||
    d = Path(tempfile.mkdtemp())
 | 
					 | 
				
			||||||
    yield d
 | 
					 | 
				
			||||||
    shutil.rmtree(str(d))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_doc(
 | 
					def get_doc(
 | 
				
			||||||
    vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None
 | 
					    vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -19,6 +19,8 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier
 | 
				
			||||||
from packaging.version import Version, InvalidVersion
 | 
					from packaging.version import Version, InvalidVersion
 | 
				
			||||||
import subprocess
 | 
					import subprocess
 | 
				
			||||||
from contextlib import contextmanager
 | 
					from contextlib import contextmanager
 | 
				
			||||||
 | 
					import tempfile
 | 
				
			||||||
 | 
					import shutil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
try:
 | 
					try:
 | 
				
			||||||
| 
						 | 
					@ -453,6 +455,13 @@ def working_dir(path: Union[str, Path]) -> None:
 | 
				
			||||||
        os.chdir(prev_cwd)
 | 
					        os.chdir(prev_cwd)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@contextmanager
 | 
				
			||||||
 | 
					def make_tempdir():
 | 
				
			||||||
 | 
					    d = Path(tempfile.mkdtemp())
 | 
				
			||||||
 | 
					    yield d
 | 
				
			||||||
 | 
					    shutil.rmtree(str(d))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def is_in_jupyter():
 | 
					def is_in_jupyter():
 | 
				
			||||||
    """Check if user is running spaCy from a Jupyter notebook by detecting the
 | 
					    """Check if user is running spaCy from a Jupyter notebook by detecting the
 | 
				
			||||||
    IPython kernel. Mainly used for the displaCy visualizer.
 | 
					    IPython kernel. Mainly used for the displaCy visualizer.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user