mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Update CLI commans to use one shared util file
This commit is contained in:
parent
240e0a62ca
commit
73332ddb67
|
@ -1,6 +1,6 @@
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
|
|
||||||
from ._app import app, setup_cli # noqa: F401
|
from ._util import app, setup_cli # noqa: F401
|
||||||
|
|
||||||
# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
|
# These are the actual functions, NOT the wrapped CLI commands. The CLI commands
|
||||||
# are registered automatically and won't have to be imported here.
|
# are registered automatically and won't have to be imported here.
|
||||||
|
|
|
@ -1,31 +0,0 @@
|
||||||
import typer
|
|
||||||
from typer.main import get_command
|
|
||||||
|
|
||||||
|
|
||||||
COMMAND = "python -m spacy"
|
|
||||||
NAME = "spacy"
|
|
||||||
HELP = """spaCy Command-line Interface
|
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/cli
|
|
||||||
"""
|
|
||||||
PROJECT_HELP = f"""Command-line interface for spaCy projects and working with
|
|
||||||
project templates. You'd typically start by cloning a project template to a local
|
|
||||||
directory and fetching its assets like datasets etc. See the project's
|
|
||||||
project.yml for the available commands.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
app = typer.Typer(name=NAME, help=HELP)
|
|
||||||
project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True)
|
|
||||||
app.add_typer(project_cli)
|
|
||||||
|
|
||||||
# Wrappers for Typer's annotations. Initially created to set defaults and to
|
|
||||||
# keep the names short, but not needed at the moment.
|
|
||||||
Arg = typer.Argument
|
|
||||||
Opt = typer.Option
|
|
||||||
|
|
||||||
|
|
||||||
def setup_cli() -> None:
|
|
||||||
# Ensure that the help messages always display the correct prompt
|
|
||||||
command = get_command(app)
|
|
||||||
command(prog_name=COMMAND)
|
|
|
@ -1,14 +1,76 @@
|
||||||
from typing import Dict, Any, Union
|
from typing import Dict, Any, Union, List
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
import srsly
|
import srsly
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import typer
|
||||||
|
from typer.main import get_command
|
||||||
|
|
||||||
from ...schemas import ProjectConfigSchema, validate
|
from ..schemas import ProjectConfigSchema, validate
|
||||||
|
|
||||||
|
|
||||||
PROJECT_FILE = "project.yml"
|
PROJECT_FILE = "project.yml"
|
||||||
PROJECT_LOCK = "project.lock"
|
PROJECT_LOCK = "project.lock"
|
||||||
|
COMMAND = "python -m spacy"
|
||||||
|
NAME = "spacy"
|
||||||
|
HELP = """spaCy Command-line Interface
|
||||||
|
|
||||||
|
DOCS: https://spacy.io/api/cli
|
||||||
|
"""
|
||||||
|
PROJECT_HELP = f"""Command-line interface for spaCy projects and working with
|
||||||
|
project templates. You'd typically start by cloning a project template to a local
|
||||||
|
directory and fetching its assets like datasets etc. See the project's
|
||||||
|
{PROJECT_FILE} for the available commands.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Wrappers for Typer's annotations. Initially created to set defaults and to
|
||||||
|
# keep the names short, but not needed at the moment.
|
||||||
|
Arg = typer.Argument
|
||||||
|
Opt = typer.Option
|
||||||
|
|
||||||
|
app = typer.Typer(name=NAME, help=HELP)
|
||||||
|
project_cli = typer.Typer(name="project", help=PROJECT_HELP, no_args_is_help=True)
|
||||||
|
app.add_typer(project_cli)
|
||||||
|
|
||||||
|
|
||||||
|
def setup_cli() -> None:
|
||||||
|
# Ensure that the help messages always display the correct prompt
|
||||||
|
command = get_command(app)
|
||||||
|
command(prog_name=COMMAND)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_config_overrides(args: List[str]) -> Dict[str, Any]:
|
||||||
|
"""Generate a dictionary of config overrides based on the extra arguments
|
||||||
|
provided on the CLI, e.g. --training.batch_size to override
|
||||||
|
"training.batch_size". Arguments without a "." are considered invalid,
|
||||||
|
since the config only allows top-level sections to exist.
|
||||||
|
|
||||||
|
args (List[str]): The extra arguments from the command line.
|
||||||
|
RETURNS (Dict[str, Any]): The parsed dict, keyed by nested config setting.
|
||||||
|
"""
|
||||||
|
result = {}
|
||||||
|
while args:
|
||||||
|
opt = args.pop(0)
|
||||||
|
err = f"Invalid config override '{opt}'"
|
||||||
|
if opt.startswith("--"): # new argument
|
||||||
|
opt = opt.replace("--", "")
|
||||||
|
if "." not in opt:
|
||||||
|
msg.fail(f"{err}: can't override top-level section", exits=1)
|
||||||
|
if not args or args[0].startswith("--"): # flag with no value
|
||||||
|
value = True
|
||||||
|
else:
|
||||||
|
value = args.pop(0)
|
||||||
|
# Just like we do in the config, we're calling json.loads on the
|
||||||
|
# values. But since they come from the CLI, it'd b unintuitive to
|
||||||
|
# explicitly mark strings with escaped quotes. So we're working
|
||||||
|
# around that here by falling back to a string if parsing fails.
|
||||||
|
try:
|
||||||
|
result[opt] = srsly.json_loads(value)
|
||||||
|
except ValueError:
|
||||||
|
result[opt] = str(value)
|
||||||
|
else:
|
||||||
|
msg.fail(f"{err}: options need to start with --", exits=1)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def load_project_config(path: Path) -> Dict[str, Any]:
|
def load_project_config(path: Path) -> Dict[str, Any]:
|
|
@ -6,7 +6,7 @@ import srsly
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..gold import docs_to_json
|
from ..gold import docs_to_json
|
||||||
from ..tokens import DocBin
|
from ..tokens import DocBin
|
||||||
from ..gold.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs
|
from ..gold.converters import iob2docs, conll_ner2docs, json2docs, conllu2docs
|
||||||
|
|
|
@ -5,7 +5,7 @@ import sys
|
||||||
import srsly
|
import srsly
|
||||||
from wasabi import Printer, MESSAGES
|
from wasabi import Printer, MESSAGES
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..gold import Corpus, Example
|
from ..gold import Corpus, Example
|
||||||
from ..syntax import nonproj
|
from ..syntax import nonproj
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
|
|
|
@ -4,7 +4,7 @@ import sys
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from .. import about
|
from .. import about
|
||||||
from ..util import is_package, get_base_version, run_command
|
from ..util import is_package, get_base_version, run_command
|
||||||
|
|
||||||
|
|
|
@ -8,7 +8,7 @@ from thinc.api import require_gpu, fix_random_seed
|
||||||
|
|
||||||
from ..gold import Corpus
|
from ..gold import Corpus
|
||||||
from ..tokens import Doc
|
from ..tokens import Doc
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..scorer import Scorer
|
from ..scorer import Scorer
|
||||||
from .. import util
|
from .. import util
|
||||||
from .. import displacy
|
from .. import displacy
|
||||||
|
|
|
@ -4,7 +4,7 @@ from pathlib import Path
|
||||||
from wasabi import Printer
|
from wasabi import Printer
|
||||||
import srsly
|
import srsly
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from .. import util
|
from .. import util
|
||||||
from .. import about
|
from .. import about
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ import srsly
|
||||||
import warnings
|
import warnings
|
||||||
from wasabi import Printer
|
from wasabi import Printer
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..vectors import Vectors
|
from ..vectors import Vectors
|
||||||
from ..errors import Errors, Warnings
|
from ..errors import Errors, Warnings
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
|
|
|
@ -5,7 +5,7 @@ from wasabi import Printer, get_raw_input
|
||||||
import srsly
|
import srsly
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..schemas import validate, ModelMetaSchema
|
from ..schemas import validate, ModelMetaSchema
|
||||||
from .. import util
|
from .. import util
|
||||||
from .. import about
|
from .. import about
|
||||||
|
|
|
@ -12,7 +12,7 @@ from wasabi import msg
|
||||||
import srsly
|
import srsly
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
from ..ml.models.multi_task import build_cloze_multi_task_model
|
from ..ml.models.multi_task import build_cloze_multi_task_model
|
||||||
from ..ml.models.multi_task import build_cloze_characters_multi_task_model
|
from ..ml.models.multi_task import build_cloze_characters_multi_task_model
|
||||||
|
|
|
@ -8,7 +8,7 @@ import sys
|
||||||
import itertools
|
import itertools
|
||||||
from wasabi import msg, Printer
|
from wasabi import msg, Printer
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
from ..util import load_model
|
from ..util import load_model
|
||||||
|
|
||||||
|
|
|
@ -7,8 +7,7 @@ import re
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from ...util import ensure_path, working_dir
|
from ...util import ensure_path, working_dir
|
||||||
from .._app import project_cli, Arg
|
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
||||||
from .util import PROJECT_FILE, load_project_config, get_checksum
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: find a solution for caches
|
# TODO: find a solution for caches
|
||||||
|
|
|
@ -7,8 +7,7 @@ import re
|
||||||
|
|
||||||
from ... import about
|
from ... import about
|
||||||
from ...util import ensure_path, run_command, make_tempdir
|
from ...util import ensure_path, run_command, make_tempdir
|
||||||
from .._app import project_cli, Arg, Opt, COMMAND
|
from .._util import project_cli, Arg, Opt, COMMAND, PROJECT_FILE
|
||||||
from .util import PROJECT_FILE
|
|
||||||
|
|
||||||
|
|
||||||
@project_cli.command("clone")
|
@project_cli.command("clone")
|
||||||
|
|
|
@ -5,8 +5,8 @@ import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
|
|
||||||
from .util import PROJECT_FILE, load_project_config, get_hash
|
from .._util import PROJECT_FILE, load_project_config, get_hash, project_cli
|
||||||
from .._app import project_cli, Arg, Opt, NAME, COMMAND
|
from .._util import Arg, Opt, NAME, COMMAND
|
||||||
from ...util import working_dir, split_command, join_command, run_command
|
from ...util import working_dir, split_command, join_command, run_command
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -5,9 +5,8 @@ import sys
|
||||||
import srsly
|
import srsly
|
||||||
|
|
||||||
from ...util import working_dir, run_command, split_command, is_cwd, join_command
|
from ...util import working_dir, run_command, split_command, is_cwd, join_command
|
||||||
from .._app import project_cli, Arg, Opt, COMMAND
|
from .._util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
|
||||||
from .util import PROJECT_FILE, PROJECT_LOCK, load_project_config, get_hash
|
from .._util import get_checksum, project_cli, Arg, Opt, COMMAND
|
||||||
from .util import get_checksum
|
|
||||||
|
|
||||||
|
|
||||||
@project_cli.command("run")
|
@project_cli.command("run")
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import Optional, Dict
|
from typing import Optional, Dict, Any
|
||||||
from timeit import default_timer as timer
|
from timeit import default_timer as timer
|
||||||
import srsly
|
import srsly
|
||||||
import tqdm
|
import tqdm
|
||||||
|
@ -8,8 +8,9 @@ import thinc
|
||||||
import thinc.schedules
|
import thinc.schedules
|
||||||
from thinc.api import use_pytorch_for_gpu_memory, require_gpu, fix_random_seed
|
from thinc.api import use_pytorch_for_gpu_memory, require_gpu, fix_random_seed
|
||||||
import random
|
import random
|
||||||
|
import typer
|
||||||
|
|
||||||
from ._app import app, Arg, Opt
|
from ._util import app, Arg, Opt, parse_config_overrides
|
||||||
from ..gold import Corpus, Example
|
from ..gold import Corpus, Example
|
||||||
from ..lookups import Lookups
|
from ..lookups import Lookups
|
||||||
from .. import util
|
from .. import util
|
||||||
|
@ -24,9 +25,12 @@ from ..ml import models # noqa: F401
|
||||||
registry = util.registry
|
registry = util.registry
|
||||||
|
|
||||||
|
|
||||||
@app.command("train")
|
@app.command(
|
||||||
|
"train", context_settings={"allow_extra_args": True, "ignore_unknown_options": True}
|
||||||
|
)
|
||||||
def train_cli(
|
def train_cli(
|
||||||
# fmt: off
|
# fmt: off
|
||||||
|
ctx: typer.Context, # This is only used to read additional arguments
|
||||||
train_path: Path = Arg(..., help="Location of training data", exists=True),
|
train_path: Path = Arg(..., help="Location of training data", exists=True),
|
||||||
dev_path: Path = Arg(..., help="Location of development data", exists=True),
|
dev_path: Path = Arg(..., help="Location of development data", exists=True),
|
||||||
config_path: Path = Arg(..., help="Path to config file", exists=True),
|
config_path: Path = Arg(..., help="Path to config file", exists=True),
|
||||||
|
@ -36,17 +40,36 @@ def train_cli(
|
||||||
# fmt: on
|
# fmt: on
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Train or update a spaCy model. Requires data to be formatted in spaCy's
|
Train or update a spaCy model. Requires data in spaCy's binary format. To
|
||||||
JSON format. To convert data from other formats, use the `spacy convert`
|
convert data from other formats, use the `spacy convert` command. The
|
||||||
command.
|
config file includes all settings and hyperparameters used during traing.
|
||||||
|
To override settings in the config, e.g. settings that point to local
|
||||||
|
paths or that you want to experiment with, you can override them as
|
||||||
|
command line options. For instance, --training.batch_size 128 overrides
|
||||||
|
the value of "batch_size" in the block "[training]". The --code argument
|
||||||
|
lets you pass in a Python file that's imported before training. It can be
|
||||||
|
used to register custom functions and architectures that can then be
|
||||||
|
referenced in the config.
|
||||||
"""
|
"""
|
||||||
util.set_env_log(verbose)
|
util.set_env_log(verbose)
|
||||||
verify_cli_args(**locals())
|
verify_cli_args(
|
||||||
|
train_path=train_path,
|
||||||
|
dev_path=dev_path,
|
||||||
|
config_path=config_path,
|
||||||
|
code_path=code_path,
|
||||||
|
)
|
||||||
|
overrides = parse_config_overrides(ctx.args)
|
||||||
|
if code_path is not None:
|
||||||
try:
|
try:
|
||||||
util.import_file("python_code", code_path)
|
util.import_file("python_code", code_path)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
msg.fail(f"Couldn't load Python code: {code_path}", e, exits=1)
|
msg.fail(f"Couldn't load Python code: {code_path}", e, exits=1)
|
||||||
train(config_path, {"train": train_path, "dev": dev_path}, output_path=output_path)
|
train(
|
||||||
|
config_path,
|
||||||
|
{"train": train_path, "dev": dev_path},
|
||||||
|
output_path=output_path,
|
||||||
|
config_overrides=overrides,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def train(
|
def train(
|
||||||
|
@ -54,7 +77,7 @@ def train(
|
||||||
data_paths: Dict[str, Path],
|
data_paths: Dict[str, Path],
|
||||||
raw_text: Optional[Path] = None,
|
raw_text: Optional[Path] = None,
|
||||||
output_path: Optional[Path] = None,
|
output_path: Optional[Path] = None,
|
||||||
weights_data: Optional[bytes] = None,
|
config_overrides: Dict[str, Any] = {},
|
||||||
) -> None:
|
) -> None:
|
||||||
msg.info(f"Loading config from: {config_path}")
|
msg.info(f"Loading config from: {config_path}")
|
||||||
# Read the config first without creating objects, to get to the original nlp_config
|
# Read the config first without creating objects, to get to the original nlp_config
|
||||||
|
@ -469,7 +492,6 @@ def verify_cli_args(
|
||||||
config_path: Path,
|
config_path: Path,
|
||||||
output_path: Optional[Path] = None,
|
output_path: Optional[Path] = None,
|
||||||
code_path: Optional[Path] = None,
|
code_path: Optional[Path] = None,
|
||||||
verbose: bool = False,
|
|
||||||
):
|
):
|
||||||
# Make sure all files and paths exists if they are needed
|
# Make sure all files and paths exists if they are needed
|
||||||
if not config_path or not config_path.exists():
|
if not config_path or not config_path.exists():
|
||||||
|
|
|
@ -4,7 +4,7 @@ import sys
|
||||||
import requests
|
import requests
|
||||||
from wasabi import msg, Printer
|
from wasabi import msg, Printer
|
||||||
|
|
||||||
from ._app import app
|
from ._util import app
|
||||||
from .. import about
|
from .. import about
|
||||||
from ..util import get_package_version, get_installed_models, get_base_version
|
from ..util import get_package_version, get_installed_models, get_base_version
|
||||||
from ..util import get_package_path, get_model_meta, is_compatible_version
|
from ..util import get_package_path, get_model_meta, is_compatible_version
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.cli.project.util import validate_project_commands
|
from spacy.cli._util import validate_project_commands
|
||||||
from spacy.schemas import ProjectConfigSchema, validate
|
from spacy.schemas import ProjectConfigSchema, validate
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user