mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Update CLI args and docstrings
This commit is contained in:
parent
ac4297ee39
commit
05e182e421
|
@ -1,3 +1,4 @@
|
|||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from wasabi import msg
|
||||
import subprocess
|
||||
|
@ -24,22 +25,18 @@ DIRS = [
|
|||
@project_cli.command("clone")
|
||||
def project_clone_cli(
|
||||
# fmt: off
|
||||
name: str = Arg(..., help="The name of the template to fetch"),
|
||||
dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=False),
|
||||
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."),
|
||||
name: str = Arg(..., help="The name of the template to clone"),
|
||||
dest: Optional[Path] = Arg(None, help="Where to clone the project. Defaults to current working directory", exists=False),
|
||||
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to clone from"),
|
||||
# fmt: on
|
||||
):
|
||||
"""Clone a project template from a repository. Calls into "git" and will
|
||||
only download the files from the given subdirectory. The GitHub repo
|
||||
defaults to the official spaCy template repo, but can be customized
|
||||
(including using a private repo). Setting the --git flag will also
|
||||
initialize the project directory as a Git repo. If the project is intended
|
||||
to be a Git repo, it should be initialized with Git first, before
|
||||
initializing DVC (Data Version Control). This allows DVC to integrate with
|
||||
Git.
|
||||
(including using a private repo).
|
||||
"""
|
||||
if dest == Path.cwd():
|
||||
dest = dest / name
|
||||
if dest is None:
|
||||
dest = Path.cwd() / name
|
||||
project_clone(name, dest, repo=repo)
|
||||
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ def project_update_dvc_cli(
|
|||
"""Auto-generate Data Version Control (DVC) config. A DVC
|
||||
project can only define one pipeline, so you need to specify one workflow
|
||||
defined in the project.yml. If no workflow is specified, the first defined
|
||||
workflow is used. The DVC config will only be updated if
|
||||
workflow is used. The DVC config will only be updated if the project.yml changed.
|
||||
"""
|
||||
project_update_dvc(project_dir, workflow, verbose=verbose, force=force)
|
||||
|
||||
|
|
|
@ -20,14 +20,14 @@ def project_run_cli(
|
|||
subcommand: str = Arg(None, help=f"Name of command defined in the {PROJECT_FILE}"),
|
||||
project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False),
|
||||
force: bool = Opt(False, "--force", "-F", help="Force re-running steps, even if nothing changed"),
|
||||
dry: bool = Opt(False, "--dry", "-D", help="Perform a dry run and don't execute commands"),
|
||||
dry: bool = Opt(False, "--dry", "-D", help="Perform a dry run and don't execute scripts"),
|
||||
show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
|
||||
# fmt: on
|
||||
):
|
||||
"""Run a named script or workflow defined in the project.yml. If a workflow
|
||||
"""Run a named command or workflow defined in the project.yml. If a workflow
|
||||
name is specified, all commands in the workflow are run, in order. If
|
||||
commands define inputs and/or outputs, they will only be re-run if state
|
||||
has changed.
|
||||
commands define dependencies and/or outputs, they will only be re-run if
|
||||
state has changed.
|
||||
"""
|
||||
if show_help or not subcommand:
|
||||
print_run_help(project_dir, subcommand)
|
||||
|
|
|
@ -121,14 +121,14 @@ class ConfigSchema(BaseModel):
|
|||
@app.command("train")
|
||||
def train_cli(
|
||||
# fmt: off
|
||||
train_path: Path = Arg(..., help="Location of JSON-formatted training data", exists=True),
|
||||
dev_path: Path = Arg(..., help="Location of JSON-formatted development data", exists=True),
|
||||
train_path: Path = Arg(..., help="Location of training data", exists=True),
|
||||
dev_path: Path = Arg(..., help="Location of development data", exists=True),
|
||||
config_path: Path = Arg(..., help="Path to config file", exists=True),
|
||||
output_path: Optional[Path] = Opt(None, "--output", "--output-path", "-o", help="Output directory to store model in"),
|
||||
code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
|
||||
init_tok2vec: Optional[Path] = Opt(None, "--init-tok2vec", "-t2v", help="Path to pretrained weights for the tok2vec components. See 'spacy pretrain'. Experimental."),
|
||||
raw_text: Optional[Path] = Opt(None, "--raw-text", "-rt", help="Path to jsonl file with unlabelled text documents."),
|
||||
verbose: bool = Opt(False, "--verbose", "-VV", help="Display more information for debugging purposes"),
|
||||
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
|
||||
use_gpu: int = Opt(-1, "--use-gpu", "-g", help="Use GPU"),
|
||||
tag_map_path: Optional[Path] = Opt(None, "--tag-map-path", "-tm", help="Location of JSON-formatted tag map"),
|
||||
omit_extra_lookups: bool = Opt(False, "--omit-extra-lookups", "-OEL", help="Don't include extra lookups in model"),
|
||||
|
@ -203,8 +203,10 @@ def train(
|
|||
msg.info(f"Initializing the nlp pipeline: {nlp.pipe_names}")
|
||||
train_examples = list(
|
||||
corpus.train_dataset(
|
||||
nlp, shuffle=False, gold_preproc=training["gold_preproc"],
|
||||
max_length=training["max_length"]
|
||||
nlp,
|
||||
shuffle=False,
|
||||
gold_preproc=training["gold_preproc"],
|
||||
max_length=training["max_length"],
|
||||
)
|
||||
)
|
||||
nlp.begin_training(lambda: train_examples)
|
||||
|
@ -322,10 +324,7 @@ def create_train_batches(nlp, corpus, cfg):
|
|||
discard_oversize=cfg["discard_oversize"],
|
||||
)
|
||||
else:
|
||||
batches = util.minibatch(
|
||||
train_examples,
|
||||
size=cfg["batch_size"],
|
||||
)
|
||||
batches = util.minibatch(train_examples, size=cfg["batch_size"])
|
||||
|
||||
# make sure the minibatch_by_words result is not empty, or we'll have an infinite training loop
|
||||
try:
|
||||
|
@ -438,7 +437,9 @@ def train_while_improving(
|
|||
|
||||
if raw_text:
|
||||
random.shuffle(raw_text)
|
||||
raw_examples = [Example.from_dict(nlp.make_doc(rt["text"]), {}) for rt in raw_text]
|
||||
raw_examples = [
|
||||
Example.from_dict(nlp.make_doc(rt["text"]), {}) for rt in raw_text
|
||||
]
|
||||
raw_batches = util.minibatch(raw_examples, size=8)
|
||||
|
||||
for step, (epoch, batch) in enumerate(train_data):
|
||||
|
|
Loading…
Reference in New Issue
Block a user