Add flag to skip saving model-last

This commit is contained in:
Adriane Boyd 2023-04-03 13:39:34 +02:00
parent a562767336
commit 6d38cc10ff
4 changed files with 24 additions and 15 deletions

View File

@@ -23,6 +23,7 @@ def pretrain_cli(
     resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
     epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
     use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
+    skip_last: bool = Opt(False, "--skip-last", "-L", help="Skip saving model-last.bin"),
     # fmt: on
 ):
     """
@@ -74,6 +75,7 @@ def pretrain_cli(
         epoch_resume=epoch_resume,
         use_gpu=use_gpu,
         silent=False,
+        skip_last=skip_last,
     )
     msg.good("Successfully finished pretrain")

View File

@@ -165,7 +165,8 @@ def test_pretraining_default():
 @pytest.mark.parametrize("objective", CHAR_OBJECTIVES)
-def test_pretraining_tok2vec_characters(objective):
+@pytest.mark.parametrize("skip_last", (True, False))
+def test_pretraining_tok2vec_characters(objective, skip_last):
     """Test that pretraining works with the character objective"""
     config = Config().from_str(pretrain_string_listener)
     config["pretraining"]["objective"] = objective
@@ -178,11 +179,14 @@ def test_pretraining_tok2vec_characters(objective):
     filled["paths"]["raw_text"] = file_path
     filled = filled.interpolate()
     assert filled["pretraining"]["component"] == "tok2vec"
-    pretrain(filled, tmp_dir)
+    pretrain(filled, tmp_dir, skip_last=skip_last)
     assert Path(tmp_dir / "model0.bin").exists()
     assert Path(tmp_dir / "model4.bin").exists()
-    assert Path(tmp_dir / "model-last.bin").exists()
     assert not Path(tmp_dir / "model5.bin").exists()
+    if skip_last:
+        assert not Path(tmp_dir / "model-last.bin").exists()
+    else:
+        assert Path(tmp_dir / "model-last.bin").exists()


 @pytest.mark.parametrize("objective", VECTOR_OBJECTIVES)

View File

@@ -24,6 +24,7 @@ def pretrain(
     epoch_resume: Optional[int] = None,
     use_gpu: int = -1,
     silent: bool = True,
+    skip_last: bool = False,
 ):
     msg = Printer(no_print=silent)
     if config["training"]["seed"] is not None:
@@ -98,6 +99,7 @@ def pretrain(
                 _save_model(epoch)
                 tracker.epoch_loss = 0.0
     finally:
-        _save_model(P["max_epochs"], is_last=True)
+        if not skip_last:
+            _save_model(P["max_epochs"], is_last=True)

View File

@@ -1123,13 +1123,14 @@ $ python -m spacy pretrain [config_path] [output_dir] [--code] [--resume-path] [
 ```

 | Name | Description |
-| ----------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| -------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | `config_path` | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. If `-`, the data will be [read from stdin](/usage/training#config-stdin). ~~Union[Path, str] \(positional)~~ |
 | `output_dir` | Directory to save binary weights to on each epoch. ~~Path (positional)~~ |
 | `--code`, `-c` | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-functions) for new architectures. ~~Optional[Path] \(option)~~ |
 | `--resume-path`, `-r` | Path to pretrained weights from which to resume pretraining. ~~Optional[Path] \(option)~~ |
 | `--epoch-resume`, `-er` | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. ~~Optional[int] \(option)~~ |
 | `--gpu-id`, `-g` | GPU ID or `-1` for CPU. Defaults to `-1`. ~~int (option)~~ |
+| `--skip-last`, `-L` <Tag variant="new">3.5.2</Tag> | Skip saving `model-last.bin`. Defaults to `False`. ~~bool (flag)~~ |
 | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
 | overrides | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.dropout 0.2`. ~~Any (option/flag)~~ |
 | **CREATES** | The pretrained weights that can be used to initialize `spacy train`. |