mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Merge branch 'develop' into fix/default-corpus-values
This commit is contained in:
commit
df8dd91b6f
|
@ -49,35 +49,11 @@ def train_cli(
|
|||
config = util.load_config(config_path, overrides=overrides, interpolate=False)
|
||||
msg.divider("Initializing pipeline")
|
||||
with show_validation_error(config_path, hint_fill=False):
|
||||
nlp = init_pipeline(config, output_path, use_gpu=use_gpu)
|
||||
nlp = init_nlp(config, use_gpu=use_gpu)
|
||||
msg.divider("Training pipeline")
|
||||
train(nlp, output_path, use_gpu=use_gpu, silent=False)
|
||||
|
||||
|
||||
def init_pipeline(
    config: Config, output_path: Optional[Path], *, use_gpu: int = -1
) -> Language:
    """Return an initialized pipeline, reusing a saved copy when possible.

    When ``output_path`` is given, the initialized pipeline is kept in the
    ``model-initial`` subdirectory of that path:

    * if the directory does not exist yet, the pipeline is initialized from
      ``config`` and written there;
    * if it exists, it is loaded, and ``must_reinitialize`` decides (from
      ``config`` and the loaded pipeline's config) whether it can be used
      as-is or has to be rebuilt and written again.

    When ``output_path`` is None nothing is read from or written to disk.

    config: The config passed on to ``init_nlp``.
    output_path: Directory that holds ``model-initial``, or None.
    use_gpu: Forwarded to ``init_nlp`` as the ``use_gpu`` keyword argument.
    RETURNS: The initialized (or loaded) pipeline object.
    """
    # No output directory: nothing to cache, just build the pipeline.
    if output_path is None:
        return init_nlp(config, use_gpu=use_gpu)

    init_path = output_path / "model-initial"

    def build_and_save() -> Language:
        # Initialize from the config and persist the result for later runs.
        fresh = init_nlp(config, use_gpu=use_gpu)
        fresh.to_disk(init_path)
        return fresh

    if init_path.exists():
        cached = util.load_model(init_path)
        if not must_reinitialize(config, cached.config):
            msg.good(f"Loaded initialized pipeline from {init_path}")
            return cached
        # The saved pipeline no longer matches the config: rebuild it.
        msg.warn("Config has changed: need to re-initialize pipeline")
        rebuilt = build_and_save()
        msg.good(f"Re-initialized pipeline in {init_path}")
        return rebuilt

    # First run for this output directory.
    msg.info(f"Initializing the pipeline in {init_path}")
    created = build_and_save()
    msg.good(f"Saved initialized pipeline to {init_path}")
    return created
|
||||
|
||||
|
||||
def verify_cli_args(config_path: Path, output_path: Optional[Path] = None) -> None:
|
||||
# Make sure all files and paths exist if they are needed
|
||||
if not config_path or not config_path.exists():
|
||||
|
|
|
@ -28,6 +28,7 @@ def create_docbin_reader(
|
|||
) -> Callable[["Language"], Iterable[Example]]:
|
||||
if path is None:
|
||||
raise ValueError(Errors.E913)
|
||||
util.logger.debug(f"Loading corpus from path: {path}")
|
||||
return Corpus(
|
||||
path,
|
||||
gold_preproc=gold_preproc,
|
||||
|
|
|
@ -94,6 +94,7 @@ def init_vocab(
|
|||
if vectors is not None:
|
||||
load_vectors_into_model(nlp, vectors)
|
||||
logger.info(f"Added vectors: {vectors}")
|
||||
logger.info("Finished initializing nlp object")
|
||||
|
||||
|
||||
def load_vectors_into_model(
|
||||
|
|
Loading…
Reference in New Issue
Block a user