Fix linking resumed components (#6859)

* link components across enabled, resumed and frozen

* revert renaming

* revert renaming, the sequel
This commit is contained in:
Sofie Van Landeghem 2021-02-01 12:19:58 +01:00 committed by GitHub
parent 8a245076c4
commit acabb284dd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 3 deletions

View File

@ -1190,6 +1190,7 @@ class Language:
get_examples: Optional[Callable[[], Iterable[Example]]] = None,
*,
sgd: Optional[Optimizer] = None,
link_components: bool = True,
) -> Optimizer:
"""Initialize the pipe for training, using data examples if available.
@ -1197,6 +1198,8 @@ class Language:
returns gold-standard Example objects.
sgd (Optional[Optimizer]): An optimizer to use for updates. If not
provided, will be created using the .create_optimizer() method.
link_components (bool): Whether to automatically link listener components
after initialization (default True).
RETURNS (thinc.api.Optimizer): The optimizer.
DOCS: https://spacy.io/api/language#initialize
@ -1244,6 +1247,7 @@ class Language:
proc.initialize, p_settings, section="components", name=name
)
proc.initialize(get_examples, nlp=self, **p_settings)
if link_components:
self._link_components()
self._optimizer = sgd
if sgd is not None:
@ -1528,7 +1532,7 @@ class Language:
"""Register 'listeners' within pipeline components, to allow them to
effectively share weights.
"""
# I had though, "Why do we do this inside the Language object? Shouldn't
# I had thought, "Why do we do this inside the Language object? Shouldn't
# it be the tok2vec/transformer/etc's job?"
# The problem is we need to do it during deserialization... And the
# components don't receive the pipeline then. So this does have to be

View File

@ -64,8 +64,10 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
with nlp.select_pipes(enable=resume_components):
logger.info(f"Resuming training for: {resume_components}")
nlp.resume_training(sgd=optimizer)
# Make sure that listeners are defined before initializing further
nlp._link_components()
with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer, link_components=False)
logger.info(f"Initialized pipeline components: {nlp.pipe_names}")
# Detect components with listeners that are not frozen consistently
for name, proc in nlp.pipeline: