From ef77c886388713f7651daced6c90c63165a88d6b Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Wed, 17 Mar 2021 14:56:04 +0900 Subject: [PATCH 1/2] Don't warn about components not in the pipeline See here: https://github.com/explosion/spaCy/discussions/7463 Still need to check if there are any side effects of listeners being present but not in the pipeline, but this commit will silence the warnings. --- spacy/training/initialize.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index f7f2f21a4..d017aa909 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -74,6 +74,10 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language": for name, proc in nlp.pipeline: if getattr(proc, "listening_components", None): # e.g. tok2vec/transformer for listener in proc.listening_components: + # Don't warn about components not in the pipeline + if listener not in nlp.pipeline: + continue + if listener in frozen_components and name not in frozen_components: logger.warning(Warnings.W087.format(name=name, listener=listener)) # We always check this regardless, in case user freezes tok2vec From 40bc01e66823c82a5319497ad46675b83bc7878f Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Wed, 17 Mar 2021 22:41:41 +0900 Subject: [PATCH 2/2] Proactively remove unused listeners With this, the changes in initialize.py might be unnecessary. Requires testing. --- spacy/language.py | 24 +++++++++++++++--------- spacy/training/initialize.py | 19 +++++++++---------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 871dfafaa..04a5e843e 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1686,15 +1686,21 @@ class Language: ) # Detect components with listeners that are not frozen consistently for name, proc in nlp.pipeline: - if getattr(proc, "listening_components", None): # e.g. 
tok2vec/transformer - for listener in proc.listening_components: - # If it's a component sourced from another pipeline, we check if - # the tok2vec listeners should be replaced with standalone tok2vec - # models (e.g. so component can be frozen without its performance - # degrading when other components/tok2vec are updated) - paths = sourced.get(listener, {}).get("replace_listeners", []) - if paths: - nlp.replace_listeners(name, listener, paths) + # Remove listeners not in the pipeline + listener_names = getattr(proc, "listening_components", []) + unused_listener_names = [ll for ll in listener_names if ll not in nlp.pipe_names] + for listener_name in unused_listener_names: + for listener in proc.listener_map.get(listener_name, []): + proc.remove_listener(listener, listener_name) + + for listener in getattr(proc, "listening_components", []): # e.g. tok2vec/transformer + # If it's a component sourced from another pipeline, we check if + # the tok2vec listeners should be replaced with standalone tok2vec + # models (e.g. so component can be frozen without its performance + # degrading when other components/tok2vec are updated) + paths = sourced.get(listener, {}).get("replace_listeners", []) + if paths: + nlp.replace_listeners(name, listener, paths) return nlp def replace_listeners( diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py index d017aa909..f623627eb 100644 --- a/spacy/training/initialize.py +++ b/spacy/training/initialize.py @@ -72,17 +72,16 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language": logger.info(f"Initialized pipeline components: {nlp.pipe_names}") # Detect components with listeners that are not frozen consistently for name, proc in nlp.pipeline: - if getattr(proc, "listening_components", None): # e.g. 
tok2vec/transformer - for listener in proc.listening_components: - # Don't warn about components not in the pipeline - if listener not in nlp.pipeline: - continue + for listener in getattr(proc, "listening_components", []): # e.g. tok2vec/transformer + # Don't warn about components not in the pipeline + if listener not in nlp.pipe_names: + continue - if listener in frozen_components and name not in frozen_components: - logger.warning(Warnings.W087.format(name=name, listener=listener)) - # We always check this regardless, in case user freezes tok2vec - if listener not in frozen_components and name in frozen_components: - logger.warning(Warnings.W086.format(name=name, listener=listener)) + if listener in frozen_components and name not in frozen_components: + logger.warning(Warnings.W087.format(name=name, listener=listener)) + # We always check this regardless, in case user freezes tok2vec + if listener not in frozen_components and name in frozen_components: + logger.warning(Warnings.W086.format(name=name, listener=listener)) return nlp