Add tests to demonstrate source/name/listener_map issues

This commit is contained in:
Adriane Boyd 2023-06-06 21:36:09 +02:00
parent c936db2faf
commit 65e481e176

View File

@ -188,8 +188,7 @@ def test_tok2vec_listener(with_vectors):
for tag in t[1]["tags"]: for tag in t[1]["tags"]:
tagger.add_label(tag) tagger.add_label(tag)
# Check that the Tok2Vec component finds it listeners # Check that the Tok2Vec component finds its listeners
assert tok2vec.listeners == []
optimizer = nlp.initialize(lambda: train_examples) optimizer = nlp.initialize(lambda: train_examples)
assert tok2vec.listeners == [tagger_tok2vec] assert tok2vec.listeners == [tagger_tok2vec]
@ -217,7 +216,7 @@ def test_tok2vec_listener_callback():
assert nlp.pipe_names == ["tok2vec", "tagger"] assert nlp.pipe_names == ["tok2vec", "tagger"]
tagger = nlp.get_pipe("tagger") tagger = nlp.get_pipe("tagger")
tok2vec = nlp.get_pipe("tok2vec") tok2vec = nlp.get_pipe("tok2vec")
nlp._link_components() nlp._link_components() # TODO: remove
docs = [nlp.make_doc("A random sentence")] docs = [nlp.make_doc("A random sentence")]
tok2vec.model.initialize(X=docs) tok2vec.model.initialize(X=docs)
gold_array = [[1.0 for tag in ["V", "Z"]] for word in docs] gold_array = [[1.0 for tag in ["V", "Z"]] for word in docs]
@ -426,29 +425,46 @@ def test_replace_listeners_from_config():
nlp.to_disk(dir_path) nlp.to_disk(dir_path)
base_model = str(dir_path) base_model = str(dir_path)
new_config = { new_config = {
"nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]}, "nlp": {
"lang": "en",
"pipeline": ["tok2vec", "tagger2", "ner3", "tagger4"],
},
"components": { "components": {
"tok2vec": {"source": base_model}, "tok2vec": {"source": base_model},
"tagger": { "tagger2": {
"source": base_model, "source": base_model,
"component": "tagger",
"replace_listeners": ["model.tok2vec"], "replace_listeners": ["model.tok2vec"],
}, },
"ner": {"source": base_model}, "ner3": {
"source": base_model,
"component": "ner",
},
"tagger4": {
"source": base_model,
"component": "tagger",
},
}, },
} }
new_nlp = util.load_model_from_config(new_config, auto_fill=True) new_nlp = util.load_model_from_config(new_config, auto_fill=True)
new_nlp.initialize(lambda: examples) new_nlp.initialize(lambda: examples)
tok2vec = new_nlp.get_pipe("tok2vec") tok2vec = new_nlp.get_pipe("tok2vec")
tagger = new_nlp.get_pipe("tagger") tagger = new_nlp.get_pipe("tagger2")
ner = new_nlp.get_pipe("ner") ner = new_nlp.get_pipe("ner3")
assert tok2vec.listening_components == ["ner"] assert "ner" not in new_nlp.pipe_names
assert "tagger" not in new_nlp.pipe_names
assert tok2vec.listening_components == ["ner3", "tagger4"]
assert any(isinstance(node, Tok2VecListener) for node in ner.model.walk()) assert any(isinstance(node, Tok2VecListener) for node in ner.model.walk())
assert not any(isinstance(node, Tok2VecListener) for node in tagger.model.walk()) assert not any(isinstance(node, Tok2VecListener) for node in tagger.model.walk())
t2v_cfg = new_nlp.config["components"]["tok2vec"]["model"] t2v_cfg = new_nlp.config["components"]["tok2vec"]["model"]
assert t2v_cfg["@architectures"] == "spacy.Tok2Vec.v2" assert t2v_cfg["@architectures"] == "spacy.Tok2Vec.v2"
assert new_nlp.config["components"]["tagger"]["model"]["tok2vec"] == t2v_cfg assert new_nlp.config["components"]["tagger2"]["model"]["tok2vec"] == t2v_cfg
assert ( assert (
new_nlp.config["components"]["ner"]["model"]["tok2vec"]["@architectures"] new_nlp.config["components"]["ner3"]["model"]["tok2vec"]["@architectures"]
== "spacy.Tok2VecListener.v1"
)
assert (
new_nlp.config["components"]["tagger4"]["model"]["tok2vec"]["@architectures"]
== "spacy.Tok2VecListener.v1" == "spacy.Tok2VecListener.v1"
) )
@ -540,3 +556,60 @@ def test_tok2vec_listeners_textcat():
assert cats1["imperative"] < 0.9 assert cats1["imperative"] < 0.9
assert [t.tag_ for t in docs[0]] == ["V", "J", "N"] assert [t.tag_ for t in docs[0]] == ["V", "J", "N"]
assert [t.tag_ for t in docs[1]] == ["N", "V", "J", "N"] assert [t.tag_ for t in docs[1]] == ["N", "V", "J", "N"]
def test_tok2vec_listener_source_link_name():
"""The component's internal name and the tok2vec listener map correspond
to the most recently modified pipeline.
"""
orig_config = Config().from_str(cfg_string_multi)
nlp1 = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
nlp1._link_components() # TODO: remove
assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
nlp2 = English()
nlp2.add_pipe("tok2vec", source=nlp1)
nlp2.add_pipe("tagger", name="tagger2", source=nlp1)
# there is no way to have the component have the right name for both
# pipelines, right now the most recently modified pipeline is prioritized
#assert nlp1.get_pipe("tagger").name == nlp2.get_pipe("tagger2").name == "tagger2" # TODO: uncomment
# there is no way to have the tok2vec have the right listener map for both
# pipelines, right now the most recently modified pipeline is prioritized
assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2"]
nlp2.add_pipe("ner", name="ner3", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2", "ner3"]
nlp2.remove_pipe("ner3")
assert nlp2.get_pipe("tok2vec").listening_components == ["tagger2"]
nlp2.remove_pipe("tagger2")
assert nlp2.get_pipe("tok2vec").listening_components == []
# at this point the tok2vec component corresponds to nlp2
assert nlp1.get_pipe("tok2vec").listening_components == []
# modifying the nlp1 pipeline syncs the tok2vec listener map back to nlp1
nlp1.add_pipe("sentencizer")
assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
# modifying nlp2 syncs it back to nlp2
nlp2.add_pipe("sentencizer")
assert nlp1.get_pipe("tok2vec").listening_components == []
def test_tok2vec_listener_source_replace_listeners():
orig_config = Config().from_str(cfg_string_multi)
nlp1 = util.load_model_from_config(orig_config, auto_fill=True, validate=True)
nlp1._link_components() # TODO: remove
assert nlp1.get_pipe("tok2vec").listening_components == ["tagger", "ner"]
nlp1._link_components() # TODO: remove
nlp1.replace_listeners("tok2vec", "tagger", ["model.tok2vec"])
assert nlp1.get_pipe("tok2vec").listening_components == ["ner"]
nlp2 = English()
nlp2.add_pipe("tok2vec", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == []
nlp2.add_pipe("tagger", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == []
nlp2.add_pipe("ner", name="ner2", source=nlp1)
assert nlp2.get_pipe("tok2vec").listening_components == ["ner2"]