From f4f46b617f2106f51579bae2b71c71867d1cc7eb Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sun, 14 Feb 2021 14:02:14 +1100
Subject: [PATCH] Preserve sourced components in fill-config (fixes #7055) (#7058)

---
 spacy/cli/init_config.py                 |  4 +++
 spacy/tests/regression/test_issue7055.py | 40 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue7055.py

diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index 6bdf393f6..9880c389c 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -103,6 +103,10 @@ def fill_config(
         # config result is a valid config
         nlp = util.load_model_from_config(nlp.config)
     filled = nlp.config
+    # If we have sourced components in the base config, those will have been
+    # replaced with their actual config after loading, so we have to re-add them
+    sourced = util.get_sourced_components(config)
+    filled["components"].update(sourced)
     if pretraining:
         validate_config_for_pretrain(filled, msg)
         pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
diff --git a/spacy/tests/regression/test_issue7055.py b/spacy/tests/regression/test_issue7055.py
new file mode 100644
index 000000000..c7ddb0a75
--- /dev/null
+++ b/spacy/tests/regression/test_issue7055.py
@@ -0,0 +1,40 @@
+from spacy.cli.init_config import fill_config
+from spacy.util import load_config
+from spacy.lang.en import English
+from thinc.api import Config
+
+from ..util import make_tempdir
+
+
+def test_issue7055():
+    """Test that fill-config doesn't turn sourced components into factories."""
+    source_cfg = {
+        "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger"]},
+        "components": {
+            "tok2vec": {"factory": "tok2vec"},
+            "tagger": {"factory": "tagger"},
+        },
+    }
+    source_nlp = English.from_config(source_cfg)
+    with make_tempdir() as dir_path:
+        # We need to create a loadable source pipeline
+        source_path = dir_path / "test_model"
+        source_nlp.to_disk(source_path)
+        base_cfg = {
+            "nlp": {"lang": "en", "pipeline": ["tok2vec", "tagger", "ner"]},
+            "components": {
+                "tok2vec": {"source": str(source_path)},
+                "tagger": {"source": str(source_path)},
+                "ner": {"factory": "ner"},
+            },
+        }
+        base_cfg = Config(base_cfg)
+        base_path = dir_path / "base.cfg"
+        base_cfg.to_disk(base_path)
+        output_path = dir_path / "config.cfg"
+        fill_config(output_path, base_path, silent=True)
+        filled_cfg = load_config(output_path)
+    assert filled_cfg["components"]["tok2vec"]["source"] == str(source_path)
+    assert filled_cfg["components"]["tagger"]["source"] == str(source_path)
+    assert filled_cfg["components"]["ner"]["factory"] == "ner"
+    assert "model" in filled_cfg["components"]["ner"]