diff --git a/pyproject.toml b/pyproject.toml
index 387236c5f..d23730b00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ requires = [
     "cymem>=2.0.2,<2.1.0",
     "preshed>=3.0.2,<3.1.0",
     "murmurhash>=0.28.0,<1.1.0",
-    "thinc>=8.0.0a29,<8.0.0a40",
+    "thinc>=8.0.0a30,<8.0.0a40",
     "blis>=0.4.0,<0.5.0",
     "pytokenizations",
     "pathy"
diff --git a/requirements.txt b/requirements.txt
index eebe0852c..61d252ce4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 # Our libraries
 cymem>=2.0.2,<2.1.0
 preshed>=3.0.2,<3.1.0
-thinc>=8.0.0a29,<8.0.0a40
+thinc>=8.0.0a30,<8.0.0a40
 blis>=0.4.0,<0.5.0
 ml_datasets>=0.1.1
 murmurhash>=0.28.0,<1.1.0
diff --git a/setup.cfg b/setup.cfg
index 003015a6d..e6d02684e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,13 +34,13 @@ setup_requires =
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
     murmurhash>=0.28.0,<1.1.0
-    thinc>=8.0.0a29,<8.0.0a40
+    thinc>=8.0.0a30,<8.0.0a40
 install_requires =
     # Our libraries
     murmurhash>=0.28.0,<1.1.0
     cymem>=2.0.2,<2.1.0
     preshed>=3.0.2,<3.1.0
-    thinc>=8.0.0a29,<8.0.0a40
+    thinc>=8.0.0a30,<8.0.0a40
     blis>=0.4.0,<0.5.0
     wasabi>=0.7.1,<1.1.0
     srsly>=2.1.0,<3.0.0
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index 776d520a5..723068106 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -70,7 +70,10 @@ def fill_config(
     msg = Printer(no_print=is_stdout)
     with show_validation_error(hint_fill=False):
         config = util.load_config(base_path)
-        nlp, _ = util.load_model_from_config(config, auto_fill=True)
+        nlp, _ = util.load_model_from_config(config, auto_fill=True, validate=False)
+        # Load a second time with validation to be extra sure that the produced
+        # config result is a valid config
+        nlp, _ = util.load_model_from_config(nlp.config)
     filled = nlp.config
     if pretraining:
         validate_config_for_pretrain(filled, msg)
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 22dd4170d..9eae7c775 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -313,3 +313,19 @@ def test_config_optional_sections():
     # also how Config.interpolate works under the hood.
     new_config = Config().from_str(filled.to_str())
     assert new_config["pretraining"] == {}
+
+
+def test_config_auto_fill_extra_fields():
+    """Auto-fill must drop extra training fields and give a loadable config."""
+    config = Config({"nlp": {"lang": "en"}, "training": {}})
+    # load_model_from_config returns a 2-tuple, which is always truthy, so
+    # unpack it and assert on the loaded pipeline rather than on the tuple.
+    nlp, _ = load_model_from_config(config, auto_fill=True)
+    assert nlp.config["nlp"]["lang"] == "en"
+    config = Config({"nlp": {"lang": "en"}, "training": {"extra": "hello"}})
+    # Validation is disabled for this load; the unknown "extra" key is
+    # expected to be gone from the filled config afterwards.
+    nlp, _ = load_model_from_config(config, auto_fill=True, validate=False)
+    assert "extra" not in nlp.config["training"]
+    # Make sure the config generated is valid
+    load_model_from_config(nlp.config)