Fix Language-specific factory handling in package command (#9674)

* Use internal names for factories

If a component factory is registered like `@French.factory(...)` instead
of `@Language.factory(...)`, the name in the factories registry will be
prefixed with the language code. However in the nlp.config object the
factory will be listed without the language code. The `add_pipe` code
has fallback logic to handle this, but packaging code and the registry
itself don't.

This change makes it so that the factory name in nlp.config is the
language-specific form. It's not clear if this will break anything else,
but it does seem to fix the inconsistency and resolve the specific user
issue that brought this to our attention.

* Change approach to use fallback in package lookup

This adds fallback logic to the package lookup, so it doesn't have to
touch the way the config is built. It seems to fix the tests too.

* Remove unecessary line

* Add test

Thsi also adds an assert that seems to have been forgotten.
This commit is contained in:
Paul O'Leary McCann 2021-11-29 07:31:02 +00:00 committed by GitHub
parent 7b134b8fbd
commit ac05de2c6c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 2 deletions

View File

@ -4,6 +4,7 @@ from pathlib import Path
from wasabi import Printer, MarkdownRenderer, get_raw_input
from thinc.api import Config
from collections import defaultdict
from catalogue import RegistryError
import srsly
import sys
@ -212,9 +213,18 @@ def get_third_party_dependencies(
if "factory" in component:
funcs["factories"].add(component["factory"])
modules = set()
lang = config["nlp"]["lang"]
for reg_name, func_names in funcs.items():
for func_name in func_names:
func_info = util.registry.find(reg_name, func_name)
# Try the lang-specific version and fall back
try:
func_info = util.registry.find(reg_name, lang + "." + func_name)
except RegistryError:
try:
func_info = util.registry.find(reg_name, func_name)
except RegistryError as regerr:
# lang-specific version being absent is not actually an issue
raise regerr from None
module_name = func_info.get("module") # type: ignore[attr-defined]
if module_name: # the code is part of a module, not a --code file
modules.add(func_info["module"].split(".")[0]) # type: ignore[index]

View File

@ -565,7 +565,16 @@ def test_get_third_party_dependencies():
}
},
)
get_third_party_dependencies(nlp.config) == []
assert get_third_party_dependencies(nlp.config) == []
# Test with lang-specific factory
@Dutch.factory("third_party_test")
def test_factory(nlp, name):
return lambda x: x
nlp.add_pipe("third_party_test")
# Before #9674 this would throw an exception
get_third_party_dependencies(nlp.config)
@pytest.mark.parametrize(