This commit is contained in:
Ines Montani 2020-07-25 12:21:37 +02:00
parent a063a82c40
commit c003d26b94
3 changed files with 3 additions and 3 deletions

View File

@@ -483,6 +483,7 @@ class Errors:
E199 = ("Unable to merge 0-length span at doc[{start}:{end}].") E199 = ("Unable to merge 0-length span at doc[{start}:{end}].")
# TODO: fix numbering after merging develop into master # TODO: fix numbering after merging develop into master
E955 = ("Can't find table '{table}' for language '{lang}' in spacy-lookups-data.")
E956 = ("Can't find component '{name}' in [components] block in the config. " E956 = ("Can't find component '{name}' in [components] block in the config. "
"Available components: {opts}") "Available components: {opts}")
E957 = ("Writing directly to Language.factories isn't needed anymore in " E957 = ("Writing directly to Language.factories isn't needed anymore in "

View File

@@ -12,7 +12,6 @@ from .strings import get_string_id
UNSET = object() UNSET = object()
@registry.language_data("spacy-lookups-data")
def load_lookups( def load_lookups(
lang: str, tables: List[str], strict: bool = True lang: str, tables: List[str], strict: bool = True
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
@@ -23,6 +22,7 @@ def load_lookups(
lang (str): The language code (corresponds to entry point exposed by lang (str): The language code (corresponds to entry point exposed by
the spacy-lookups-data package). the spacy-lookups-data package).
tables (List[str]): Name of tables to load, e.g. ["lemma_lookup", "lemma_exc"] tables (List[str]): Name of tables to load, e.g. ["lemma_lookup", "lemma_exc"]
strict (bool): Whether to raise an error if a table doesn't exist.
RETURNS (Dict[str, Any]): The lookups, keyed by table name. RETURNS (Dict[str, Any]): The lookups, keyed by table name.
""" """
# TODO: import spacy_lookups_data instead of going via entry points here? # TODO: import spacy_lookups_data instead of going via entry points here?
@@ -33,7 +33,7 @@ def load_lookups(
for table in tables: for table in tables:
if table not in data: if table not in data:
if strict: if strict:
raise ValueError("TODO: unknown table") raise ValueError(Errors.E955.format(table=table, lang=lang))
language_data = {} language_data = {}
else: else:
language_data = load_language_data(data[table]) language_data = load_language_data(data[table])

View File

@@ -62,7 +62,6 @@ class registry(thinc.registry):
tokenizers = catalogue.create("spacy", "tokenizers", entry_points=True) tokenizers = catalogue.create("spacy", "tokenizers", entry_points=True)
lemmatizers = catalogue.create("spacy", "lemmatizers", entry_points=True) lemmatizers = catalogue.create("spacy", "lemmatizers", entry_points=True)
lookups = catalogue.create("spacy", "lookups", entry_points=True) lookups = catalogue.create("spacy", "lookups", entry_points=True)
language_data = catalogue.create("spacy", "language_data", entry_points=True)
displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True) displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True)
assets = catalogue.create("spacy", "assets", entry_points=True) assets = catalogue.create("spacy", "assets", entry_points=True)
# These are factories registered via third-party packages and the # These are factories registered via third-party packages and the