diff --git a/spacy/errors.py b/spacy/errors.py index 07c3df686..fb50f913d 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -483,6 +483,7 @@ class Errors: E199 = ("Unable to merge 0-length span at doc[{start}:{end}].") # TODO: fix numbering after merging develop into master + E955 = ("Can't find table '{table}' for language '{lang}' in spacy-lookups-data.") E956 = ("Can't find component '{name}' in [components] block in the config. " "Available components: {opts}") E957 = ("Writing directly to Language.factories isn't needed anymore in " diff --git a/spacy/lookups.py b/spacy/lookups.py index e5a4a0b40..bf71ba877 100644 --- a/spacy/lookups.py +++ b/spacy/lookups.py @@ -12,7 +12,6 @@ from .strings import get_string_id UNSET = object() -@registry.language_data("spacy-lookups-data") def load_lookups( lang: str, tables: List[str], strict: bool = True ) -> Optional[Dict[str, Any]]: @@ -23,6 +22,7 @@ def load_lookups( lang (str): The language code (corresponds to entry point exposed by the spacy-lookups-data package). tables (List[str]): Name of tables to load, e.g. ["lemma_lookup", "lemma_exc"] + strict (bool): Whether to raise an error if a table doesn't exist. RETURNS (Dict[str, Any]): The lookups, keyed by table name. """ # TODO: import spacy_lookups_data instead of going via entry points here? @@ -33,7 +33,7 @@ def load_lookups( for table in tables: if table not in data: if strict: - raise ValueError("TODO: unknown table") + raise ValueError(Errors.E955.format(table=table, lang=lang)) language_data = {} else: language_data = load_language_data(data[table]) diff --git a/spacy/util.py b/spacy/util.py index 3b6ba0f25..682d45bc9 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -62,7 +62,6 @@ class registry(thinc.registry): tokenizers = catalogue.create("spacy", "tokenizers", entry_points=True) lemmatizers = catalogue.create("spacy", "lemmatizers", entry_points=True) lookups = catalogue.create("spacy", "lookups", entry_points=True) - language_data = catalogue.create("spacy", "language_data", entry_points=True) displacy_colors = catalogue.create("spacy", "displacy_colors", entry_points=True) assets = catalogue.create("spacy", "assets", entry_points=True) # These are factories registered via third-party packages and the