From bb9ad37e054d8bc5d0bb9704416e7fb6ca7b0791 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 20 Dec 2018 23:58:43 +0100 Subject: [PATCH] Improve entry points and allow custom language classes via entry points (#3080) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Remove check for overwritten factory This needs to be handled differently – on first initialization, a new factory will be added and any subsequent initializations will trigger this warning, even if it's a new entry point that doesn't overwrite a built-in. * Add helper to only load specific entry point Useful for loading languages via entry points, so that they can be lazy-loaded. Otherwise, all entry point languages would have to be loaded upfront. * Check entry points for custom languages --- spacy/language.py | 5 +---- spacy/util.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index 259379df0..c1abb62b4 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -28,7 +28,7 @@ from .lang.punctuation import TOKENIZER_INFIXES from .lang.tokenizer_exceptions import TOKEN_MATCH from .lang.tag_map import TAG_MAP from .lang.lex_attrs import LEX_ATTRS, is_stop -from .errors import Errors, Warnings, user_warning +from .errors import Errors from . import util from . import about @@ -146,9 +146,6 @@ class Language(object): RETURNS (Language): The newly constructed object. """ user_factories = util.get_entry_points("spacy_factories") - for factory in user_factories.keys(): - if factory in self.factories: - user_warning(Warnings.W009.format(name=factory)) self.factories.update(user_factories) self._meta = dict(meta) self._path = None diff --git a/spacy/util.py b/spacy/util.py index 13810857b..66b0dc30e 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -43,6 +43,11 @@ def get_lang_class(lang): RETURNS (Language): Language class. """ global LANGUAGES + # Check if an entry point is exposed for the language code + entry_point = get_entry_point("spacy_languages", lang) + if entry_point is not None: + LANGUAGES[lang] = entry_point + return entry_point if lang not in LANGUAGES: try: module = importlib.import_module(".lang.%s" % lang, "spacy") @@ -230,6 +235,19 @@ def get_entry_points(key): return result +def get_entry_point(key, value): + """Check if registered entry point is available for a given name and + load it. Otherwise, return None. + + key (unicode): Entry point name. + value (unicode): Name of entry point to load. + RETURNS: The loaded entry point or None. + """ + for entry_point in pkg_resources.iter_entry_points(key): + if entry_point.name == value: + return entry_point.load() + + def is_in_jupyter(): """Check if user is running spaCy from a Jupyter notebook by detecting the IPython kernel. Mainly used for the displaCy visualizer.