From 8baff1c7c0418b26bf690756e1cac81ecf655816 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sun, 12 May 2019 00:53:11 +0200
Subject: [PATCH 1/3] =?UTF-8?q?=F0=9F=92=AB=20Improve=20introspection=20of?=
 =?UTF-8?q?=20custom=20extension=20attributes=20(#3729)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add custom __dir__ to Underscore (see #3707)

* Make sure custom extension methods keep their docstrings (see #3707)

* Improve tests

* Prepend note on partial to docstring (see #3707)

* Remove print statement

* Handle cases where docstring is None
---
 spacy/tests/doc/test_underscore.py | 25 +++++++++++++++++++++++++
 spacy/tokens/underscore.py         | 16 +++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/doc/test_underscore.py b/spacy/tests/doc/test_underscore.py
index 8f47157fa..2877bfeea 100644
--- a/spacy/tests/doc/test_underscore.py
+++ b/spacy/tests/doc/test_underscore.py
@@ -140,3 +140,28 @@ def test_underscore_mutable_defaults_dict(en_vocab):
     assert len(token1._.mutable) == 2
     assert token1._.mutable["x"] == ["y"]
     assert len(token2._.mutable) == 0
+
+
+def test_underscore_dir(en_vocab):
+    """Test that dir() correctly returns extension attributes. This enables
+    things like tab-completion for the attributes in doc._."""
+    Doc.set_extension("test_dir", default=None)
+    doc = Doc(en_vocab, words=["hello", "world"])
+    assert "_" in dir(doc)
+    assert "test_dir" in dir(doc._)
+    assert "test_dir" not in dir(doc[0]._)
+    assert "test_dir" not in dir(doc[0:2]._)
+
+
+def test_underscore_docstring(en_vocab):
+    """Test that docstrings are available for extension methods, even though
+    they're partials."""
+
+    def test_method(doc, arg1=1, arg2=2):
+        """I am a docstring"""
+        return (arg1, arg2)
+
+    Doc.set_extension("test_docstrings", method=test_method)
+    doc = Doc(en_vocab, words=["hello", "world"])
+    assert test_method.__doc__ == "I am a docstring"
+    assert doc._.test_docstrings.__doc__.rsplit(". ")[-1] == "I am a docstring"
diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py
index ef1d78717..b36fe9294 100644
--- a/spacy/tokens/underscore.py
+++ b/spacy/tokens/underscore.py
@@ -25,6 +25,11 @@ class Underscore(object):
         object.__setattr__(self, "_start", start)
         object.__setattr__(self, "_end", end)
 
+    def __dir__(self):
+        # Hack to enable autocomplete on custom extensions
+        extensions = list(self._extensions.keys())
+        return ["set", "get", "has"] + extensions
+
     def __getattr__(self, name):
         if name not in self._extensions:
             raise AttributeError(Errors.E046.format(name=name))
@@ -32,7 +37,16 @@ class Underscore(object):
         if getter is not None:
             return getter(self._obj)
         elif method is not None:
-            return functools.partial(method, self._obj)
+            method_partial = functools.partial(method, self._obj)
+            # Hack to port over docstrings of the original function
+            # See https://stackoverflow.com/q/27362727/6400719
+            method_docstring = method.__doc__ or ""
+            method_docstring_prefix = (
+                "This method is a partial function and its first argument "
+                "(the object it's called on) will be filled automatically. "
+            )
+            method_partial.__doc__ = method_docstring_prefix + method_docstring
+            return method_partial
         else:
             key = self._get_key(name)
             if key in self._doc.user_data:

From ed18a6efbd0aed54be103921ceedd15157722cb7 Mon Sep 17 00:00:00 2001
From: BreakBB <33514570+BreakBB@users.noreply.github.com>
Date: Tue, 14 May 2019 16:59:31 +0200
Subject: [PATCH 2/3] Add check for callable to 'Language.replace_pipe' to fix
 #3737 (#3741)

---
 spacy/errors.py                           | 2 ++
 spacy/language.py                         | 5 +++++
 spacy/tests/pipeline/test_pipe_methods.py | 6 ++++--
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 5f964114e..b28393156 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -383,6 +383,8 @@ class Errors(object):
     E133 = ("The sum of prior probabilities for alias '{alias}' should not exceed 1, "
             "but found {sum}.")
     E134 = ("Alias '{alias}' defined for unknown entity '{entity}'.")
+    E135 = ("If you meant to replace a built-in component, use `create_pipe`: "
+            "`nlp.replace_pipe('{name}', nlp.create_pipe('{name}'))`")
 
 
 @add_codes
diff --git a/spacy/language.py b/spacy/language.py
index 6bd21b0bc..924c0b423 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -333,6 +333,11 @@ class Language(object):
         """
         if name not in self.pipe_names:
             raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
+        if not hasattr(component, "__call__"):
+            msg = Errors.E003.format(component=repr(component), name=name)
+            if isinstance(component, basestring_) and component in self.factories:
+                msg += Errors.E135.format(name=name)
+            raise ValueError(msg)
         self.pipeline[self.pipe_names.index(name)] = (name, component)
 
     def rename_pipe(self, old_name, new_name):
diff --git a/spacy/tests/pipeline/test_pipe_methods.py b/spacy/tests/pipeline/test_pipe_methods.py
index d36201718..a0870784c 100644
--- a/spacy/tests/pipeline/test_pipe_methods.py
+++ b/spacy/tests/pipeline/test_pipe_methods.py
@@ -52,11 +52,13 @@ def test_get_pipe(nlp, name):
     assert nlp.get_pipe(name) == new_pipe
 
 
-@pytest.mark.parametrize("name,replacement", [("my_component", lambda doc: doc)])
-def test_replace_pipe(nlp, name, replacement):
+@pytest.mark.parametrize("name,replacement,not_callable", [("my_component", lambda doc: doc, {})])
+def test_replace_pipe(nlp, name, replacement, not_callable):
     with pytest.raises(ValueError):
         nlp.replace_pipe(name, new_pipe)
     nlp.add_pipe(new_pipe, name=name)
+    with pytest.raises(ValueError):
+        nlp.replace_pipe(name, not_callable)
     nlp.replace_pipe(name, replacement)
     assert nlp.get_pipe(name) != new_pipe
     assert nlp.get_pipe(name) == replacement

From 321c9f5acc7dccf329dcd827955e833556ca4065 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 16 May 2019 23:15:58 +0200
Subject: [PATCH 3/3] Fix lex_id docs (closes #3743)

---
 website/docs/api/lexeme.md | 1 -
 website/docs/api/token.md  | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/website/docs/api/lexeme.md b/website/docs/api/lexeme.md
index 5ec2aaf0c..018dc72d8 100644
--- a/website/docs/api/lexeme.md
+++ b/website/docs/api/lexeme.md
@@ -128,7 +128,6 @@ The L2 norm of the lexeme's vector representation.
 | `text` | unicode | Verbatim text content. |
 | `orth` | int | ID of the verbatim text content. |
 | `orth_` | unicode | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. |
-| `lex_id` | int | ID of the lexeme's lexical type. |
 | `rank` | int | Sequential ID of the lexemes's lexical type, used to index into tables, e.g. for word vectors. |
 | `flags` | int | Container of the lexeme's binary flags. |
 | `norm` | int | The lexemes's norm, i.e. a normalized form of the lexeme text. |
diff --git a/website/docs/api/token.md b/website/docs/api/token.md
index 2085a02c6..356cffb59 100644
--- a/website/docs/api/token.md
+++ b/website/docs/api/token.md
@@ -468,7 +468,7 @@ The L2 norm of the token's vector representation.
 | `prob` | float | Smoothed log probability estimate of token's word type (context-independent entry in the vocabulary). |
 | `idx` | int | The character offset of the token within the parent document. |
 | `sentiment` | float | A scalar value indicating the positivity or negativity of the token. |
-| `lex_id` | int | Sequential ID of the token's lexical type. |
+| `lex_id` | int | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. |
 | `rank` | int | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. |
 | `cluster` | int | Brown cluster ID. |
 | `_` | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). |