mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 20:28:20 +03:00
Merge branch 'master' into spacy.io
This commit is contained in:
commit
3cbbc4afcb
|
@ -383,6 +383,8 @@ class Errors(object):
|
|||
E133 = ("The sum of prior probabilities for alias '{alias}' should not exceed 1, "
|
||||
"but found {sum}.")
|
||||
E134 = ("Alias '{alias}' defined for unknown entity '{entity}'.")
|
||||
E135 = ("If you meant to replace a built-in component, use `create_pipe`: "
|
||||
"`nlp.replace_pipe('{name}', nlp.create_pipe('{name}'))`")
|
||||
|
||||
|
||||
@add_codes
|
||||
|
|
|
@ -333,6 +333,11 @@ class Language(object):
|
|||
"""
|
||||
if name not in self.pipe_names:
|
||||
raise ValueError(Errors.E001.format(name=name, opts=self.pipe_names))
|
||||
if not hasattr(component, "__call__"):
|
||||
msg = Errors.E003.format(component=repr(component), name=name)
|
||||
if isinstance(component, basestring_) and component in self.factories:
|
||||
msg += Errors.E135.format(name=name)
|
||||
raise ValueError(msg)
|
||||
self.pipeline[self.pipe_names.index(name)] = (name, component)
|
||||
|
||||
def rename_pipe(self, old_name, new_name):
|
||||
|
|
|
@ -140,3 +140,28 @@ def test_underscore_mutable_defaults_dict(en_vocab):
|
|||
assert len(token1._.mutable) == 2
|
||||
assert token1._.mutable["x"] == ["y"]
|
||||
assert len(token2._.mutable) == 0
|
||||
|
||||
|
||||
def test_underscore_dir(en_vocab):
|
||||
"""Test that dir() correctly returns extension attributes. This enables
|
||||
things like tab-completion for the attributes in doc._."""
|
||||
Doc.set_extension("test_dir", default=None)
|
||||
doc = Doc(en_vocab, words=["hello", "world"])
|
||||
assert "_" in dir(doc)
|
||||
assert "test_dir" in dir(doc._)
|
||||
assert "test_dir" not in dir(doc[0]._)
|
||||
assert "test_dir" not in dir(doc[0:2]._)
|
||||
|
||||
|
||||
def test_underscore_docstring(en_vocab):
|
||||
"""Test that docstrings are available for extension methods, even though
|
||||
they're partials."""
|
||||
|
||||
def test_method(doc, arg1=1, arg2=2):
|
||||
"""I am a docstring"""
|
||||
return (arg1, arg2)
|
||||
|
||||
Doc.set_extension("test_docstrings", method=test_method)
|
||||
doc = Doc(en_vocab, words=["hello", "world"])
|
||||
assert test_method.__doc__ == "I am a docstring"
|
||||
assert doc._.test_docstrings.__doc__.rsplit(". ")[-1] == "I am a docstring"
|
||||
|
|
|
@ -52,11 +52,13 @@ def test_get_pipe(nlp, name):
|
|||
assert nlp.get_pipe(name) == new_pipe
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name,replacement", [("my_component", lambda doc: doc)])
|
||||
def test_replace_pipe(nlp, name, replacement):
|
||||
@pytest.mark.parametrize("name,replacement,not_callable", [("my_component", lambda doc: doc, {})])
|
||||
def test_replace_pipe(nlp, name, replacement, not_callable):
|
||||
with pytest.raises(ValueError):
|
||||
nlp.replace_pipe(name, new_pipe)
|
||||
nlp.add_pipe(new_pipe, name=name)
|
||||
with pytest.raises(ValueError):
|
||||
nlp.replace_pipe(name, not_callable)
|
||||
nlp.replace_pipe(name, replacement)
|
||||
assert nlp.get_pipe(name) != new_pipe
|
||||
assert nlp.get_pipe(name) == replacement
|
||||
|
|
|
@ -25,6 +25,11 @@ class Underscore(object):
|
|||
object.__setattr__(self, "_start", start)
|
||||
object.__setattr__(self, "_end", end)
|
||||
|
||||
def __dir__(self):
|
||||
# Hack to enable autocomplete on custom extensions
|
||||
extensions = list(self._extensions.keys())
|
||||
return ["set", "get", "has"] + extensions
|
||||
|
||||
def __getattr__(self, name):
|
||||
if name not in self._extensions:
|
||||
raise AttributeError(Errors.E046.format(name=name))
|
||||
|
@ -32,7 +37,16 @@ class Underscore(object):
|
|||
if getter is not None:
|
||||
return getter(self._obj)
|
||||
elif method is not None:
|
||||
return functools.partial(method, self._obj)
|
||||
method_partial = functools.partial(method, self._obj)
|
||||
# Hack to port over docstrings of the original function
|
||||
# See https://stackoverflow.com/q/27362727/6400719
|
||||
method_docstring = method.__doc__ or ""
|
||||
method_docstring_prefix = (
|
||||
"This method is a partial function and its first argument "
|
||||
"(the object it's called on) will be filled automatically. "
|
||||
)
|
||||
method_partial.__doc__ = method_docstring_prefix + method_docstring
|
||||
return method_partial
|
||||
else:
|
||||
key = self._get_key(name)
|
||||
if key in self._doc.user_data:
|
||||
|
|
|
@ -128,7 +128,6 @@ The L2 norm of the lexeme's vector representation.
|
|||
| `text` | unicode | Verbatim text content. |
|
||||
| `orth` | int | ID of the verbatim text content. |
|
||||
| `orth_` | unicode | Verbatim text content (identical to `Lexeme.text`). Exists mostly for consistency with the other attributes. |
|
||||
| `lex_id` | int | ID of the lexeme's lexical type. |
|
||||
| `rank` | int | Sequential ID of the lexeme's lexical type, used to index into tables, e.g. for word vectors. |
|
||||
| `flags` | int | Container of the lexeme's binary flags. |
|
||||
| `norm` | int | The lexeme's norm, i.e. a normalized form of the lexeme text. |
|
||||
|
|
|
@ -468,7 +468,7 @@ The L2 norm of the token's vector representation.
|
|||
| `prob` | float | Smoothed log probability estimate of token's word type (context-independent entry in the vocabulary). |
|
||||
| `idx` | int | The character offset of the token within the parent document. |
|
||||
| `sentiment` | float | A scalar value indicating the positivity or negativity of the token. |
|
||||
| `lex_id` | int | Sequential ID of the token's lexical type. |
|
||||
| `lex_id` | int | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. |
|
||||
| `rank` | int | Sequential ID of the token's lexical type, used to index into tables, e.g. for word vectors. |
|
||||
| `cluster` | int | Brown cluster ID. |
|
||||
| `_` | `Underscore` | User space for adding custom [attribute extensions](/usage/processing-pipelines#custom-components-attributes). |
|
||||
|
|
Loading…
Reference in New Issue
Block a user