mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
💫 Support mutable default values for extension attributes (#3389)
* Support mutable default values in extensions
* Update documentation
This commit is contained in:
parent
e77220e3ae
commit
7c05ca01e8
|
@ -106,3 +106,37 @@ def test_underscore_raises_for_invalid(invalid_kwargs):
|
|||
def test_underscore_accepts_valid(valid_kwargs):
|
||||
valid_kwargs["force"] = True
|
||||
Doc.set_extension("test", **valid_kwargs)
|
||||
|
||||
|
||||
def test_underscore_mutable_defaults_list(en_vocab):
|
||||
"""Test that mutable default arguments are handled correctly (see #2581)."""
|
||||
Doc.set_extension("mutable", default=[])
|
||||
doc1 = Doc(en_vocab, words=["one"])
|
||||
doc2 = Doc(en_vocab, words=["two"])
|
||||
doc1._.mutable.append("foo")
|
||||
assert len(doc1._.mutable) == 1
|
||||
assert doc1._.mutable[0] == "foo"
|
||||
assert len(doc2._.mutable) == 0
|
||||
doc1._.mutable = ["bar", "baz"]
|
||||
doc1._.mutable.append("foo")
|
||||
assert len(doc1._.mutable) == 3
|
||||
assert len(doc2._.mutable) == 0
|
||||
|
||||
|
||||
def test_underscore_mutable_defaults_dict(en_vocab):
|
||||
"""Test that mutable default arguments are handled correctly (see #2581)."""
|
||||
Token.set_extension("mutable", default={})
|
||||
token1 = Doc(en_vocab, words=["one"])[0]
|
||||
token2 = Doc(en_vocab, words=["two"])[0]
|
||||
token1._.mutable["foo"] = "bar"
|
||||
assert len(token1._.mutable) == 1
|
||||
assert token1._.mutable["foo"] == "bar"
|
||||
assert len(token2._.mutable) == 0
|
||||
token1._.mutable["foo"] = "baz"
|
||||
assert len(token1._.mutable) == 1
|
||||
assert token1._.mutable["foo"] == "baz"
|
||||
token1._.mutable["x"] = []
|
||||
token1._.mutable["x"].append("y")
|
||||
assert len(token1._.mutable) == 2
|
||||
assert token1._.mutable["x"] == ["y"]
|
||||
assert len(token2._.mutable) == 0
|
||||
|
|
|
@ -2,11 +2,13 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import functools
|
||||
import copy
|
||||
|
||||
from ..errors import Errors
|
||||
|
||||
|
||||
class Underscore(object):
|
||||
mutable_types = (dict, list, set)
|
||||
doc_extensions = {}
|
||||
span_extensions = {}
|
||||
token_extensions = {}
|
||||
|
@ -32,7 +34,15 @@ class Underscore(object):
|
|||
elif method is not None:
|
||||
return functools.partial(method, self._obj)
|
||||
else:
|
||||
return self._doc.user_data.get(self._get_key(name), default)
|
||||
key = self._get_key(name)
|
||||
if key in self._doc.user_data:
|
||||
return self._doc.user_data[key]
|
||||
elif isinstance(default, self.mutable_types):
|
||||
# Handle mutable default arguments (see #2581)
|
||||
new_default = copy.copy(default)
|
||||
self.__setattr__(name, new_default)
|
||||
return new_default
|
||||
return default
|
||||
|
||||
def __setattr__(self, name, value):
|
||||
if name not in self._extensions:
|
||||
|
|
|
@ -458,9 +458,7 @@ There are three main types of extensions, which can be defined using the
|
|||
1. **Attribute extensions.** Set a default value for an attribute, which can be
|
||||
overwritten manually at any time. Attribute extensions work like "normal"
|
||||
variables and are the quickest way to store arbitrary information on a `Doc`,
|
||||
`Span` or `Token`. Attribute defaults behave just like argument defaults
|
||||
[in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments),
|
||||
and should not be used for mutable values like dictionaries or lists.
|
||||
`Span` or `Token`.
|
||||
|
||||
```python
|
||||
Doc.set_extension("hello", default=True)
|
||||
|
@ -527,25 +525,6 @@ Once you've registered your custom attribute, you can also use the built-in
|
|||
especially useful if you want to pass in a string instead of calling
|
||||
`doc._.my_attr`.
|
||||
|
||||
<Infobox title="Using mutable default values" variant="danger">
|
||||
|
||||
When using **mutable values** like dictionaries or lists as the `default`
|
||||
argument, keep in mind that they behave just like mutable default arguments
|
||||
[in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments).
|
||||
This can easily cause unintended results, like the same value being set on _all_
|
||||
objects instead of only one particular instance. In most cases, it's better to
|
||||
use **getters and setters**, and only set the `default` for boolean or string
|
||||
values.
|
||||
|
||||
```diff
|
||||
+ Doc.set_extension('fruits', getter=get_fruits, setter=set_fruits)
|
||||
|
||||
- Doc.set_extension('fruits', default={})
|
||||
- doc._.fruits['apple'] = u'🍎' # all docs now have {'apple': u'🍎'}
|
||||
```
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3}
|
||||
|
||||
This example shows the implementation of a pipeline component that fetches
|
||||
|
|
Loading…
Reference in New Issue
Block a user