💫 Support mutable default values for extension attributes (#3389)

* Support mutable default values in extensions

* Update documentation
This commit is contained in:
Ines Montani 2019-03-11 12:50:44 +01:00 committed by Matthew Honnibal
parent e77220e3ae
commit 7c05ca01e8
3 changed files with 46 additions and 23 deletions

View File

@ -106,3 +106,37 @@ def test_underscore_raises_for_invalid(invalid_kwargs):
def test_underscore_accepts_valid(valid_kwargs):
valid_kwargs["force"] = True
Doc.set_extension("test", **valid_kwargs)
def test_underscore_mutable_defaults_list(en_vocab):
"""Test that mutable default arguments are handled correctly (see #2581)."""
Doc.set_extension("mutable", default=[])
doc1 = Doc(en_vocab, words=["one"])
doc2 = Doc(en_vocab, words=["two"])
doc1._.mutable.append("foo")
assert len(doc1._.mutable) == 1
assert doc1._.mutable[0] == "foo"
assert len(doc2._.mutable) == 0
doc1._.mutable = ["bar", "baz"]
doc1._.mutable.append("foo")
assert len(doc1._.mutable) == 3
assert len(doc2._.mutable) == 0
def test_underscore_mutable_defaults_dict(en_vocab):
"""Test that mutable default arguments are handled correctly (see #2581)."""
Token.set_extension("mutable", default={})
token1 = Doc(en_vocab, words=["one"])[0]
token2 = Doc(en_vocab, words=["two"])[0]
token1._.mutable["foo"] = "bar"
assert len(token1._.mutable) == 1
assert token1._.mutable["foo"] == "bar"
assert len(token2._.mutable) == 0
token1._.mutable["foo"] = "baz"
assert len(token1._.mutable) == 1
assert token1._.mutable["foo"] == "baz"
token1._.mutable["x"] = []
token1._.mutable["x"].append("y")
assert len(token1._.mutable) == 2
assert token1._.mutable["x"] == ["y"]
assert len(token2._.mutable) == 0

View File

@ -2,11 +2,13 @@
from __future__ import unicode_literals
import functools
import copy
from ..errors import Errors
class Underscore(object):
mutable_types = (dict, list, set)
doc_extensions = {}
span_extensions = {}
token_extensions = {}
@ -32,7 +34,15 @@ class Underscore(object):
elif method is not None:
return functools.partial(method, self._obj)
else:
return self._doc.user_data.get(self._get_key(name), default)
key = self._get_key(name)
if key in self._doc.user_data:
return self._doc.user_data[key]
elif isinstance(default, self.mutable_types):
# Handle mutable default arguments (see #2581)
new_default = copy.copy(default)
self.__setattr__(name, new_default)
return new_default
return default
def __setattr__(self, name, value):
if name not in self._extensions:

View File

@ -458,9 +458,7 @@ There are three main types of extensions, which can be defined using the
1. **Attribute extensions.** Set a default value for an attribute, which can be
overwritten manually at any time. Attribute extensions work like "normal"
variables and are the quickest way to store arbitrary information on a `Doc`,
`Span` or `Token`. Attribute defaults behaves just like argument defaults
[in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments),
and should not be used for mutable values like dictionaries or lists.
`Span` or `Token`.
```python
Doc.set_extension("hello", default=True)
@ -527,25 +525,6 @@ Once you've registered your custom attribute, you can also use the built-in
especially useful it you want to pass in a string instead of calling
`doc._.my_attr`.
<Infobox title="Using mutable default values" variant="danger">
When using **mutable values** like dictionaries or lists as the `default`
argument, keep in mind that they behave just like mutable default arguments
[in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments).
This can easily cause unintended results, like the same value being set on _all_
objects instead of only one particular instance. In most cases, it's better to
use **getters and setters**, and only set the `default` for boolean or string
values.
```diff
+ Doc.set_extension('fruits', getter=get_fruits, setter=set_fruits)
- Doc.set_extension('fruits', default={})
- doc._.fruits['apple'] = u'🍎' # all docs now have {'apple': u'🍎'}
```
</Infobox>
### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3}
This example shows the implementation of a pipeline component that fetches