mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
💫 Support mutable default values for extension attributes (#3389)
* Support mutable default values in extensions * Update documentation
This commit is contained in:
parent
e77220e3ae
commit
7c05ca01e8
|
@ -106,3 +106,37 @@ def test_underscore_raises_for_invalid(invalid_kwargs):
|
||||||
def test_underscore_accepts_valid(valid_kwargs):
|
def test_underscore_accepts_valid(valid_kwargs):
|
||||||
valid_kwargs["force"] = True
|
valid_kwargs["force"] = True
|
||||||
Doc.set_extension("test", **valid_kwargs)
|
Doc.set_extension("test", **valid_kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def test_underscore_mutable_defaults_list(en_vocab):
|
||||||
|
"""Test that mutable default arguments are handled correctly (see #2581)."""
|
||||||
|
Doc.set_extension("mutable", default=[])
|
||||||
|
doc1 = Doc(en_vocab, words=["one"])
|
||||||
|
doc2 = Doc(en_vocab, words=["two"])
|
||||||
|
doc1._.mutable.append("foo")
|
||||||
|
assert len(doc1._.mutable) == 1
|
||||||
|
assert doc1._.mutable[0] == "foo"
|
||||||
|
assert len(doc2._.mutable) == 0
|
||||||
|
doc1._.mutable = ["bar", "baz"]
|
||||||
|
doc1._.mutable.append("foo")
|
||||||
|
assert len(doc1._.mutable) == 3
|
||||||
|
assert len(doc2._.mutable) == 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_underscore_mutable_defaults_dict(en_vocab):
|
||||||
|
"""Test that mutable default arguments are handled correctly (see #2581)."""
|
||||||
|
Token.set_extension("mutable", default={})
|
||||||
|
token1 = Doc(en_vocab, words=["one"])[0]
|
||||||
|
token2 = Doc(en_vocab, words=["two"])[0]
|
||||||
|
token1._.mutable["foo"] = "bar"
|
||||||
|
assert len(token1._.mutable) == 1
|
||||||
|
assert token1._.mutable["foo"] == "bar"
|
||||||
|
assert len(token2._.mutable) == 0
|
||||||
|
token1._.mutable["foo"] = "baz"
|
||||||
|
assert len(token1._.mutable) == 1
|
||||||
|
assert token1._.mutable["foo"] == "baz"
|
||||||
|
token1._.mutable["x"] = []
|
||||||
|
token1._.mutable["x"].append("y")
|
||||||
|
assert len(token1._.mutable) == 2
|
||||||
|
assert token1._.mutable["x"] == ["y"]
|
||||||
|
assert len(token2._.mutable) == 0
|
||||||
|
|
|
@ -2,11 +2,13 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import functools
|
import functools
|
||||||
|
import copy
|
||||||
|
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
|
|
||||||
|
|
||||||
class Underscore(object):
|
class Underscore(object):
|
||||||
|
mutable_types = (dict, list, set)
|
||||||
doc_extensions = {}
|
doc_extensions = {}
|
||||||
span_extensions = {}
|
span_extensions = {}
|
||||||
token_extensions = {}
|
token_extensions = {}
|
||||||
|
@ -32,7 +34,15 @@ class Underscore(object):
|
||||||
elif method is not None:
|
elif method is not None:
|
||||||
return functools.partial(method, self._obj)
|
return functools.partial(method, self._obj)
|
||||||
else:
|
else:
|
||||||
return self._doc.user_data.get(self._get_key(name), default)
|
key = self._get_key(name)
|
||||||
|
if key in self._doc.user_data:
|
||||||
|
return self._doc.user_data[key]
|
||||||
|
elif isinstance(default, self.mutable_types):
|
||||||
|
# Handle mutable default arguments (see #2581)
|
||||||
|
new_default = copy.copy(default)
|
||||||
|
self.__setattr__(name, new_default)
|
||||||
|
return new_default
|
||||||
|
return default
|
||||||
|
|
||||||
def __setattr__(self, name, value):
|
def __setattr__(self, name, value):
|
||||||
if name not in self._extensions:
|
if name not in self._extensions:
|
||||||
|
|
|
@ -458,9 +458,7 @@ There are three main types of extensions, which can be defined using the
|
||||||
1. **Attribute extensions.** Set a default value for an attribute, which can be
|
1. **Attribute extensions.** Set a default value for an attribute, which can be
|
||||||
overwritten manually at any time. Attribute extensions work like "normal"
|
overwritten manually at any time. Attribute extensions work like "normal"
|
||||||
variables and are the quickest way to store arbitrary information on a `Doc`,
|
variables and are the quickest way to store arbitrary information on a `Doc`,
|
||||||
`Span` or `Token`. Attribute defaults behave just like argument defaults
|
`Span` or `Token`.
|
||||||
[in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments),
|
|
||||||
and should not be used for mutable values like dictionaries or lists.
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
Doc.set_extension("hello", default=True)
|
Doc.set_extension("hello", default=True)
|
||||||
|
@ -527,25 +525,6 @@ Once you've registered your custom attribute, you can also use the built-in
|
||||||
especially useful if you want to pass in a string instead of calling
|
especially useful if you want to pass in a string instead of calling
|
||||||
`doc._.my_attr`.
|
`doc._.my_attr`.
|
||||||
|
|
||||||
<Infobox title="Using mutable default values" variant="danger">
|
|
||||||
|
|
||||||
When using **mutable values** like dictionaries or lists as the `default`
|
|
||||||
argument, keep in mind that they behave just like mutable default arguments
|
|
||||||
[in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments).
|
|
||||||
This can easily cause unintended results, like the same value being set on _all_
|
|
||||||
objects instead of only one particular instance. In most cases, it's better to
|
|
||||||
use **getters and setters**, and only set the `default` for boolean or string
|
|
||||||
values.
|
|
||||||
|
|
||||||
```diff
|
|
||||||
+ Doc.set_extension('fruits', getter=get_fruits, setter=set_fruits)
|
|
||||||
|
|
||||||
- Doc.set_extension('fruits', default={})
|
|
||||||
- doc._.fruits['apple'] = u'🍎' # all docs now have {'apple': u'🍎'}
|
|
||||||
```
|
|
||||||
|
|
||||||
</Infobox>
|
|
||||||
|
|
||||||
### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3}
|
### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3}
|
||||||
|
|
||||||
This example shows the implementation of a pipeline component that fetches
|
This example shows the implementation of a pipeline component that fetches
|
||||||
|
|
Loading…
Reference in New Issue
Block a user