mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	💫 Support mutable default values for extension attributes (#3389)
* Support mutable default values in extensions * Update documentation
This commit is contained in:
		
							parent
							
								
									e77220e3ae
								
							
						
					
					
						commit
						7c05ca01e8
					
				|  | @ -106,3 +106,37 @@ def test_underscore_raises_for_invalid(invalid_kwargs): | |||
| def test_underscore_accepts_valid(valid_kwargs): | ||||
|     valid_kwargs["force"] = True | ||||
|     Doc.set_extension("test", **valid_kwargs) | ||||
| 
 | ||||
| 
 | ||||
| def test_underscore_mutable_defaults_list(en_vocab): | ||||
|     """Test that mutable default arguments are handled correctly (see #2581).""" | ||||
|     Doc.set_extension("mutable", default=[]) | ||||
|     doc1 = Doc(en_vocab, words=["one"]) | ||||
|     doc2 = Doc(en_vocab, words=["two"]) | ||||
|     doc1._.mutable.append("foo") | ||||
|     assert len(doc1._.mutable) == 1 | ||||
|     assert doc1._.mutable[0] == "foo" | ||||
|     assert len(doc2._.mutable) == 0 | ||||
|     doc1._.mutable = ["bar", "baz"] | ||||
|     doc1._.mutable.append("foo") | ||||
|     assert len(doc1._.mutable) == 3 | ||||
|     assert len(doc2._.mutable) == 0 | ||||
| 
 | ||||
| 
 | ||||
| def test_underscore_mutable_defaults_dict(en_vocab): | ||||
|     """Test that mutable default arguments are handled correctly (see #2581).""" | ||||
|     Token.set_extension("mutable", default={}) | ||||
|     token1 = Doc(en_vocab, words=["one"])[0] | ||||
|     token2 = Doc(en_vocab, words=["two"])[0] | ||||
|     token1._.mutable["foo"] = "bar" | ||||
|     assert len(token1._.mutable) == 1 | ||||
|     assert token1._.mutable["foo"] == "bar" | ||||
|     assert len(token2._.mutable) == 0 | ||||
|     token1._.mutable["foo"] = "baz" | ||||
|     assert len(token1._.mutable) == 1 | ||||
|     assert token1._.mutable["foo"] == "baz" | ||||
|     token1._.mutable["x"] = [] | ||||
|     token1._.mutable["x"].append("y") | ||||
|     assert len(token1._.mutable) == 2 | ||||
|     assert token1._.mutable["x"] == ["y"] | ||||
|     assert len(token2._.mutable) == 0 | ||||
|  |  | |||
|  | @ -2,11 +2,13 @@ | |||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import functools | ||||
| import copy | ||||
| 
 | ||||
| from ..errors import Errors | ||||
| 
 | ||||
| 
 | ||||
| class Underscore(object): | ||||
|     mutable_types = (dict, list, set) | ||||
|     doc_extensions = {} | ||||
|     span_extensions = {} | ||||
|     token_extensions = {} | ||||
|  | @ -32,7 +34,15 @@ class Underscore(object): | |||
|         elif method is not None: | ||||
|             return functools.partial(method, self._obj) | ||||
|         else: | ||||
|             return self._doc.user_data.get(self._get_key(name), default) | ||||
|             key = self._get_key(name) | ||||
|             if key in self._doc.user_data: | ||||
|                 return self._doc.user_data[key] | ||||
|             elif isinstance(default, self.mutable_types): | ||||
|                 # Handle mutable default arguments (see #2581) | ||||
|                 new_default = copy.copy(default) | ||||
|                 self.__setattr__(name, new_default) | ||||
|                 return new_default | ||||
|             return default | ||||
| 
 | ||||
|     def __setattr__(self, name, value): | ||||
|         if name not in self._extensions: | ||||
|  |  | |||
|  | @ -458,9 +458,7 @@ There are three main types of extensions, which can be defined using the | |||
| 1. **Attribute extensions.** Set a default value for an attribute, which can be | ||||
|    overwritten manually at any time. Attribute extensions work like "normal" | ||||
|    variables and are the quickest way to store arbitrary information on a `Doc`, | ||||
|    `Span` or `Token`. Attribute defaults behaves just like argument defaults | ||||
|    [in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments), | ||||
|    and should not be used for mutable values like dictionaries or lists. | ||||
|    `Span` or `Token`. | ||||
| 
 | ||||
|    ```python | ||||
|     Doc.set_extension("hello", default=True) | ||||
|  | @ -527,25 +525,6 @@ Once you've registered your custom attribute, you can also use the built-in | |||
| especially useful it you want to pass in a string instead of calling | ||||
| `doc._.my_attr`. | ||||
| 
 | ||||
| <Infobox title="Using mutable default values" variant="danger"> | ||||
| 
 | ||||
| When using **mutable values** like dictionaries or lists as the `default` | ||||
| argument, keep in mind that they behave just like mutable default arguments | ||||
| [in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments). | ||||
| This can easily cause unintended results, like the same value being set on _all_ | ||||
| objects instead of only one particular instance. In most cases, it's better to | ||||
| use **getters and setters**, and only set the `default` for boolean or string | ||||
| values. | ||||
| 
 | ||||
| ```diff | ||||
| + Doc.set_extension('fruits', getter=get_fruits, setter=set_fruits) | ||||
| 
 | ||||
| - Doc.set_extension('fruits', default={}) | ||||
| - doc._.fruits['apple'] = u'🍎'  # all docs now have {'apple': u'🍎'} | ||||
| ``` | ||||
| 
 | ||||
| </Infobox> | ||||
| 
 | ||||
| ### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3} | ||||
| 
 | ||||
| This example shows the implementation of a pipeline component that fetches | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user