mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	💫 Support mutable default values for extension attributes (#3389)
* Support mutable default values in extensions
* Update documentation
This commit is contained in:
		
							parent
							
								
									e77220e3ae
								
							
						
					
					
						commit
						7c05ca01e8
					
				|  | @ -106,3 +106,37 @@ def test_underscore_raises_for_invalid(invalid_kwargs): | ||||||
| def test_underscore_accepts_valid(valid_kwargs): | def test_underscore_accepts_valid(valid_kwargs): | ||||||
|     valid_kwargs["force"] = True |     valid_kwargs["force"] = True | ||||||
|     Doc.set_extension("test", **valid_kwargs) |     Doc.set_extension("test", **valid_kwargs) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_underscore_mutable_defaults_list(en_vocab): | ||||||
|  |     """Test that mutable default arguments are handled correctly (see #2581).""" | ||||||
|  |     Doc.set_extension("mutable", default=[]) | ||||||
|  |     doc1 = Doc(en_vocab, words=["one"]) | ||||||
|  |     doc2 = Doc(en_vocab, words=["two"]) | ||||||
|  |     doc1._.mutable.append("foo") | ||||||
|  |     assert len(doc1._.mutable) == 1 | ||||||
|  |     assert doc1._.mutable[0] == "foo" | ||||||
|  |     assert len(doc2._.mutable) == 0 | ||||||
|  |     doc1._.mutable = ["bar", "baz"] | ||||||
|  |     doc1._.mutable.append("foo") | ||||||
|  |     assert len(doc1._.mutable) == 3 | ||||||
|  |     assert len(doc2._.mutable) == 0 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_underscore_mutable_defaults_dict(en_vocab): | ||||||
|  |     """Test that mutable default arguments are handled correctly (see #2581).""" | ||||||
|  |     Token.set_extension("mutable", default={}) | ||||||
|  |     token1 = Doc(en_vocab, words=["one"])[0] | ||||||
|  |     token2 = Doc(en_vocab, words=["two"])[0] | ||||||
|  |     token1._.mutable["foo"] = "bar" | ||||||
|  |     assert len(token1._.mutable) == 1 | ||||||
|  |     assert token1._.mutable["foo"] == "bar" | ||||||
|  |     assert len(token2._.mutable) == 0 | ||||||
|  |     token1._.mutable["foo"] = "baz" | ||||||
|  |     assert len(token1._.mutable) == 1 | ||||||
|  |     assert token1._.mutable["foo"] == "baz" | ||||||
|  |     token1._.mutable["x"] = [] | ||||||
|  |     token1._.mutable["x"].append("y") | ||||||
|  |     assert len(token1._.mutable) == 2 | ||||||
|  |     assert token1._.mutable["x"] == ["y"] | ||||||
|  |     assert len(token2._.mutable) == 0 | ||||||
|  |  | ||||||
|  | @ -2,11 +2,13 @@ | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| import functools | import functools | ||||||
|  | import copy | ||||||
| 
 | 
 | ||||||
| from ..errors import Errors | from ..errors import Errors | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Underscore(object): | class Underscore(object): | ||||||
|  |     mutable_types = (dict, list, set) | ||||||
|     doc_extensions = {} |     doc_extensions = {} | ||||||
|     span_extensions = {} |     span_extensions = {} | ||||||
|     token_extensions = {} |     token_extensions = {} | ||||||
|  | @ -32,7 +34,15 @@ class Underscore(object): | ||||||
|         elif method is not None: |         elif method is not None: | ||||||
|             return functools.partial(method, self._obj) |             return functools.partial(method, self._obj) | ||||||
|         else: |         else: | ||||||
|             return self._doc.user_data.get(self._get_key(name), default) |             key = self._get_key(name) | ||||||
|  |             if key in self._doc.user_data: | ||||||
|  |                 return self._doc.user_data[key] | ||||||
|  |             elif isinstance(default, self.mutable_types): | ||||||
|  |                 # Handle mutable default arguments (see #2581) | ||||||
|  |                 new_default = copy.copy(default) | ||||||
|  |                 self.__setattr__(name, new_default) | ||||||
|  |                 return new_default | ||||||
|  |             return default | ||||||
| 
 | 
 | ||||||
|     def __setattr__(self, name, value): |     def __setattr__(self, name, value): | ||||||
|         if name not in self._extensions: |         if name not in self._extensions: | ||||||
|  |  | ||||||
|  | @ -458,9 +458,7 @@ There are three main types of extensions, which can be defined using the | ||||||
| 1. **Attribute extensions.** Set a default value for an attribute, which can be | 1. **Attribute extensions.** Set a default value for an attribute, which can be | ||||||
|    overwritten manually at any time. Attribute extensions work like "normal" |    overwritten manually at any time. Attribute extensions work like "normal" | ||||||
|    variables and are the quickest way to store arbitrary information on a `Doc`, |    variables and are the quickest way to store arbitrary information on a `Doc`, | ||||||
|    `Span` or `Token`. Attribute defaults behaves just like argument defaults |    `Span` or `Token`. | ||||||
|    [in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments), |  | ||||||
|    and should not be used for mutable values like dictionaries or lists. |  | ||||||
| 
 | 
 | ||||||
|    ```python |    ```python | ||||||
|     Doc.set_extension("hello", default=True) |     Doc.set_extension("hello", default=True) | ||||||
|  | @ -527,25 +525,6 @@ Once you've registered your custom attribute, you can also use the built-in | ||||||
| especially useful if you want to pass in a string instead of calling | especially useful if you want to pass in a string instead of calling | ||||||
| `doc._.my_attr`. | `doc._.my_attr`. | ||||||
| 
 | 
 | ||||||
| <Infobox title="Using mutable default values" variant="danger"> |  | ||||||
| 
 |  | ||||||
| When using **mutable values** like dictionaries or lists as the `default` |  | ||||||
| argument, keep in mind that they behave just like mutable default arguments |  | ||||||
| [in Python functions](http://docs.python-guide.org/en/latest/writing/gotchas/#mutable-default-arguments). |  | ||||||
| This can easily cause unintended results, like the same value being set on _all_ |  | ||||||
| objects instead of only one particular instance. In most cases, it's better to |  | ||||||
| use **getters and setters**, and only set the `default` for boolean or string |  | ||||||
| values. |  | ||||||
| 
 |  | ||||||
| ```diff |  | ||||||
| + Doc.set_extension('fruits', getter=get_fruits, setter=set_fruits) |  | ||||||
| 
 |  | ||||||
| - Doc.set_extension('fruits', default={}) |  | ||||||
| - doc._.fruits['apple'] = u'🍎'  # all docs now have {'apple': u'🍎'} |  | ||||||
| ``` |  | ||||||
| 
 |  | ||||||
| </Infobox> |  | ||||||
| 
 |  | ||||||
| ### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3} | ### Example: Pipeline component for GPE entities and country meta data via a REST API {#component-example3} | ||||||
| 
 | 
 | ||||||
| This example shows the implementation of a pipeline component that fetches | This example shows the implementation of a pipeline component that fetches | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user