mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-05 23:06:28 +03:00
Merge branch 'master' into spacy.io
This commit is contained in:
commit
f0aea5b198
|
@ -9,7 +9,7 @@ from ..vocab cimport Vocab
|
|||
|
||||
|
||||
cdef class PhraseMatcher:
|
||||
cdef Vocab vocab
|
||||
cdef readonly Vocab vocab
|
||||
cdef attr_id_t attr
|
||||
cdef object _callbacks
|
||||
cdef object _docs
|
||||
|
|
13
spacy/tests/regression/test_issue4373.py
Normal file
13
spacy/tests/regression/test_issue4373.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from spacy.matcher import Matcher, PhraseMatcher
|
||||
from spacy.vocab import Vocab
|
||||
|
||||
|
||||
def test_issue4373():
|
||||
"""Test that PhraseMatcher.vocab can be accessed (like Matcher.vocab)."""
|
||||
matcher = Matcher(Vocab())
|
||||
assert isinstance(matcher.vocab, Vocab)
|
||||
matcher = PhraseMatcher(Vocab())
|
||||
assert isinstance(matcher.vocab, Vocab)
|
|
@ -46,7 +46,7 @@ Create a `DocBin` object to hold serialized annotations.
|
|||
| Argument | Type | Description |
|
||||
| ----------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `attrs` | list | List of attributes to serialize. `orth` (hash of token text) and `spacy` (whether the token is followed by whitespace) are always serialized, so they're not required. Defaults to `None`. |
|
||||
| `store_user_data` | bool | Whether to include the `Doc.user_data`. Defaults to `False`. |
|
||||
| `store_user_data` | bool | Whether to include the `Doc.user_data` and the values of custom extension attributes. Defaults to `False`. |
|
||||
| **RETURNS** | `DocBin` | The newly constructed object. |
|
||||
|
||||
## DocBin.\_\_len\_\_ {#len tag="method"}
|
||||
|
|
|
@ -92,6 +92,25 @@ doc_bin = DocBin().from_bytes(bytes_data)
|
|||
docs = list(doc_bin.get_docs(nlp.vocab))
|
||||
```
|
||||
|
||||
If `store_user_data` is set to `True`, the `Doc.user_data` will be serialized as
|
||||
well, which includes the values of
|
||||
[extension attributes](/processing-pipelines#custom-components-attributes) (if
|
||||
they're serializable with msgpack).
|
||||
|
||||
<Infobox title="Important note on serializing extension attributes" variant="warning">
|
||||
|
||||
Including the `Doc.user_data` and extension attributes will only serialize the
|
||||
**values** of the attributes. To restore the values and access them via the
|
||||
`doc._.` property, you need to register the global attribute on the `Doc` again.
|
||||
|
||||
```python
|
||||
docs = list(doc_bin.get_docs(nlp.vocab))
|
||||
Doc.set_extension("my_custom_attr", default=None)
|
||||
print([doc._.my_custom_attr for doc in docs])
|
||||
```
|
||||
|
||||
</Infobox>
|
||||
|
||||
### Using Pickle {#pickle}
|
||||
|
||||
> #### Example
|
||||
|
|
Loading…
Reference in New Issue
Block a user