From f5780cb160d1787d900bc1ca5f8795958a0474fb Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 20 Jun 2020 15:59:39 +0200 Subject: [PATCH] Serialize all attrs by default --- spacy/tokens/_serialize.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 8f3e942e3..3072787ae 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -9,6 +9,19 @@ from ..attrs import SPACY, ORTH, intify_attr from ..errors import Errors +ALL_ATTRS = ( + "ORTH", + "TAG", + "HEAD", + "DEP", + "SENT_START", + "ENT_IOB", + "ENT_TYPE", + "LEMMA", + "MORPH" +) + + class DocBin(object): """Pack Doc objects for binary serialization. @@ -39,7 +52,7 @@ class DocBin(object): document from the DocBin. """ - def __init__(self, attrs=None, store_user_data=False, docs=[]): + def __init__(self, attrs=ALL_ATTRS, store_user_data=False, docs=[]): """Create a DocBin object to hold serialized annotations. attrs (list): List of attributes to serialize. 'orth' and 'spacy' are @@ -49,7 +62,6 @@ class DocBin(object): DOCS: https://spacy.io/api/docbin#init """ - attrs = attrs or [] attrs = sorted([intify_attr(attr) for attr in attrs]) self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY] self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0]