From 6eee024ff6fe12d3cac82e7daf3e582368bc670f Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Wed, 3 Nov 2021 09:14:29 +0100
Subject: [PATCH] Pickle Doc._context (#9603)

---
 spacy/tests/doc/test_pickle_doc.py | 2 ++
 spacy/tokens/doc.pyx               | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/doc/test_pickle_doc.py b/spacy/tests/doc/test_pickle_doc.py
index 28cb66714..738a751a0 100644
--- a/spacy/tests/doc/test_pickle_doc.py
+++ b/spacy/tests/doc/test_pickle_doc.py
@@ -5,9 +5,11 @@ from spacy.compat import pickle
 def test_pickle_single_doc():
     nlp = Language()
     doc = nlp("pickle roundtrip")
+    doc._context = 3
     data = pickle.dumps(doc, 1)
     doc2 = pickle.loads(data)
     assert doc2.text == "pickle roundtrip"
+    assert doc2._context == 3
 
 
 def test_list_of_docs_pickles_efficiently():
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 3709cece0..362a17784 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -1710,17 +1710,18 @@ cdef int [:,:] _get_lca_matrix(Doc doc, int start, int end):
 def pickle_doc(doc):
     bytes_data = doc.to_bytes(exclude=["vocab", "user_data", "user_hooks"])
     hooks_and_data = (doc.user_data, doc.user_hooks, doc.user_span_hooks,
-                      doc.user_token_hooks)
+                      doc.user_token_hooks, doc._context)
     return (unpickle_doc, (doc.vocab, srsly.pickle_dumps(hooks_and_data), bytes_data))
 
 
 def unpickle_doc(vocab, hooks_and_data, bytes_data):
-    user_data, doc_hooks, span_hooks, token_hooks = srsly.pickle_loads(hooks_and_data)
+    user_data, doc_hooks, span_hooks, token_hooks, _context = srsly.pickle_loads(hooks_and_data)
 
     doc = Doc(vocab, user_data=user_data).from_bytes(bytes_data, exclude=["user_data"])
     doc.user_hooks.update(doc_hooks)
     doc.user_span_hooks.update(span_hooks)
     doc.user_token_hooks.update(token_hooks)
+    doc._context = _context
     return doc
 
 