# coding: utf8
# Provenance: introduced by commit 96bb8f2 ("Add regression test for #4528")
# as spacy/tests/regression/test_issue4528.py.
from __future__ import unicode_literals

import pytest
from spacy.tokens import Doc, DocBin


@pytest.mark.xfail
def test_issue4528(en_vocab):
    """Check that Doc.user_data survives a DocBin bytes round trip.

    A two-word Doc gets one plain string key and one tuple key written into
    its user_data, is packed into a DocBin created with
    store_user_data=True, serialized with to_bytes(), and loaded back into a
    fresh DocBin via from_bytes(). Both entries must be readable from the
    first restored Doc.

    The test carries the xfail marker, so pytest treats a failure here as
    the expected outcome.

    en_vocab: vocab fixture used both to build the Doc and to restore it.
    """
    # Extension attribute values are stored in user_data under a tuple key
    # of this shape.
    extension_key = ("._.", "foo", None, None)

    original = Doc(en_vocab, words=["hello", "world"])
    original.user_data["foo"] = "bar"
    original.user_data[extension_key] = "bar"

    # Serialize: pack the Doc and dump the container to bytes.
    packed = DocBin(store_user_data=True)
    packed.add(original)
    payload = packed.to_bytes()

    # Deserialize into a brand-new container and pull out the first Doc.
    unpacked = DocBin(store_user_data=True).from_bytes(payload)
    restored_docs = list(unpacked.get_docs(en_vocab))
    restored = restored_docs[0]

    assert restored.user_data["foo"] == "bar"
    assert restored.user_data[extension_key] == "bar"