Fix serialization of extension attr values in DocBin (#4540)

This commit is contained in:
Ines Montani 2019-10-28 16:02:13 +01:00 committed by GitHub
parent df293f3894
commit a90025b277
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 2 additions and 3 deletions

View File

@@ -1,11 +1,9 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
from spacy.tokens import Doc, DocBin
@pytest.mark.xfail
def test_issue4528(en_vocab):
"""Test that user_data is correctly serialized in DocBin."""
doc = Doc(en_vocab, words=["hello", "world"])

View File

@@ -103,7 +103,8 @@ class DocBin(object):
doc = Doc(vocab, words=words, spaces=spaces)
doc = doc.from_array(self.attrs, tokens)
if self.store_user_data:
doc.user_data.update(srsly.msgpack_loads(self.user_data[i]))
user_data = srsly.msgpack_loads(self.user_data[i], use_list=False)
doc.user_data.update(user_data)
yield doc
def merge(self, other):