Exclude user_data when copying doc in displaCy (closes #3882)

This commit is contained in:
Ines Montani 2019-06-26 14:37:05 +02:00
parent f22704621e
commit 6ccdf37574
2 changed files with 16 additions and 1 deletions

View File

@ -116,7 +116,7 @@ def parse_deps(orig_doc, options={}):
doc (Doc): Document to parse.
RETURNS (dict): Generated dependency parse keyed by words and arcs.
"""
doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes())
doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data"]))
if not doc.is_parsed:
user_warning(Warnings.W005)
if options.get("collapse_phrases", False):

View File

@ -0,0 +1,15 @@
# coding: utf8
from __future__ import unicode_literals
from spacy.displacy import parse_deps
from spacy.tokens import Doc
def test_issue3882(en_vocab):
    """Regression test for issue #3882.

    displaCy's parse_deps makes a copy of the Doc; that copy should not
    involve serializing doc.user_data. The test passes if parse_deps
    completes without raising.
    """
    words = ["Hello", "world"]
    parsed_doc = Doc(en_vocab, words=words)
    parsed_doc.is_parsed = True
    # Store a set in user_data before the copy is made -- presumably a value
    # the serializer cannot handle (see issue #3882).
    parsed_doc.user_data["test"] = set()
    parse_deps(parsed_doc)