From 4de30a8e385d0795aa787466f429a15991f5ccdd Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 23 Oct 2016 16:40:27 +0200
Subject: [PATCH] Test Issue #514: Serialization fails after adding a new entity label.

---
Notes:
    Reviewer notes; text in this region is not part of the commit message.

    The added test builds a Doc from seven whitespace-split words, adds a
    'Food' label to the entity recognizer, runs the recognizer on the Doc,
    then overwrites doc.ents with a single (label_id, 5, 6) span and asserts
    that it reads back as (u'Food', u'pasta').

    The final doc.to_bytes() result is assigned to byte_string and never
    inspected inside this hunk, so the test presumably passes whenever
    serialization does not raise -- TODO confirm that this is the intent.

 spacy/tests/serialize/test_serialization.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/spacy/tests/serialize/test_serialization.py b/spacy/tests/serialize/test_serialization.py
index a150b8cf3..c02fc4de7 100644
--- a/spacy/tests/serialize/test_serialization.py
+++ b/spacy/tests/serialize/test_serialization.py
@@ -97,3 +97,21 @@ def test_serialize_empty_doc():
     assert b == b''
     loaded = Doc(vocab).from_bytes(b)
     assert len(loaded) == 0
+
+
+def test_serialize_after_adding_entity():
+    # Re issue #514
+    vocab = spacy.en.English.Defaults.create_vocab()
+    entity_recognizer = spacy.en.English.Defaults.create_entity()
+
+    doc = Doc(vocab, words=u'This is a sentence about pasta .'.split())
+    entity_recognizer.add_label('Food')
+    entity_recognizer(doc)
+
+
+    label_id = vocab.strings[u'Food']
+    doc.ents = [(label_id, 5,6)]
+
+    assert [(ent.label_, ent.text) for ent in doc.ents] == [(u'Food', u'pasta')]
+
+    byte_string = doc.to_bytes()