From fa961ea69458ac3e4dffe8f91c83ae297490a539 Mon Sep 17 00:00:00 2001
From: Wolfgang Seeker <seeker@spacy.io>
Date: Mon, 2 May 2016 11:01:56 +0200
Subject: [PATCH] add tests for serialization bug

---
 spacy/tests/serialize/test_serialization.py | 88 +++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 spacy/tests/serialize/test_serialization.py

diff --git a/spacy/tests/serialize/test_serialization.py b/spacy/tests/serialize/test_serialization.py
new file mode 100644
index 000000000..bb6f604c3
--- /dev/null
+++ b/spacy/tests/serialize/test_serialization.py
@@ -0,0 +1,88 @@
+from __future__ import unicode_literals
+import pytest
+
+from spacy.tokens import Doc
+
+def equal(doc1, doc2):
+	"""Assert that doc1 and doc2 carry the same annotations.
+
+	Checked in order: orth, pos, tag, head index, dep, sentences (only
+	when both docs report is_parsed), ent_type, ent_iob and entities.
+	"""
+	def column(doc, attr):
+		return [getattr(token, attr) for token in doc]
+
+	for attr in ('orth', 'pos', 'tag'):
+		assert column(doc1, attr) == column(doc2, attr)
+	# heads are compared by token index, not by Token object
+	assert [tok.head.i for tok in doc1] == [tok.head.i for tok in doc2]
+	assert column(doc1, 'dep') == column(doc2, 'dep')
+	if doc1.is_parsed and doc2.is_parsed:
+		assert list(doc1.sents) == list(doc2.sents)
+	for attr in ('ent_type', 'ent_iob'):
+		assert column(doc1, attr) == column(doc2, attr)
+	assert list(doc1.ents) == list(doc2.ents)
+
+
+@pytest.mark.models
+def test_serialize_tokens(EN):
+	"""Byte round-trip of a doc built with tag, parse and entity all off."""
+	original = EN(u'This is a test sentence.', tag=False, parse=False, entity=False)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_tags(EN):
+	"""Byte round-trip of a doc built with tag=True only."""
+	original = EN(u'This is a test sentence.', tag=True, parse=False, entity=False)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_parse(EN):
+	"""Byte round-trip of a doc built with parse=True only."""
+	original = EN(u'This is a test sentence.', tag=False, parse=True, entity=False)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_ner(EN):
+	"""Byte round-trip of a doc built with entity=True only."""
+	original = EN(u'This is a test sentence.', tag=False, parse=False, entity=True)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_tags_parse(EN):
+	"""Byte round-trip of a doc built with tag and parse on, entity off."""
+	original = EN(u'This is a test sentence.', tag=True, parse=True, entity=False)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_tags_ner(EN):
+	"""Byte round-trip of a doc built with tag and entity on, parse off."""
+	original = EN(u'This is a test sentence.', tag=True, parse=False, entity=True)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_ner_parse(EN):
+	"""Byte round-trip of a doc built with parse and entity on, tag off."""
+	original = EN(u'This is a test sentence.', tag=False, parse=True, entity=True)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)
+
+
+@pytest.mark.models
+def test_serialize_tokens_tags_parse_ner(EN):
+	"""Byte round-trip of a doc built with tag, parse and entity all on."""
+	original = EN(u'This is a test sentence.', tag=True, parse=True, entity=True)
+	restored = Doc(EN.vocab).from_bytes(original.to_bytes())
+	equal(original, restored)