From 5e040855a5c5e7725fd875e4b85e38d53e113796 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Tue, 3 Nov 2015 17:56:50 +1100
Subject: [PATCH] * Ensure morphological features and lemmas are loaded in
 from_array, re Issue #152

---
 spacy/tests/serialize/test_io.py | 12 ++++++++++++
 spacy/tokens/doc.pyx             |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/spacy/tests/serialize/test_io.py b/spacy/tests/serialize/test_io.py
index a64d0cabc..4157ee309 100644
--- a/spacy/tests/serialize/test_io.py
+++ b/spacy/tests/serialize/test_io.py
@@ -38,3 +38,15 @@ def test_left_right(EN):
         for child in word.rights:
             assert child.head.i == word.i
 
+
+@pytest.mark.models
+def test_lemmas(EN):
+    # Regression test for Issue #152: lemmas must still be set on a Doc that
+    # has been rebuilt via to_bytes() / from_bytes().
+    parsed = EN(u'The geese are flying')
+    restored = Doc(parsed.vocab).from_bytes(parsed.to_bytes())
+    the, geese, are, flying = restored
+    observed = (the.lemma_, geese.lemma_, are.lemma_, flying.lemma_)
+    assert observed == ('the', 'goose', 'be', 'fly')
+
+ 
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 01ccb4fd9..2ad1a1d4a 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -398,7 +398,7 @@ cdef class Doc:
                         self.is_parsed = True
             elif attr_id == TAG:
                 for i in range(length):
-                    tokens[i].tag = values[i]
+                    self.vocab.morphology.assign_tag(&tokens[i], values[i])
                     if not self.is_tagged and tokens[i].tag != 0:
                         self.is_tagged = True
             elif attr_id == POS:
@@ -413,6 +413,8 @@ cdef class Doc:
             elif attr_id == ENT_TYPE:
                 for i in range(length):
                     tokens[i].ent_type = values[i]
+            else:
+                raise ValueError("Unknown attribute ID: %d" % attr_id)
         set_children_from_heads(self.data, self.length)
         return self