* Ensure morphological features and lemmas are loaded in from_array, re Issue #152

2025-11-01 00:17:44 +03:00 · 2015-11-03 17:56:50 +11:00 · 2015-11-03 17:56:50 +11:00 · 5e040855a5
commit 5e040855a5
parent 64531d5a3a
2 changed files with 15 additions and 1 deletions
--- a/spacy/tests/serialize/test_io.py
+++ b/spacy/tests/serialize/test_io.py
@ -38,3 +38,15 @@ def test_left_right(EN):
        for child in word.rights:
            assert child.head.i == word.i

+
+@pytest.mark.models  # NOTE(review): presumably gates the test on installed model data — confirm
+def test_lemmas(EN):  # lemmas must survive a to_bytes()/from_bytes() round trip
+    orig = EN(u'The geese are flying')
+    result = Doc(orig.vocab).from_bytes(orig.to_bytes())  # deserialize into a fresh Doc built on orig's vocab
+    the, geese, are, flying = result  # unpacking fails unless the result has exactly four tokens
+    assert the.lemma_ == 'the'
+    assert geese.lemma_ == 'goose'  # irregular plural
+    assert are.lemma_ == 'be'  # irregular verb form
+    assert flying.lemma_ == 'fly'
+
+ 
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -398,7 +398,7 @@ cdef class Doc:
                        self.is_parsed = True
            elif attr_id == TAG:
                for i in range(length):
-                    tokens[i].tag = values[i]
+                    self.vocab.morphology.assign_tag(&tokens[i], values[i])
                    if not self.is_tagged and tokens[i].tag != 0:
                        self.is_tagged = True
            elif attr_id == POS:
@ -413,6 +413,8 @@ cdef class Doc:
            elif attr_id == ENT_TYPE:
                for i in range(length):
                    tokens[i].ent_type = values[i]
+            else:
+                raise ValueError("Unknown attribute ID: %d" % attr_id)
        set_children_from_heads(self.data, self.length)
        return self