Merge branch 'master' of https://github.com/honnibal/spaCy

2025-12-03 00:06:02 +03:00 · 2015-11-03 07:58:17 +01:00 · 2015-11-03 07:58:17 +01:00 · 25ed7be8f8
commit 25ed7be8f8
parent 2714fb4733 604ceac4c6
2 changed files with 16 additions and 3 deletions
--- a/spacy/tests/serialize/test_io.py
+++ b/spacy/tests/serialize/test_io.py
@@ -38,3 +38,15 @@ def test_left_right(EN):
        for child in word.rights:
            assert child.head.i == word.i

+
+@pytest.mark.models
+def test_lemmas(EN):
+    orig = EN(u'The geese are flying')
+    result = Doc(orig.vocab).from_bytes(orig.to_bytes())
+    the, geese, are, flying = result
+    assert the.lemma_ == 'the'
+    assert geese.lemma_ == 'goose'
+    assert are.lemma_ == 'be'
+    assert flying.lemma_ == 'fly'
+
+ 
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -398,7 +398,7 @@ cdef class Doc:
                        self.is_parsed = True
            elif attr_id == TAG:
                for i in range(length):
-                    tokens[i].tag = values[i]
+                    self.vocab.morphology.assign_tag(&tokens[i], values[i])
                    if not self.is_tagged and tokens[i].tag != 0:
                        self.is_tagged = True
            elif attr_id == POS:
@@ -413,6 +413,8 @@ cdef class Doc:
            elif attr_id == ENT_TYPE:
                for i in range(length):
                    tokens[i].ent_type = values[i]
+            else:
+                raise ValueError("Unknown attribute ID: %d" % attr_id)
        set_children_from_heads(self.data, self.length)
        return self

@@ -469,8 +471,7 @@ cdef class Doc:
        # Update fields
        token.lex = lex
        token.spacy = self.data[end-1].spacy
-        # What to do about morphology??
-        # TODO: token.morph = ???
+        self.vocab.morphology.assign_tag(token, self.vocab.strings[tag])
        token.tag = self.vocab.strings[tag]
        token.lemma = self.vocab.strings[lemma]
        if ent_type == 'O':