* Ensure morphological features and lemmas are loaded in from_array, re Issue #152

This commit is contained in:
Matthew Honnibal 2015-11-03 17:56:50 +11:00
parent 64531d5a3a
commit 5e040855a5
2 changed files with 15 additions and 1 deletions

View File

@ -38,3 +38,15 @@ def test_left_right(EN):
for child in word.rights: for child in word.rights:
assert child.head.i == word.i assert child.head.i == word.i
@pytest.mark.models
def test_lemmas(EN):
    """Check that lemmas are intact after a to_bytes/from_bytes round trip."""
    original = EN(u'The geese are flying')

    # Build the empty target document first, then serialize the source and
    # load the bytes into the target.
    target = Doc(original.vocab)
    payload = original.to_bytes()
    result = target.from_bytes(payload)

    # Unpacking also enforces that exactly four tokens came back.
    the, geese, are, flying = result
    expected_lemmas = ('the', 'goose', 'be', 'fly')
    for token, lemma in zip((the, geese, are, flying), expected_lemmas):
        assert token.lemma_ == lemma

View File

@ -398,7 +398,7 @@ cdef class Doc:
self.is_parsed = True self.is_parsed = True
elif attr_id == TAG: elif attr_id == TAG:
for i in range(length): for i in range(length):
tokens[i].tag = values[i] self.vocab.morphology.assign_tag(&tokens[i], values[i])
if not self.is_tagged and tokens[i].tag != 0: if not self.is_tagged and tokens[i].tag != 0:
self.is_tagged = True self.is_tagged = True
elif attr_id == POS: elif attr_id == POS:
@ -413,6 +413,8 @@ cdef class Doc:
elif attr_id == ENT_TYPE: elif attr_id == ENT_TYPE:
for i in range(length): for i in range(length):
tokens[i].ent_type = values[i] tokens[i].ent_type = values[i]
else:
raise ValueError("Unknown attribute ID: %d" % attr_id)
set_children_from_heads(self.data, self.length) set_children_from_heads(self.data, self.length)
return self return self