Modify Token.morph to enable unsetting (#6043)

Modify `Token.morph` property so that `Token.c.morph` can be reset back
to an internal value of `0`. Allow setting `Token.morph` from a hash as
long as the morph string is already in the `StringStore`, setting it
indirectly through `Token.morph_` so that the value is added to the
morphology. If the hash is not in the `StringStore`, raise an error.
This commit is contained in:
Adriane Boyd 2020-09-13 14:06:07 +02:00 committed by GitHub
parent c7bd631b5f
commit ab270364f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 42 additions and 3 deletions

View File

@ -671,6 +671,9 @@ class Errors:
E1007 = ("Unsupported DependencyMatcher operator '{op}'.")
E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check "
"that you are providing a list of patterns as `List[List[dict]]`.")
E1009 = ("String for hash '{val}' not found in StringStore. Set the value "
"through token.morph_ instead or add the string to the "
"StringStore with `nlp.vocab.strings.add(string)`.")
@add_codes

View File

@ -66,3 +66,31 @@ def test_morph_set(i_has):
def test_morph_str(i_has):
    """Check the string form of each token's morphological analysis.

    `i_has` is a fixture supplied by the test module; only its first two
    items are inspected, by index.
    """
    # Expected str(token.morph) values, in token order.
    expected_by_index = (
        "PronType=prs",
        "Number=sing|Person=three|Tense=pres|VerbForm=fin",
    )
    for index, expected in enumerate(expected_by_index):
        assert str(i_has[index].morph) == expected
def test_morph_property(tokenizer):
    """Exercise setting, unsetting and re-setting a token's morphology.

    Covers assignment through `token.morph_` (string), resetting through
    `token.morph = 0`, the two spellings of the empty analysis ("" and
    "_"), and assignment through `token.morph` using a value returned by
    the StringStore for a string that has already been added.
    """
    doc = tokenizer("a dog")
    strings = tokenizer.vocab.strings

    def stored_morph():
        # Raw MORPH attribute value of the first token, as exported by the doc.
        return doc.to_array(["MORPH"])[0]

    # Assigning a string via token.morph_ stores a non-zero value.
    doc[0].morph_ = "PronType=prs"
    assert doc[0].morph_ == "PronType=prs"
    assert stored_morph() != 0

    # Assigning 0 via token.morph resets the stored value back to 0.
    doc[0].morph = 0
    assert stored_morph() == 0

    # "" and "_" are equivalent: both read back as "" and are stored as
    # the StringStore value for "_".
    for empty_spelling in ("", "_"):
        doc[0].morph_ = empty_spelling
        assert doc[0].morph_ == ""
        assert stored_morph() == strings["_"]

    # A value already known to the StringStore can be assigned via token.morph.
    strings.add("Feat=Val")
    doc[0].morph = strings.add("Feat=Val")
    assert doc[0].morph_ == "Feat=Val"

View File

@ -214,9 +214,17 @@ cdef class Token:
xp = get_array_module(vector)
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
@property
def morph(self):
    """Return the morphological analysis for this token.

    The analysis is built by `MorphAnalysis.from_id` from the token's
    vocab and the morph ID stored on the underlying token struct.
    """
    morph_id = self.c.morph
    return MorphAnalysis.from_id(self.vocab, morph_id)
property morph:
    # Read/write access to the token's morphological analysis by ID.

    def __get__(self):
        """Return the analysis built by `MorphAnalysis.from_id` from the
        token's vocab and the morph ID stored on the token struct."""
        morph_id = self.c.morph
        return MorphAnalysis.from_id(self.vocab, morph_id)

    def __set__(self, attr_t morph):
        """Set the morph from an ID.

        morph (attr_t): 0 to unset, or an ID whose string is already in
            the vocab's StringStore.
        RAISES (ValueError): E1009 if the ID is non-zero and not found in
            the StringStore.
        """
        # 0 is the "unset" value and is written straight to the struct.
        if morph == 0:
            self.c.morph = morph
            return
        # Unknown IDs cannot be resolved to a string, so reject them.
        if morph not in self.vocab.strings:
            raise ValueError(Errors.E1009.format(val=morph))
        # Resolve the ID to its string and assign through morph_ rather
        # than writing the ID directly.
        self.morph_ = self.vocab.strings[morph]
property morph_:
def __get__(self):