mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Modify Token.morph to enable unsetting (#6043)
Modify `Token.morph` property so that `Token.c.morph` can be reset back to an internal value of `0`. Allow setting `Token.morph` from a hash as long as the morph string is already in the `StringStore`, setting it indirectly through `Token.morph_` so that the value is added to the morphology. If the hash is not in the `StringStore`, raise an error.
This commit is contained in:
parent
c7bd631b5f
commit
ab270364f1
|
@ -671,6 +671,9 @@ class Errors:
|
|||
E1007 = ("Unsupported DependencyMatcher operator '{op}'.")
|
||||
E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check "
|
||||
"that you are providing a list of patterns as `List[List[dict]]`.")
|
||||
E1009 = ("String for hash '{val}' not found in StringStore. Set the value "
|
||||
"through token.morph_ instead or add the string to the "
|
||||
"StringStore with `nlp.vocab.strings.add(string)`.")
|
||||
|
||||
|
||||
@add_codes
|
||||
|
|
|
@ -66,3 +66,31 @@ def test_morph_set(i_has):
|
|||
def test_morph_str(i_has):
    """Check the string form of the morph on the first two tokens of `i_has`."""
    expected = (
        "PronType=prs",
        "Number=sing|Person=three|Tense=pres|VerbForm=fin",
    )
    # Tokens are looked up by index so each position is compared against the
    # morph string expected for that position.
    for index, morph_text in enumerate(expected):
        assert str(i_has[index].morph) == morph_text
|
||||
|
||||
|
||||
def test_morph_property(tokenizer):
    """Exercise setting and unsetting the morph of the first token of "a dog".

    The test walks through four cases in order: setting via `token.morph_`,
    unsetting via `token.morph = 0`, the two "empty" spellings ("" and "_"),
    and setting via `token.morph` from a hash that is already in the
    tokenizer's string store.
    """
    doc = tokenizer("a dog")
    strings = tokenizer.vocab.strings

    def stored_morph():
        # Raw MORPH attribute value of the first token, read from the doc array.
        return doc.to_array(["MORPH"])[0]

    # Setting a feature string through token.morph_ must leave a non-zero
    # MORPH value in the array.
    doc[0].morph_ = "PronType=prs"
    assert doc[0].morph_ == "PronType=prs"
    assert stored_morph() != 0

    # Assigning 0 through token.morph must reset the MORPH value to 0.
    doc[0].morph = 0
    assert stored_morph() == 0

    # "" and "_" are interchangeable: both read back as "" and both are
    # expected to be stored as the hash of "_".
    for empty_value in ("", "_"):
        doc[0].morph_ = empty_value
        assert doc[0].morph_ == ""
        assert stored_morph() == strings["_"]

    # Once the string is in the string store, the value returned by `add`
    # can be assigned directly through token.morph.
    strings.add("Feat=Val")
    doc[0].morph = strings.add("Feat=Val")
    assert doc[0].morph_ == "Feat=Val"
|
||||
|
|
|
@ -214,9 +214,17 @@ cdef class Token:
|
|||
xp = get_array_module(vector)
|
||||
return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
|
||||
|
||||
@property
def morph(self):
    """Return the MorphAnalysis built from the vocab and `self.c.morph`."""
    morph_id = self.c.morph
    return MorphAnalysis.from_id(self.vocab, morph_id)
|
||||
property morph:
    # Readable as a MorphAnalysis; writable from an integer value.
    def __get__(self):
        """Return the MorphAnalysis built from the vocab and `self.c.morph`."""
        return MorphAnalysis.from_id(self.vocab, self.c.morph)

    def __set__(self, attr_t morph):
        """Set the morph from an integer value.

        A value of 0 is written straight to `self.c.morph`. Any other value
        must be found in `self.vocab.strings`; the string it maps to is then
        assigned through `self.morph_`. If the value is not found, a
        ValueError built from `Errors.E1009` is raised.
        """
        # 0 bypasses the string store entirely and is stored as-is.
        if morph == 0:
            self.c.morph = morph
            return
        # Reject values the string store cannot resolve to a string.
        if morph not in self.vocab.strings:
            raise ValueError(Errors.E1009.format(val=morph))
        # Route the resolved string through the morph_ setter.
        self.morph_ = self.vocab.strings[morph]
|
||||
|
||||
property morph_:
|
||||
def __get__(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user