From ab270364f17c45c2759c4bd057db8a08fdf19a62 Mon Sep 17 00:00:00 2001
From: Adriane Boyd <adrianeboyd@gmail.com>
Date: Sun, 13 Sep 2020 14:06:07 +0200
Subject: [PATCH] Modify Token.morph to enable unsetting (#6043)

Modify `Token.morph` property so that `Token.c.morph` can be reset back
to an internal value of `0`. Allow setting `Token.morph` from a hash as
long as the morph string is already in the `StringStore`, setting it
indirectly through `Token.morph_` so that the value is added to the
morphology. If the hash is not in the `StringStore`, raise an error.
---
 spacy/errors.py                       |  3 +++
 spacy/tests/doc/test_morphanalysis.py | 28 +++++++++++++++++++++++++++
 spacy/tokens/token.pyx                | 14 +++++++++++---
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 7164598b6..8f95609a6 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -671,6 +671,9 @@ class Errors:
     E1007 = ("Unsupported DependencyMatcher operator '{op}'.")
     E1008 = ("Invalid pattern: each pattern should be a list of dicts. Check "
              "that you are providing a list of patterns as `List[List[dict]]`.")
+    E1009 = ("String for hash '{val}' not found in StringStore. Set the value "
+             "through token.morph_ instead or add the string to the "
+             "StringStore with `nlp.vocab.strings.add(string)`.")
 
 
 @add_codes
diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py
index 6bfc198fd..f378ce042 100644
--- a/spacy/tests/doc/test_morphanalysis.py
+++ b/spacy/tests/doc/test_morphanalysis.py
@@ -66,3 +66,31 @@ def test_morph_set(i_has):
 def test_morph_str(i_has):
     assert str(i_has[0].morph) == "PronType=prs"
     assert str(i_has[1].morph) == "Number=sing|Person=three|Tense=pres|VerbForm=fin"
+
+
+def test_morph_property(tokenizer):  # covers setting/unsetting via Token.morph and Token.morph_
+    doc = tokenizer("a dog")
+
+    # setting a string through token.morph_ stores a non-zero MORPH value
+    doc[0].morph_ = "PronType=prs"
+    assert doc[0].morph_ == "PronType=prs"
+    assert doc.to_array(["MORPH"])[0] != 0
+
+    # assigning 0 to token.morph unsets it (the MORPH value goes back to 0)
+    doc[0].morph = 0
+    assert doc.to_array(["MORPH"])[0] == 0
+
+    # an empty morph string is stored as "_" but still reads back as ""
+    doc[0].morph_ = ""
+    assert doc[0].morph_ == ""
+    assert doc.to_array(["MORPH"])[0] == tokenizer.vocab.strings["_"]
+
+    # "_" is likewise stored as "_" and reads back as the empty morph ""
+    doc[0].morph_ = "_"
+    assert doc[0].morph_ == ""
+    assert doc.to_array(["MORPH"])[0] == tokenizer.vocab.strings["_"]
+
+    # token.morph accepts a hash once its string is in the StringStore
+    tokenizer.vocab.strings.add("Feat=Val")
+    doc[0].morph = tokenizer.vocab.strings.add("Feat=Val")  # result of add() is used as the hash
+    assert doc[0].morph_ == "Feat=Val"
diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx
index 50f1c5da3..2474f0637 100644
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -214,9 +214,17 @@ cdef class Token:
         xp = get_array_module(vector)
         return (xp.dot(vector, other.vector) / (self.vector_norm * other.vector_norm))
 
-    @property
-    def morph(self):
-        return MorphAnalysis.from_id(self.vocab, self.c.morph)
+    property morph:  # read as MorphAnalysis; write as 0 (unset) or a hash known to the StringStore
+        def __get__(self):
+            return MorphAnalysis.from_id(self.vocab, self.c.morph)
+
+        def __set__(self, attr_t morph):
+            if morph == 0:  # 0 means "unset": write it straight to the C struct
+                self.c.morph = morph
+            elif morph in self.vocab.strings:  # hash whose string is already in the StringStore
+                self.morph_ = self.vocab.strings[morph]  # go through morph_ so the value is added to the morphology
+            else:  # unknown hash: there is no string to register, so refuse it
+                raise ValueError(Errors.E1009.format(val=morph))
 
     property morph_:
         def __get__(self):