From 86c3ec9c2b3ad28797b26fb75b808bf573087b35 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 1 Oct 2020 22:21:46 +0200 Subject: [PATCH 1/3] Refactor Token morph setting (#6175) * Refactor Token morph setting * Remove `Token.morph_` * Add `Token.set_morph()` * `0` resets `token.c.morph` to unset * Any other values are passed to `Morphology.add` * Add token.morph setter to set from MorphAnalysis --- spacy/errors.py | 3 ++ spacy/pipeline/morphologizer.pyx | 4 +- spacy/tests/doc/test_array.py | 6 +-- spacy/tests/doc/test_doc_api.py | 38 +++++++++++++++---- spacy/tests/doc/test_morphanalysis.py | 42 ++++++++++----------- spacy/tests/doc/test_retokenize_merge.py | 4 +- spacy/tests/doc/test_retokenize_split.py | 4 +- spacy/tests/matcher/test_matcher_api.py | 24 ++++++------ spacy/tests/matcher/test_phrase_matcher.py | 4 +- spacy/tests/pipeline/test_attributeruler.py | 30 +++++++-------- spacy/tests/pipeline/test_morphologizer.py | 4 +- spacy/tests/test_scorer.py | 6 +-- spacy/tests/training/test_new_example.py | 2 +- spacy/tests/training/test_training.py | 4 +- spacy/tokens/_serialize.py | 2 +- spacy/tokens/doc.pyx | 2 +- spacy/tokens/token.pyx | 26 ++++++------- spacy/training/example.pyx | 2 +- spacy/training/gold_io.pyx | 2 +- 19 files changed, 118 insertions(+), 91 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 1c934d188..5236992e9 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -710,6 +710,9 @@ class Errors: "options: {modes}") E1012 = ("Entity spans and blocked/missing/outside spans should be " "provided to doc.set_ents as lists of `Span` objects.") + E1013 = ("Invalid morph: the MorphAnalysis must have the same vocab as the " + "token itself. To set the morph from this MorphAnalysis, set from " + "the string value with: `token.set_morph(str(other_morph))`.") @add_codes diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 60ad10a2b..ab0554692 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -149,7 +149,7 @@ class Morphologizer(Tagger): for example in get_examples(): for i, token in enumerate(example.reference): pos = token.pos_ - morph = token.morph_ + morph = str(token.morph) # create and add the combined morph+POS label morph_dict = Morphology.feats_to_dict(morph) if pos: @@ -167,7 +167,7 @@ class Morphologizer(Tagger): gold_array = [] for i, token in enumerate(example.reference): pos = token.pos_ - morph = token.morph_ + morph = str(token.morph) morph_dict = Morphology.feats_to_dict(morph) if pos: morph_dict[self.POS_FEAT] = pos diff --git a/spacy/tests/doc/test_array.py b/spacy/tests/doc/test_array.py index 9c050f740..ef54c581c 100644 --- a/spacy/tests/doc/test_array.py +++ b/spacy/tests/doc/test_array.py @@ -46,9 +46,9 @@ def test_doc_array_morph(en_vocab): words = ["Eat", "blue", "ham"] morph = ["Feat=V", "Feat=J", "Feat=N"] doc = Doc(en_vocab, words=words, morphs=morph) - assert morph[0] == doc[0].morph_ - assert morph[1] == doc[1].morph_ - assert morph[2] == doc[2].morph_ + assert morph[0] == str(doc[0].morph) + assert morph[1] == str(doc[1].morph) + assert morph[2] == str(doc[2].morph) feats_array = doc.to_array((ORTH, MORPH)) assert feats_array[0][1] == doc[0].morph.key diff --git a/spacy/tests/doc/test_doc_api.py b/spacy/tests/doc/test_doc_api.py index 55a1c1ad2..e3e056685 100644 --- a/spacy/tests/doc/test_doc_api.py +++ b/spacy/tests/doc/test_doc_api.py @@ -319,15 +319,13 @@ def test_doc_from_array_morph(en_vocab): words = ["I", "live", "in", "New", "York", "."] morphs = 
["Feat1=A", "Feat1=B", "Feat1=C", "Feat1=A|Feat2=D", "Feat2=E", "Feat3=F"] # fmt: on - doc = Doc(en_vocab, words=words) - for i, morph in enumerate(morphs): - doc[i].morph_ = morph + doc = Doc(en_vocab, words=words, morphs=morphs) attrs = [MORPH] arr = doc.to_array(attrs) new_doc = Doc(en_vocab, words=words) new_doc.from_array(attrs, arr) - assert [t.morph_ for t in new_doc] == morphs - assert [t.morph_ for t in doc] == [t.morph_ for t in new_doc] + assert [str(t.morph) for t in new_doc] == morphs + assert [str(t.morph) for t in doc] == [str(t.morph) for t in new_doc] def test_doc_api_from_docs(en_tokenizer, de_tokenizer): @@ -423,7 +421,7 @@ def test_has_annotation(en_vocab): doc[0].tag_ = "A" doc[0].pos_ = "X" - doc[0].morph_ = "Feat=Val" + doc[0].set_morph("Feat=Val") doc[0].lemma_ = "a" doc[0].dep_ = "dep" doc[0].head = doc[1] @@ -435,7 +433,7 @@ def test_has_annotation(en_vocab): doc[1].tag_ = "A" doc[1].pos_ = "X" - doc[1].morph_ = "" + doc[1].set_morph("") doc[1].lemma_ = "a" doc[1].dep_ = "dep" doc.ents = [Span(doc, 0, 2, label="HELLO")] @@ -538,6 +536,32 @@ def test_doc_ents_setter(): assert [e.label_ for e in doc.ents] == ["HELLO", "WORLD"] +def test_doc_morph_setter(en_tokenizer, de_tokenizer): + doc1 = en_tokenizer("a b") + doc1b = en_tokenizer("c d") + doc2 = de_tokenizer("a b") + + # unset values can be copied + doc1[0].morph = doc1[1].morph + assert doc1[0].morph.key == 0 + assert doc1[1].morph.key == 0 + + # morph values from the same vocab can be copied + doc1[0].set_morph("Feat=Val") + doc1[1].morph = doc1[0].morph + assert doc1[0].morph == doc1[1].morph + + # ... also across docs + doc1b[0].morph = doc1[0].morph + assert doc1[0].morph == doc1b[0].morph + + doc2[0].set_morph("Feat2=Val2") + + # the morph value must come from the same vocab + with pytest.raises(ValueError): + doc1[0].morph = doc2[0].morph + + def test_doc_init_iob(): """Test ents validation/normalization in Doc.__init__""" words = ["a", "b", "c", "d", "e"] diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py index f378ce042..56c80dd66 100644 --- a/spacy/tests/doc/test_morphanalysis.py +++ b/spacy/tests/doc/test_morphanalysis.py @@ -4,13 +4,13 @@ import pytest @pytest.fixture def i_has(en_tokenizer): doc = en_tokenizer("I has") - doc[0].morph_ = {"PronType": "prs"} - doc[1].morph_ = { + doc[0].set_morph({"PronType": "prs"}) + doc[1].set_morph({ "VerbForm": "fin", "Tense": "pres", "Number": "sing", "Person": "three", - } + }) return doc @@ -47,20 +47,20 @@ def test_morph_get(i_has): def test_morph_set(i_has): assert i_has[0].morph.get("PronType") == ["prs"] # set by string - i_has[0].morph_ = "PronType=unk" + i_has[0].set_morph("PronType=unk") assert i_has[0].morph.get("PronType") == ["unk"] # set by string, fields are alphabetized - i_has[0].morph_ = "PronType=123|NounType=unk" - assert i_has[0].morph_ == "NounType=unk|PronType=123" + i_has[0].set_morph("PronType=123|NounType=unk") + assert str(i_has[0].morph) == "NounType=unk|PronType=123" # set by dict - i_has[0].morph_ = {"AType": "123", "BType": "unk"} - assert i_has[0].morph_ == "AType=123|BType=unk" + i_has[0].set_morph({"AType": "123", "BType": "unk"}) + assert str(i_has[0].morph) == "AType=123|BType=unk" # set by string with multiple values, fields and values are alphabetized - i_has[0].morph_ = "BType=c|AType=b,a" - assert i_has[0].morph_ == "AType=a,b|BType=c" + i_has[0].set_morph("BType=c|AType=b,a") + assert str(i_has[0].morph) == "AType=a,b|BType=c" # set by dict with multiple values, fields and values are 
alphabetized - i_has[0].morph_ = {"AType": "b,a", "BType": "c"} - assert i_has[0].morph_ == "AType=a,b|BType=c" + i_has[0].set_morph({"AType": "b,a", "BType": "c"}) + assert str(i_has[0].morph) == "AType=a,b|BType=c" def test_morph_str(i_has): @@ -72,25 +72,25 @@ def test_morph_property(tokenizer): doc = tokenizer("a dog") # set through token.morph_ - doc[0].morph_ = "PronType=prs" - assert doc[0].morph_ == "PronType=prs" + doc[0].set_morph("PronType=prs") + assert str(doc[0].morph) == "PronType=prs" assert doc.to_array(["MORPH"])[0] != 0 # unset with token.morph - doc[0].morph = 0 + doc[0].set_morph(0) assert doc.to_array(["MORPH"])[0] == 0 # empty morph is equivalent to "_" - doc[0].morph_ = "" - assert doc[0].morph_ == "" + doc[0].set_morph("") + assert str(doc[0].morph) == "" assert doc.to_array(["MORPH"])[0] == tokenizer.vocab.strings["_"] # "_" morph is also equivalent to empty morph - doc[0].morph_ = "_" - assert doc[0].morph_ == "" + doc[0].set_morph("_") + assert str(doc[0].morph) == "" assert doc.to_array(["MORPH"])[0] == tokenizer.vocab.strings["_"] # set through existing hash with token.morph tokenizer.vocab.strings.add("Feat=Val") - doc[0].morph = tokenizer.vocab.strings.add("Feat=Val") - assert doc[0].morph_ == "Feat=Val" + doc[0].set_morph(tokenizer.vocab.strings.add("Feat=Val")) + assert str(doc[0].morph) == "Feat=Val" diff --git a/spacy/tests/doc/test_retokenize_merge.py b/spacy/tests/doc/test_retokenize_merge.py index ab186b062..cb886545a 100644 --- a/spacy/tests/doc/test_retokenize_merge.py +++ b/spacy/tests/doc/test_retokenize_merge.py @@ -21,11 +21,11 @@ def test_doc_retokenize_merge(en_tokenizer): assert doc[4].text == "the beach boys" assert doc[4].text_with_ws == "the beach boys " assert doc[4].tag_ == "NAMED" - assert doc[4].morph_ == "Number=Plur" + assert str(doc[4].morph) == "Number=Plur" assert doc[5].text == "all night" assert doc[5].text_with_ws == "all night" assert doc[5].tag_ == "NAMED" - assert doc[5].morph_ == "Number=Plur" + assert str(doc[5].morph) == "Number=Plur" def test_doc_retokenize_merge_children(en_tokenizer): diff --git a/spacy/tests/doc/test_retokenize_split.py b/spacy/tests/doc/test_retokenize_split.py index 4d4b170f9..238e36d59 100644 --- a/spacy/tests/doc/test_retokenize_split.py +++ b/spacy/tests/doc/test_retokenize_split.py @@ -27,11 +27,11 @@ def test_doc_retokenize_split(en_vocab): assert doc[0].text == "Los" assert doc[0].head.text == "Angeles" assert doc[0].idx == 0 - assert doc[0].morph_ == "Number=Sing" + assert str(doc[0].morph) == "Number=Sing" assert doc[1].idx == 3 assert doc[1].text == "Angeles" assert doc[1].head.text == "start" - assert doc[1].morph_ == "Number=Sing" + assert str(doc[1].morph) == "Number=Sing" assert doc[2].text == "start" assert doc[2].head.text == "." assert doc[3].text == "." 
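
The tests above exercise every input the new `Token.set_morph()` accepts: a FEATS string, a dict of features, the integer hash of an interned FEATS string, and `0` (or `""`/`"_"`) to clear the analysis. As a minimal standalone sketch of the same API — assuming a spaCy build that includes this patch, with a blank pipeline standing in for the test fixtures:

    import spacy

    nlp = spacy.blank("en")
    doc = nlp("I has")

    # Set from a FEATS string.
    doc[0].set_morph("PronType=prs")
    assert str(doc[0].morph) == "PronType=prs"

    # Set from a dict of features; fields are stored alphabetized.
    doc[1].set_morph({"VerbForm": "fin", "Number": "sing"})
    assert str(doc[1].morph) == "Number=sing|VerbForm=fin"

    # Any int other than 0 is looked up as the hash of an interned string.
    key = nlp.vocab.strings.add("Feat=Val")
    doc[0].set_morph(key)
    assert str(doc[0].morph) == "Feat=Val"

    # 0 resets token.c.morph to unset, per the commit message above.
    doc[0].set_morph(0)
    assert str(doc[0].morph) == ""
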
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py index 627110cdd..77b09f376 100644 --- a/spacy/tests/matcher/test_matcher_api.py +++ b/spacy/tests/matcher/test_matcher_api.py @@ -236,13 +236,13 @@ def test_matcher_subset_value_operator(en_vocab): matcher.add("M", [pattern]) doc = Doc(en_vocab, words=["a", "b", "c"]) assert len(matcher(doc)) == 3 - doc[0].morph_ = "Feat=Val" + doc[0].set_morph("Feat=Val") assert len(matcher(doc)) == 3 - doc[0].morph_ = "Feat=Val|Feat2=Val2" + doc[0].set_morph("Feat=Val|Feat2=Val2") assert len(matcher(doc)) == 3 - doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3" + doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3") assert len(matcher(doc)) == 2 - doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4" + doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4") assert len(matcher(doc)) == 2 # IS_SUBSET acts like "IN" for attrs other than MORPH @@ -268,11 +268,11 @@ def test_matcher_superset_value_operator(en_vocab): matcher.add("M", [pattern]) doc = Doc(en_vocab, words=["a", "b", "c"]) assert len(matcher(doc)) == 0 - doc[0].morph_ = "Feat=Val|Feat2=Val2" + doc[0].set_morph("Feat=Val|Feat2=Val2") assert len(matcher(doc)) == 0 - doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3" + doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3") assert len(matcher(doc)) == 1 - doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4" + doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4") assert len(matcher(doc)) == 1 # IS_SUPERSET with more than one value only matches for MORPH @@ -310,9 +310,9 @@ def test_matcher_morph_handling(en_vocab): doc = Doc(en_vocab, words=["a", "b", "c"]) assert len(matcher(doc)) == 0 - doc[0].morph_ = "Feat2=Val2|Feat1=Val1" + doc[0].set_morph("Feat2=Val2|Feat1=Val1") assert len(matcher(doc)) == 2 - doc[0].morph_ = "Feat1=Val1|Feat2=Val2" + doc[0].set_morph("Feat1=Val1|Feat2=Val2") assert len(matcher(doc)) == 2 # multiple values are split @@ -324,9 +324,9 @@ def test_matcher_morph_handling(en_vocab): doc = Doc(en_vocab, words=["a", "b", "c"]) assert len(matcher(doc)) == 0 - doc[0].morph_ = "Feat2=Val2,Val3|Feat1=Val1" + doc[0].set_morph("Feat2=Val2,Val3|Feat1=Val1") assert len(matcher(doc)) == 1 - doc[0].morph_ = "Feat1=Val1,Val3|Feat2=Val2" + doc[0].set_morph("Feat1=Val1,Val3|Feat2=Val2") assert len(matcher(doc)) == 2 @@ -405,7 +405,7 @@ def test_attr_pipeline_checks(en_vocab): doc2 = Doc(en_vocab, words=["Test"]) doc2[0].tag_ = "TAG" doc2[0].pos_ = "X" - doc2[0].morph_ = "Feat=Val" + doc2[0].set_morph("Feat=Val") doc2[0].lemma_ = "LEMMA" doc3 = Doc(en_vocab, words=["Test"]) # DEP requires DEP diff --git a/spacy/tests/matcher/test_phrase_matcher.py b/spacy/tests/matcher/test_phrase_matcher.py index 522356ffc..1b81fd780 100644 --- a/spacy/tests/matcher/test_phrase_matcher.py +++ b/spacy/tests/matcher/test_phrase_matcher.py @@ -190,7 +190,7 @@ def test_phrase_matcher_validation(en_vocab): doc2 = Doc(en_vocab, words=["Test"]) doc2[0].tag_ = "TAG" doc2[0].pos_ = "X" - doc2[0].morph_ = "Feat=Val" + doc2[0].set_morph("Feat=Val") doc3 = Doc(en_vocab, words=["Test"]) matcher = PhraseMatcher(en_vocab, validate=True) with pytest.warns(UserWarning): @@ -217,7 +217,7 @@ def test_attr_pipeline_checks(en_vocab): doc2 = Doc(en_vocab, words=["Test"]) doc2[0].tag_ = "TAG" doc2[0].pos_ = "X" - doc2[0].morph_ = "Feat=Val" + doc2[0].set_morph("Feat=Val") doc2[0].lemma_ = "LEMMA" doc3 = Doc(en_vocab, words=["Test"]) # DEP requires DEP diff --git a/spacy/tests/pipeline/test_attributeruler.py 
b/spacy/tests/pipeline/test_attributeruler.py index b9e5894dd..5773127af 100644 --- a/spacy/tests/pipeline/test_attributeruler.py +++ b/spacy/tests/pipeline/test_attributeruler.py @@ -69,9 +69,9 @@ def test_attributeruler_init(nlp, pattern_dicts): a.add(**p) doc = nlp("This is a test.") assert doc[2].lemma_ == "the" - assert doc[2].morph_ == "Case=Nom|Number=Plur" + assert str(doc[2].morph) == "Case=Nom|Number=Plur" assert doc[3].lemma_ == "cat" - assert doc[3].morph_ == "Case=Nom|Number=Sing" + assert str(doc[3].morph) == "Case=Nom|Number=Sing" assert doc.has_annotation("LEMMA") assert doc.has_annotation("MORPH") @@ -81,9 +81,9 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts): nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts}) doc = nlp("This is a test.") assert doc[2].lemma_ == "the" - assert doc[2].morph_ == "Case=Nom|Number=Plur" + assert str(doc[2].morph) == "Case=Nom|Number=Plur" assert doc[3].lemma_ == "cat" - assert doc[3].morph_ == "Case=Nom|Number=Sing" + assert str(doc[3].morph) == "Case=Nom|Number=Sing" assert doc.has_annotation("LEMMA") assert doc.has_annotation("MORPH") nlp.remove_pipe("attribute_ruler") @@ -94,9 +94,9 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts): ) doc = nlp("This is a test.") assert doc[2].lemma_ == "the" - assert doc[2].morph_ == "Case=Nom|Number=Plur" + assert str(doc[2].morph) == "Case=Nom|Number=Plur" assert doc[3].lemma_ == "cat" - assert doc[3].morph_ == "Case=Nom|Number=Sing" + assert str(doc[3].morph) == "Case=Nom|Number=Sing" assert doc.has_annotation("LEMMA") assert doc.has_annotation("MORPH") @@ -106,9 +106,9 @@ def test_attributeruler_score(nlp, pattern_dicts): nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts}) doc = nlp("This is a test.") assert doc[2].lemma_ == "the" - assert doc[2].morph_ == "Case=Nom|Number=Plur" + assert str(doc[2].morph) == "Case=Nom|Number=Plur" assert doc[3].lemma_ == "cat" - assert doc[3].morph_ == "Case=Nom|Number=Sing" + assert str(doc[3].morph) == "Case=Nom|Number=Sing" dev_examples = [ Example.from_dict( @@ -150,10 +150,10 @@ def test_attributeruler_tag_map(nlp, tag_map): for i in range(len(doc)): if i == 4: assert doc[i].pos_ == "PUNCT" - assert doc[i].morph_ == "PunctType=peri" + assert str(doc[i].morph) == "PunctType=peri" else: assert doc[i].pos_ == "" - assert doc[i].morph_ == "" + assert str(doc[i].morph) == "" def test_attributeruler_morph_rules(nlp, morph_rules): @@ -168,11 +168,11 @@ def test_attributeruler_morph_rules(nlp, morph_rules): for i in range(len(doc)): if i != 2: assert doc[i].pos_ == "" - assert doc[i].morph_ == "" + assert str(doc[i].morph) == "" else: assert doc[2].pos_ == "DET" assert doc[2].lemma_ == "a" - assert doc[2].morph_ == "Case=Nom" + assert str(doc[2].morph) == "Case=Nom" def test_attributeruler_indices(nlp): @@ -194,14 +194,14 @@ def test_attributeruler_indices(nlp): for i in range(len(doc)): if i == 1: assert doc[i].lemma_ == "was" - assert doc[i].morph_ == "Case=Nom|Number=Sing" + assert str(doc[i].morph) == "Case=Nom|Number=Sing" elif i == 2: assert doc[i].lemma_ == "the" - assert doc[i].morph_ == "Case=Nom|Number=Plur" + assert str(doc[i].morph) == "Case=Nom|Number=Plur" elif i == 3: assert doc[i].lemma_ == "cat" else: - assert doc[i].morph_ == "" + assert str(doc[i].morph) == "" # raises an error when trying to modify a token outside of the match a.add([[{"ORTH": "a"}, {"ORTH": "test"}]], {"LEMMA": "cat"}, index=2) with pytest.raises(ValueError): diff --git a/spacy/tests/pipeline/test_morphologizer.py 
b/spacy/tests/pipeline/test_morphologizer.py index 5d605f4e6..af81129c0 100644 --- a/spacy/tests/pipeline/test_morphologizer.py +++ b/spacy/tests/pipeline/test_morphologizer.py @@ -91,7 +91,7 @@ def test_overfitting_IO(): doc = nlp(test_text) gold_morphs = ["Feat=N", "Feat=V", "", ""] gold_pos_tags = ["NOUN", "VERB", "ADJ", ""] - assert [t.morph_ for t in doc] == gold_morphs + assert [str(t.morph) for t in doc] == gold_morphs assert [t.pos_ for t in doc] == gold_pos_tags # Also test the results are still the same after IO @@ -99,5 +99,5 @@ def test_overfitting_IO(): nlp.to_disk(tmp_dir) nlp2 = util.load_model_from_path(tmp_dir) doc2 = nlp2(test_text) - assert [t.morph_ for t in doc2] == gold_morphs + assert [str(t.morph) for t in doc2] == gold_morphs assert [t.pos_ for t in doc2] == gold_pos_tags diff --git a/spacy/tests/test_scorer.py b/spacy/tests/test_scorer.py index 187aa1b52..039f3d4d8 100644 --- a/spacy/tests/test_scorer.py +++ b/spacy/tests/test_scorer.py @@ -76,7 +76,7 @@ def tagged_doc(): for i in range(len(tags)): doc[i].tag_ = tags[i] doc[i].pos_ = pos[i] - doc[i].morph_ = morphs[i] + doc[i].set_morph(morphs[i]) if i > 0: doc[i].is_sent_start = False return doc @@ -242,7 +242,7 @@ def test_tag_score(tagged_doc): gold = { "tags": [t.tag_ for t in tagged_doc], "pos": [t.pos_ for t in tagged_doc], - "morphs": [t.morph_ for t in tagged_doc], + "morphs": [str(t.morph) for t in tagged_doc], "sent_starts": [1 if t.is_sent_start else -1 for t in tagged_doc], } example = Example.from_dict(tagged_doc, gold) @@ -259,7 +259,7 @@ def test_tag_score(tagged_doc): tags[0] = "NN" pos = [t.pos_ for t in tagged_doc] pos[1] = "X" - morphs = [t.morph_ for t in tagged_doc] + morphs = [str(t.morph) for t in tagged_doc] morphs[1] = "Number=sing" morphs[2] = "Number=plur" gold = { diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py index 81207b640..06db86a12 100644 --- a/spacy/tests/training/test_new_example.py +++ b/spacy/tests/training/test_new_example.py @@ -113,7 +113,7 @@ def test_Example_from_dict_with_morphology(annots): predicted = Doc(vocab, words=annots["words"]) example = Example.from_dict(predicted, annots) for i, token in enumerate(example.reference): - assert token.morph_ == annots["morphs"][i] + assert str(token.morph) == annots["morphs"][i] @pytest.mark.parametrize( diff --git a/spacy/tests/training/test_training.py b/spacy/tests/training/test_training.py index 28a411e6d..405801f62 100644 --- a/spacy/tests/training/test_training.py +++ b/spacy/tests/training/test_training.py @@ -460,7 +460,7 @@ def test_roundtrip_docs_to_docbin(doc): idx = [t.idx for t in doc] tags = [t.tag_ for t in doc] pos = [t.pos_ for t in doc] - morphs = [t.morph_ for t in doc] + morphs = [str(t.morph) for t in doc] lemmas = [t.lemma_ for t in doc] deps = [t.dep_ for t in doc] heads = [t.head.i for t in doc] @@ -482,7 +482,7 @@ def test_roundtrip_docs_to_docbin(doc): assert idx == [t.idx for t in reloaded_example.reference] assert tags == [t.tag_ for t in reloaded_example.reference] assert pos == [t.pos_ for t in reloaded_example.reference] - assert morphs == [t.morph_ for t in reloaded_example.reference] + assert morphs == [str(t.morph) for t in reloaded_example.reference] assert lemmas == [t.lemma_ for t in reloaded_example.reference] assert deps == [t.dep_ for t in reloaded_example.reference] assert heads == [t.head.i for t in reloaded_example.reference] diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 2d4e9af9d..ed283a86b 100644 --- 
a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -101,7 +101,7 @@ class DocBin: self.strings.add(token.text) self.strings.add(token.tag_) self.strings.add(token.lemma_) - self.strings.add(token.morph_) + self.strings.add(str(token.morph)) self.strings.add(token.dep_) self.strings.add(token.ent_type_) self.strings.add(token.ent_kb_id_) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 29fbb6076..9dfa6e714 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1248,7 +1248,7 @@ cdef class Doc: for token in self: strings.add(token.tag_) strings.add(token.lemma_) - strings.add(token.morph_) + strings.add(str(token.morph)) strings.add(token.dep_) strings.add(token.ent_type_) strings.add(token.ent_kb_id_) diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 239de4559..8099abd92 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -215,20 +215,20 @@ cdef class Token: def __get__(self): return MorphAnalysis.from_id(self.vocab, self.c.morph) - def __set__(self, attr_t morph): - if morph == 0: - self.c.morph = morph - elif morph in self.vocab.strings: - self.morph_ = self.vocab.strings[morph] - else: - raise ValueError(Errors.E1009.format(val=morph)) + def __set__(self, MorphAnalysis morph): + # Check that the morph has the same vocab + if self.vocab != morph.vocab: + raise ValueError(Errors.E1013) + self.c.morph = morph.c.key - property morph_: - def __get__(self): - return str(MorphAnalysis.from_id(self.vocab, self.c.morph)) - - def __set__(self, features): - cdef hash_t key = self.vocab.morphology.add(features) + def set_morph(self, features): + cdef hash_t key + if features is 0: + self.c.morph = 0 + else: + if isinstance(features, int): + features = self.vocab.strings[features] + key = self.vocab.morphology.add(features) self.c.morph = key @property diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index ca93b6464..f6225135c 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -226,7 +226,7 @@ cdef class Example: "TAG": [t.tag_ for t in self.reference], "LEMMA": [t.lemma_ for t in self.reference], "POS": [t.pos_ for t in self.reference], - "MORPH": [t.morph_ for t in self.reference], + "MORPH": [str(t.morph) for t in self.reference], "HEAD": [t.head.i for t in self.reference], "DEP": [t.dep_ for t in self.reference], "SENT_START": [int(bool(t.is_sent_start)) for t in self.reference] diff --git a/spacy/training/gold_io.pyx b/spacy/training/gold_io.pyx index 8b9f5ab2b..8fb6b8565 100644 --- a/spacy/training/gold_io.pyx +++ b/spacy/training/gold_io.pyx @@ -44,7 +44,7 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"): if include_annotation["POS"]: json_token["pos"] = token.pos_ if include_annotation["MORPH"]: - json_token["morph"] = token.morph_ + json_token["morph"] = str(token.morph) if include_annotation["LEMMA"]: json_token["lemma"] = token.lemma_ if include_annotation["DEP"]: From 5762876dcc1be42e982ad989335f9a485a7c3be3 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 1 Oct 2020 22:27:37 +0200 Subject: [PATCH 2/3] Update default config [ci skip] --- spacy/default_config.cfg | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/spacy/default_config.cfg b/spacy/default_config.cfg index 6bd1ed24d..d7fc46ea0 100644 --- a/spacy/default_config.cfg +++ b/spacy/default_config.cfg @@ -2,7 +2,6 @@ train = null dev = null vectors = null -vocab_data = null init_tok2vec = null [system] @@ -11,8 +10,13 @@ gpu_allocator = null [nlp] lang = null +# 
List of pipeline component names, in order. The names should correspond to +# components defined in the [components block] pipeline = [] +# Components that are loaded but disabled by default disabled = [] +# Optional callbacks to modify the nlp object before it's initialized, after +# it's created and after the pipeline has been set up before_creation = null after_creation = null after_pipeline_creation = null @@ -20,6 +24,7 @@ after_pipeline_creation = null [nlp.tokenizer] @tokenizers = "spacy.Tokenizer.v1" +# The pipeline components and their models [components] # Readers for corpora like dev and train. @@ -38,8 +43,7 @@ max_length = 0 limit = 0 # Apply some simply data augmentation, where we replace tokens with variations. # This is especially useful for punctuation and case replacement, to help -# generalize beyond corpora that don't have smart-quotes, or only have smart -# quotes, etc. +# generalize beyond corpora that don't/only have smart quotes etc. augmenter = null [corpora.dev] @@ -53,6 +57,7 @@ gold_preproc = false max_length = 0 # Limitation on number of training examples limit = 0 +# Optional callback for data augmentation augmenter = null # Training hyper-parameters and additional features. @@ -102,17 +107,18 @@ use_averages = false eps = 1e-8 learn_rate = 0.001 -# The 'initialize' step is run before training or pretraining. Components and -# the tokenizer can each define their own arguments via their .initialize -# methods that are populated by the config. This lets them gather resources like -# lookup tables and build label sets, construct vocabularies, etc. +# These settings are used when nlp.initialize() is called (typically before +# training or pretraining). Components and the tokenizer can each define their +# own arguments via their initialize methods that are populated by the config. +# This lets them gather data resources, build label sets etc. [initialize] -vocab_data = ${paths.vocab_data} -lookups = null vectors = ${paths.vectors} # Extra resources for transfer-learning or pseudo-rehearsal init_tok2vec = ${paths.init_tok2vec} +# Data and lookups for vocabulary +vocab_data = null +lookups = null # Arguments passed to the tokenizer's initialize method tokenizer = {} -# Arguments passed to the initialize methods of the components (keyed by component name) +# Arguments for initialize methods of the components (keyed by component) components = {} From 50162b8726641248802bfa43d4d63ca26f8efd09 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 1 Oct 2020 22:27:45 +0200 Subject: [PATCH 3/3] Try to work around Sharp build issue [ci skip] --- website/gatsby-config.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/website/gatsby-config.js b/website/gatsby-config.js index c1a2f9ab9..4650711ac 100644 --- a/website/gatsby-config.js +++ b/website/gatsby-config.js @@ -1,6 +1,11 @@ const autoprefixer = require('autoprefixer') const path = require('path') +// https://florian.ec/blog/gatsby-build-netlify-segmentation-fault/ +const sharp = require('sharp') +sharp.cache(false) +sharp.simd(false) + // Markdown plugins const wrapSectionPlugin = require('./src/plugins/remark-wrap-section.js') const customAttrsPlugin = require('./src/plugins/remark-custom-attrs.js')
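
Taken together, the token-morph changes in PATCH 1/3 reduce to one mechanical migration: reads move from `token.morph_` to `str(token.morph)`, writes move from assigning `token.morph_` to calling `token.set_morph()`, and plain assignment to `token.morph` is reserved for copying a `MorphAnalysis` between tokens that share a vocab (otherwise the new E1013 error is raised). A hedged before/after sketch of that migration, assuming two blank pipelines with distinct vocabs:

    import spacy

    nlp_en = spacy.blank("en")
    nlp_de = spacy.blank("de")
    doc_en = nlp_en("a b")
    doc_de = nlp_de("a b")

    # Formerly: doc_en[0].morph_ = "Feat=Val"
    doc_en[0].set_morph("Feat=Val")
    assert str(doc_en[0].morph) == "Feat=Val"  # formerly doc_en[0].morph_

    # Copying a MorphAnalysis directly works while the vocabs match ...
    doc_en[1].morph = doc_en[0].morph
    assert doc_en[0].morph == doc_en[1].morph

    # ... but raises E1013 across vocabs; round-trip through the string
    # value instead, as the new error message suggests.
    try:
        doc_de[0].morph = doc_en[0].morph
    except ValueError:
        doc_de[0].set_morph(str(doc_en[0].morph))
    assert str(doc_de[0].morph) == "Feat=Val"
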