Merge pull request #6179 from adrianeboyd/feature/token-morph-refactor-2 [ci skip]

2026-01-04 16:03:12 +03:00 · 2020-10-02 12:10:27 +02:00 · 2020-10-02 12:10:27 +02:00 · d2aa662ab2
commit d2aa662ab2
parent 0f11c2150d f83dfe62da
4 changed files with 96 additions and 79 deletions
--- a/spacy/tests/doc/test_morphanalysis.py
+++ b/spacy/tests/doc/test_morphanalysis.py
@@ -77,7 +77,7 @@ def test_morph_property(tokenizer):
    assert doc.to_array(["MORPH"])[0] != 0

    # unset with token.morph
-    doc[0].set_morph(0)
+    doc[0].set_morph(None)
    assert doc.to_array(["MORPH"])[0] == 0

    # empty morph is equivalent to "_"
--- a/spacy/tokens/token.pyx
+++ b/spacy/tokens/token.pyx
@@ -223,8 +223,10 @@ cdef class Token:

    def set_morph(self, features):
        cdef hash_t key
-        if features is 0:
+        if features is None:
            self.c.morph = 0
+        elif isinstance(features, MorphAnalysis):
+            self.morph = features
        else:
            if isinstance(features, int):
                features = self.vocab.strings[features]
--- a/website/docs/api/token.md
+++ b/website/docs/api/token.md
@@ -172,6 +172,25 @@ Get a neighboring token.
 | `i`         | The relative position of the token to get. Defaults to `1`. ~~int~~ |
 | **RETURNS** | The token at position `self.doc[self.i+i]`. ~~Token~~               |

+## Token.set_morph {#set_morph tag="method"}
+
+Set the morphological analysis from a UD FEATS string, hash value of a UD FEATS
+string, features dict or `MorphAnalysis`. The value `None` can be used to reset
+the morph to an unset state.
+
+> #### Example
+>
+> ```python
+> doc = nlp("Give it back! He pleaded.")
+> doc[0].set_morph("Mood=Imp|VerbForm=Fin")
+> assert "Mood=Imp" in doc[0].morph
+> assert doc[0].morph.get("Mood") == ["Imp"]
+> ```
+
+| Name       | Description                                                                       |
+| ---------- | --------------------------------------------------------------------------------- |
+| `features` | The morphological features to set. ~~Union[int, dict, str, MorphAnalysis, None]~~ |
+
 ## Token.is_ancestor {#is_ancestor tag="method" model="parser"}

 Check whether this token is a parent, grandparent, etc. of another in the
@@ -393,7 +412,7 @@ The L2 norm of the token's vector representation.
 ## Attributes {#attributes}

 | Name                                         | Description                                                                                                                                                                                                                                                           |
-| -------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| -------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `doc`                                        | The parent document. ~~Doc~~                                                                                                                                                                                                                                          |
 | `lex` <Tag variant="new">3</Tag>             | The underlying lexeme. ~~Lexeme~~                                                                                                                                                                                                                                     |
 | `sent` <Tag variant="new">2.0.12</Tag>       | The sentence span that this token is a part of. ~~Span~~                                                                                                                                                                                                              |
@@ -451,7 +470,6 @@ The L2 norm of the token's vector representation.
 | `tag`                                        | Fine-grained part-of-speech. ~~int~~                                                                                                                                                                                                                                  |
 | `tag_`                                       | Fine-grained part-of-speech. ~~str~~                                                                                                                                                                                                                                  |
 | `morph` <Tag variant="new">3</Tag>           | Morphological analysis. ~~MorphAnalysis~~                                                                                                                                                                                                                             |
-| `morph_` <Tag variant="new">3</Tag>          | Morphological analysis in the Universal Dependencies [FEATS](https://universaldependencies.org/format.html#morphological-annotation) format. ~~str~~                                                                                                                     |
 | `dep`                                        | Syntactic dependency relation. ~~int~~                                                                                                                                                                                                                                |
 | `dep_`                                       | Syntactic dependency relation. ~~str~~                                                                                                                                                                                                                                |
 | `lang`                                       | Language of the parent document's vocabulary. ~~int~~                                                                                                                                                                                                                 |
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -56,16 +56,13 @@ create a surface form. Here are some examples:

 Morphological features are stored in the [`MorphAnalysis`](/api/morphanalysis)
 under `Token.morph`, which allows you to access individual morphological
-features. The attribute `Token.morph_` provides the morphological analysis in
-the Universal Dependencies
-[FEATS](https://universaldependencies.org/format.html#morphological-annotation)
-format.
+features.

 > #### 📝 Things to try
 >
 > 1. Change "I" to "She". You should see that the morphological features change
 >    and express that it's a pronoun in the third person.
-> 2. Inspect `token.morph_` for the other tokens.
+> 2. Inspect `token.morph` for the other tokens.

 ```python
 ### {executable="true"}
@@ -75,7 +72,7 @@ nlp = spacy.load("en_core_web_sm")
 print("Pipeline:", nlp.pipe_names)
 doc = nlp("I was reading the paper.")
 token = doc[0]  # 'I'
-print(token.morph_)  # 'Case=Nom|Number=Sing|Person=1|PronType=Prs'
+print(token.morph)  # 'Case=Nom|Number=Sing|Person=1|PronType=Prs'
 print(token.morph.get("PronType"))  # ['Prs']
 ```

@@ -91,7 +88,7 @@ import spacy

 nlp = spacy.load("de_core_news_sm")
 doc = nlp("Wo bist du?") # English: 'Where are you?'
-print(doc[2].morph_)  # 'Case=Nom|Number=Sing|Person=2|PronType=Prs'
+print(doc[2].morph)  # 'Case=Nom|Number=Sing|Person=2|PronType=Prs'
 print(doc[2].pos_) # 'PRON'
 ```

@@ -117,7 +114,7 @@ import spacy

 nlp = spacy.load("en_core_web_sm")
 doc = nlp("Where are you?")
-print(doc[2].morph_)  # 'Case=Nom|Person=2|PronType=Prs'
+print(doc[2].morph)  # 'Case=Nom|Person=2|PronType=Prs'
 print(doc[2].pos_)  # 'PRON'
 ```