From e05b2ccc7cca4e28d6031fa9b19bc3bb3c8c258f Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Thu, 20 Apr 2023 14:06:32 +0200 Subject: [PATCH] Add default option to MorphAnalysis.get (#12545) * Add default to MorphAnalysis.get Similar to `dict`, allow a `default` option for `MorphAnalysis.get` for the user to provide a default return value if the field is not found. The default return value remains `[]`, which is not the same as `dict.get`, but is already established as this method's default return value with the return type `List[str]`. However the new `default` option does not enforce that the user-provided default is actually `List[str]`. * Restore test case --- spacy/tests/doc/test_morphanalysis.py | 2 ++ spacy/tokens/morphanalysis.pyi | 4 ++-- spacy/tokens/morphanalysis.pyx | 6 +++++- website/docs/api/morphology.mdx | 9 +++++---- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/spacy/tests/doc/test_morphanalysis.py b/spacy/tests/doc/test_morphanalysis.py index 918d4acdc..49e32b936 100644 --- a/spacy/tests/doc/test_morphanalysis.py +++ b/spacy/tests/doc/test_morphanalysis.py @@ -33,6 +33,8 @@ def test_token_morph_key(i_has): def test_morph_props(i_has): assert i_has[0].morph.get("PronType") == ["prs"] assert i_has[1].morph.get("PronType") == [] + assert i_has[1].morph.get("AsdfType", ["asdf"]) == ["asdf"] + assert i_has[1].morph.get("AsdfType", default=["asdf", "qwer"]) == ["asdf", "qwer"] def test_morph_iter(i_has): diff --git a/spacy/tokens/morphanalysis.pyi b/spacy/tokens/morphanalysis.pyi index b86203cc4..a5376e80d 100644 --- a/spacy/tokens/morphanalysis.pyi +++ b/spacy/tokens/morphanalysis.pyi @@ -1,4 +1,4 @@ -from typing import Any, Dict, Iterator, List, Union +from typing import Any, Dict, Iterator, List, Optional, Union from ..vocab import Vocab class MorphAnalysis: @@ -13,7 +13,7 @@ class MorphAnalysis: def __hash__(self) -> int: ... def __eq__(self, other: MorphAnalysis) -> bool: ... # type: ignore[override] def __ne__(self, other: MorphAnalysis) -> bool: ... # type: ignore[override] - def get(self, field: Any) -> List[str]: ... + def get(self, field: Any, default: Optional[List[str]]) -> List[str]: ... def to_json(self) -> str: ... def to_dict(self) -> Dict[str, str]: ... def __str__(self) -> str: ... diff --git a/spacy/tokens/morphanalysis.pyx b/spacy/tokens/morphanalysis.pyx index a7d1f2e44..baa3800a1 100644 --- a/spacy/tokens/morphanalysis.pyx +++ b/spacy/tokens/morphanalysis.pyx @@ -58,10 +58,14 @@ cdef class MorphAnalysis: def __ne__(self, other): return self.key != other.key - def get(self, field): + def get(self, field, default=None): """Retrieve feature values by field.""" cdef attr_t field_id = self.vocab.strings.as_int(field) cdef np.ndarray results = get_by_field(&self.c, field_id) + if len(results) == 0: + if default is None: + default = [] + return default features = [self.vocab.strings[result] for result in results] return [f.split(Morphology.FIELD_SEP)[1] for f in features] diff --git a/website/docs/api/morphology.mdx b/website/docs/api/morphology.mdx index 68d80b814..5d4affafe 100644 --- a/website/docs/api/morphology.mdx +++ b/website/docs/api/morphology.mdx @@ -213,10 +213,11 @@ Retrieve values for a feature by field. > assert morph.get("Feat1") == ["Val1", "Val2"] > ``` -| Name | Description | -| ----------- | ------------------------------------------------ | -| `field` | The field to retrieve. ~~str~~ | -| **RETURNS** | A list of the individual features. ~~List[str]~~ | +| Name | Description | +| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ | +| `field` | The field to retrieve. ~~str~~ | +| `default` 3.6 | The value to return if the field is not present. If unset or `None`, the default return value is `[]`. ~~Optional[List[str]]~~ | +| **RETURNS** | A list of the individual features. ~~List[str]~~ | ### MorphAnalysis.to_dict {id="morphanalysis-to_dict",tag="method"}