Add permitted values

2025-08-04 04:10:20 +03:00 · 2021-12-08 14:58:02 +01:00 · 2021-12-08 14:58:02 +01:00 · 183d535ef4
commit 183d535ef4
parent 9f7f234b0f
2 changed files with 121 additions and 8 deletions
--- a/spacy/tests/test_visualization.py
+++ b/spacy/tests/test_visualization.py
@ -420,6 +420,37 @@ def test_visualization_get_entity_custom_attribute_missing(en_vocab):
        Visualizer().get_entity(doc[2], "_.depp")
 def test_visualization_get_entity_permitted_values(en_vocab):
    doc = Doc(
        en_vocab,
        words=[
            "I",
            "saw",
            "a",
            "horse",
            "yesterday",
            "that",
            "was",
            "injured",
            ".",
        ],
        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
        deps=["dep"] * 9,
    )
    visualizer = Visualizer()
    assert [visualizer.get_entity(token, "head.i", permitted_values=(3, 7)) for token in doc] == [
        "",
        "",
        "3",
        "",
        "",
        "7",
        "7",
        "3",
        "",
    ]
 def test_visualization_minimal_render_table_one_sentence(
    fully_featured_doc_one_sentence,
 ):
@ -450,6 +481,67 @@ def test_visualization_minimal_render_table_one_sentence(
    )
 def test_visualization_minimal_render_table_permitted_values(
    fully_featured_doc_one_sentence,
 ):
    formats = [
        AttributeFormat("tree_left"),
        AttributeFormat("dep_"),
        AttributeFormat("text"),
        AttributeFormat("lemma_", permitted_values=("fly", "to")),
        AttributeFormat("pos_"),
        AttributeFormat("tag_"),
        AttributeFormat("morph"),
        AttributeFormat("ent_type_"),
    ]
    assert (
        Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip()
        == """
  ╔>╔═   poss       Sarah           PROPN   NNP   NounType=prop|Number=sing   PERSON
  ║ ╚>   case       's              PART    POS   Poss=yes                          
 ╔>╚═══   nsubj      sister          NOUN    NN    Number=sing                       
 ╠═════   ROOT       flew      fly   VERB    VBD   Tense=past|VerbForm=fin           
 ╠>╔═══   prep       to        to    ADP     IN                                      
 ║ ║ ╔>   compound   Silicon         PROPN   NNP   NounType=prop|Number=sing   GPE   
 ║ ╚>╚═   pobj       Valley          PROPN   NNP   NounType=prop|Number=sing   GPE   
 ╠══>╔═   prep       via             ADP     IN                                      
 ║   ╚>   pobj       London          PROPN   NNP   NounType=prop|Number=sing   GPE   
 ╚════>   punct      .               PUNCT   .     PunctType=peri
    """.strip()
    )
 def test_visualization_spacing(
    fully_featured_doc_one_sentence,
 ):
    formats = [
        AttributeFormat("tree_left"),
        AttributeFormat("dep_"),
        AttributeFormat("text"),
        AttributeFormat("lemma_"),
        AttributeFormat("pos_"),
        AttributeFormat("tag_"),
        AttributeFormat("morph"),
        AttributeFormat("ent_type_"),
    ]
    assert (
        Visualizer()
        .render_table(fully_featured_doc_one_sentence, formats, spacing=1)
        .strip()
        == """
  ╔>╔═ poss     Sarah   sarah   PROPN NNP NounType=prop|Number=sing PERSON
  ║ ╚> case     's      's      PART  POS Poss=yes                        
 ╔>╚═══ nsubj    sister  sister  NOUN  NN  Number=sing                     
 ╠═════ ROOT     flew    fly     VERB  VBD Tense=past|VerbForm=fin         
 ╠>╔═══ prep     to      to      ADP   IN                                  
 ║ ║ ╔> compound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE   
 ║ ╚>╚═ pobj     Valley  valley  PROPN NNP NounType=prop|Number=sing GPE   
 ╠══>╔═ prep     via     via     ADP   IN                                  
 ║   ╚> pobj     London  london  PROPN NNP NounType=prop|Number=sing GPE   
 ╚════> punct    .       .       PUNCT .   PunctType=peri
    """.strip()
    )
 def test_visualization_minimal_render_table_two_sentences(
    fully_featured_doc_two_sentences,
 ):
--- a/spacy/visualization.py
+++ b/spacy/visualization.py
@ -16,6 +16,7 @@ class AttributeFormat:
        max_width: int = None,
        fg_color: Union[str, int] = None,
        bg_color: Union[str, int] = None,
        permitted_values: tuple = None,
        value_dependent_fg_colors: dict[str, Union[str, int]] = None,
        value_dependent_bg_colors: dict[str, Union[str, int]] = None,
    ):
@ -25,6 +26,7 @@ class AttributeFormat:
        self.max_width = max_width
        self.fg_color = fg_color
        self.bg_color = bg_color
        self.permitted_values = permitted_values
        self.value_dependent_fg_colors = value_dependent_fg_colors
        self.value_dependent_bg_colors = value_dependent_bg_colors
@ -65,7 +67,6 @@ ROOT_LEFT_CHARS = {
 class Visualizer:
    def __init__(self):
        self.printer = wasabi.Printer(no_print=True)
@ -300,19 +301,30 @@ class Visualizer:
        token: Token,
        entity_name: str,
        *,
        permitted_values: list[str] = None,
        value_dependent_fg_colors: dict[str : Union[str, int]] = None,
        value_dependent_bg_colors: dict[str : Union[str, int]] = None,
-        truncate_at_width: int = None
+        truncate_at_width: int = None,
    ) -> str:
        obj = token
        parts = entity_name.split(".")
        for part in parts[:-1]:
            obj = getattr(obj, part)
        value = str(getattr(obj, parts[-1]))
        if permitted_values is not None and value not in (str(v) for v in permitted_values):
            return ""
        if truncate_at_width is not None:
            value = value[:truncate_at_width]
-        fg_color = value_dependent_fg_colors.get(value, None) if value_dependent_fg_colors is not None else None
+        fg_color = (
-        bg_color = value_dependent_bg_colors.get(value, None) if value_dependent_bg_colors is not None else None
+            value_dependent_fg_colors.get(value, None)
            if value_dependent_fg_colors is not None
            else None
        )
        bg_color = (
            value_dependent_bg_colors.get(value, None)
            if value_dependent_bg_colors is not None
            else None
        )
        if fg_color is not None or bg_color is not None:
            value = self.printer.text(value, color=fg_color, bg_color=bg_color)
        return value
@ -329,12 +341,19 @@ class Visualizer:
            widths = []
            for column in columns:
                # get the values without any color codes
-                if column.attribute == 'tree_left':
+                if column.attribute == "tree_left":
                    width = len(tree_left[0])
-                elif column.attribute == 'tree_right':
+                elif column.attribute == "tree_right":
                    width = len(tree_right[0])
                else:
-                    width = max(len(self.get_entity(token, column.attribute)) for token in sent)
+                    width = max(
                        len(
                            self.get_entity(
                                token, column.attribute, permitted_values=column.permitted_values
                            )
                        )
                        for token in sent
                    )
                    if column.max_width is not None:
                        width = min(width, column.max_width)
                width = max(width, len(column.name))
@ -348,9 +367,10 @@ class Visualizer:
                    else self.get_entity(
                        token,
                        column.attribute,
                        permitted_values=column.permitted_values,
                        value_dependent_fg_colors=column.value_dependent_fg_colors,
                        value_dependent_bg_colors=column.value_dependent_bg_colors,
-                        truncate_at_width=widths[column_index]
+                        truncate_at_width=widths[column_index],
                    )
                    for column_index, column in enumerate(columns)
                ]
@ -372,6 +392,7 @@ class Visualizer:
                    widths=widths,
                    fg_colors=fg_colors,
                    bg_colors=bg_colors,
                    spacing=spacing,
                )
                + linesep
            )