Code improvements

2025-08-02 11:20:19 +03:00 · 2023-01-27 09:11:58 +01:00 · 2023-01-27 09:11:58 +01:00 · acc8018302
commit acc8018302
parent c89ef32ea5
2 changed files with 35 additions and 32 deletions
--- a/spacy/visualization.py
+++ b/spacy/visualization.py
@@ -41,7 +41,7 @@ ROOT_LEFT_CHARS = {

 class AttributeFormat:
    """
-    Instructions for rendering information about a token property, e.g. lemma_, ent_type_.
+    Instructions for rendering information about a token property, e.g. *lemma_*, *ent_type_*.
    """

    def __init__(
@@ -57,13 +57,13 @@ class AttributeFormat:
        value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
    ):
        """
-        attribute:              the token attribute, e.g. lemma_, ._.holmes.lemma
-        name:                   the name to display e.g. in column headers
+        attribute:              the token attribute, e.g. *lemma_*, *._.holmes.lemma*.
+        name:                   the name to display e.g. in column headers.
        aligns:                 where appropriate the column alignment 'l' (left,
                                    default), 'r' (right) or 'c' (center).
        max_width:              a maximum width to which values of the attribute should be truncated.
-        fg_color:               the foreground color that should be used to display instances of the attribute
-        bg_color:               the background color that should be used to display instances of the attribute
+        fg_color:               the foreground color that should be used to display instances of the attribute.
+        bg_color:               the background color that should be used to display instances of the attribute.
        value_dep_fg_colors:    a dictionary from values to foreground colors that should be used to display those values.
        value_dep_bg_colors:    a dictionary from values to background colors that should be used to display those values.
        """
@@ -90,24 +90,26 @@ class AttributeFormat:
    ) -> str:
        """
        right_pad_to_len:           the width to which values should be right-padded, or 'None' for no right-padding.
-        ignore_colors:              no colors should be rendered, typically because the values are required to calculate widths
+        ignore_colors:              no colors should be rendered, typically because the values are required to calculate widths.
        """
        value = _get_token_value(token, self.attribute)
        if self.max_width is not None:
            value = value[: self.max_width]
-        fg_color = None
-        bg_color = None
        if right_pad_to_len is not None:
            right_padding = " " * (right_pad_to_len - len(value))
        else:
            right_padding = ""
+        value_dep_fg_color = None
+        value_dep_bg_color = None
        if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
            if len(self.value_dep_fg_colors) > 0:
-                fg_color = self.value_dep_fg_colors.get(value, None)
+                value_dep_fg_color = self.value_dep_fg_colors.get(value, None)
            if len(self.value_dep_bg_colors) > 0:
-                bg_color = self.value_dep_bg_colors.get(value, None)
-        if fg_color is not None or bg_color is not None:
-            value = self.printer.text(value, color=fg_color, bg_color=bg_color)
+                value_dep_bg_color = self.value_dep_bg_colors.get(value, None)
+        if value_dep_fg_color is not None or value_dep_bg_color is not None:
+            value = self.printer.text(
+                value, color=value_dep_fg_color, bg_color=value_dep_bg_color
+            )
        return value + right_padding


@@ -120,7 +122,10 @@ def render_dep_tree(sent, root_right: bool) -> List[str]:
    root_right: True if the tree should be rendered with the root on the right-hand side,
                False if the tree should be rendered with the root on the left-hand side.

-    Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
+    Algorithm adapted from https://github.com/KoichiYasuoka/deplacy. It was confirmed that
+    this code produces trees equivalent to deplacy's for a large number of sentences; there
+    were a handful of cases where the trees were different, but in these cases
+    the trees produced here were confirmed to be linguistically correct.
    """

    # Check sent is really a sentence
@@ -346,18 +351,16 @@ def render_table(
    adj_start_i = doc[adj_start_i].sent.start
    end_i = doc[end_i].sent.end
    for sent in doc[adj_start_i:end_i].sents:
-        if "tree_right" in (c.attribute for c in cols):
-            tree_right = render_dep_tree(sent, True)
-        if "tree_left" in (c.attribute for c in cols):
-            tree_left = render_dep_tree(sent, False)
        widths = []
        for col in cols:
-            # get the values without any color codes
-            if col.attribute == "tree_left":
-                width = len(tree_left[0])  # type: ignore
-            elif col.attribute == "tree_right":
-                width = len(tree_right[0])  # type: ignore
+            if col.attribute == "tree_right":
+                tree_right = render_dep_tree(sent, True)
+                width = len(tree_right[0])
+            elif col.attribute == "tree_left":
+                tree_left = render_dep_tree(sent, False)
+                width = len(tree_left[0])
            else:
+                # get the values without any color codes
                if len(sent) > 0:
                    width = max(
                        len(col.render(token, ignore_colors=True)) for token in sent
@@ -462,7 +465,7 @@ def _get_token_value(token, attribute: str) -> str:
    """
    Get value *token.x.y.z*.

-    token: the token
+    token: the token.
    attribute: the attribute name, e.g. *x.y.z*.
    """
    obj = token
@@ -483,9 +486,9 @@ def _get_adjusted_start_i(
    Get the position at which to start rendering a document, which may be
    adjusted by a search for a specific attribute value.

-    doc: the document
-    start_i: the user-specified start index
-    cols: the list of attribute columns being displayed
+    doc: the document.
+    start_i: the user-specified start index.
+    cols: the list of attribute columns being displayed.
    search_attr_name: the name of the attribute for which values are being searched,
        i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed.
    search_attr_value: the attribute value for which to search.
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@@ -540,9 +540,9 @@ is displayed.
 > ```

 | Name                | Description                                                                                                                                                                                                                                     |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `search_attr_name`  | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. ~~Optional[str]~~                                                                                                                          |
-| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. ~~Optional[str]~~                                                                                                                           |
+| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `search_attr_name`  | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. A value only has an effect in conjunction with a value for `search_attr_value`. ~~Optional[str]~~                                |
+| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. A value only has an effect in conjunction with a value for `search_attr_name`. ~~Optional[str]~~                                  |
 | _keyword-only_      |                                                                                                                                                                                                                                                 |
 | `start_i`           | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~                                                                                                                                                 |
 | `length`            | The number of tokens to display with their containing sentences. Can be used in isolation to display a portion at the beginning of a document, or in conjunction with the other parameters to display more than one sentence. ~~Optional[int]~~ |