From acc8018302222fea3691425263a6dd3e8558a58b Mon Sep 17 00:00:00 2001
From: richardpaulhudson <richard@explosion.ai>
Date: Fri, 27 Jan 2023 09:11:58 +0100
Subject: [PATCH] Code improvements

---
 spacy/visualization.py   | 55 +++++++++++++++++++++-------------------
 website/docs/api/doc.mdx | 12 ++++-----
 2 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/spacy/visualization.py b/spacy/visualization.py
index 1de5dd486..22578c46c 100644
--- a/spacy/visualization.py
+++ b/spacy/visualization.py
@@ -41,7 +41,7 @@ ROOT_LEFT_CHARS = {
 
 class AttributeFormat:
     """
-    Instructions for rendering information about a token property, e.g. lemma_, ent_type_.
+    Instructions for rendering information about a token property, e.g. *lemma_*, *ent_type_*.
     """
 
     def __init__(
@@ -57,13 +57,13 @@ class AttributeFormat:
         value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
     ):
         """
-        attribute:              the token attribute, e.g. lemma_, ._.holmes.lemma
-        name:                   the name to display e.g. in column headers
+        attribute:              the token attribute, e.g. *lemma_*, *._.holmes.lemma*.
+        name:                   the name to display e.g. in column headers.
         aligns:                 where appropriate the column alignment 'l' (left,
                                     default), 'r' (right) or 'c' (center).
         max_width:              a maximum width to which values of the attribute should be truncated.
-        fg_color:               the foreground color that should be used to display instances of the attribute
-        bg_color:               the background color that should be used to display instances of the attribute
+        fg_color:               the foreground color that should be used to display instances of the attribute.
+        bg_color:               the background color that should be used to display instances of the attribute.
         value_dep_fg_colors:    a dictionary from values to foreground colors that should be used to display those values.
         value_dep_bg_colors:    a dictionary from values to background colors that should be used to display those values.
         """
@@ -90,24 +90,26 @@ class AttributeFormat:
     ) -> str:
         """
         right_pad_to_len:           the width to which values should be right-padded, or 'None' for no right-padding.
-        ignore_colors:              no colors should be rendered, typically because the values are required to calculate widths
+        ignore_colors:              no colors should be rendered, typically because the values are required to calculate widths.
         """
         value = _get_token_value(token, self.attribute)
         if self.max_width is not None:
             value = value[: self.max_width]
-        fg_color = None
-        bg_color = None
         if right_pad_to_len is not None:
             right_padding = " " * (right_pad_to_len - len(value))
         else:
             right_padding = ""
+        value_dep_fg_color = None
+        value_dep_bg_color = None
         if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
             if len(self.value_dep_fg_colors) > 0:
-                fg_color = self.value_dep_fg_colors.get(value, None)
+                value_dep_fg_color = self.value_dep_fg_colors.get(value, None)
             if len(self.value_dep_bg_colors) > 0:
-                bg_color = self.value_dep_bg_colors.get(value, None)
-        if fg_color is not None or bg_color is not None:
-            value = self.printer.text(value, color=fg_color, bg_color=bg_color)
+                value_dep_bg_color = self.value_dep_bg_colors.get(value, None)
+        if value_dep_fg_color is not None or value_dep_bg_color is not None:
+            value = self.printer.text(
+                value, color=value_dep_fg_color, bg_color=value_dep_bg_color
+            )
         return value + right_padding
 
 
@@ -120,7 +122,10 @@ def render_dep_tree(sent, root_right: bool) -> List[str]:
     root_right: True if the tree should be rendered with the root on the right-hand side,
                 False if the tree should be rendered with the root on the left-hand side.
 
-    Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
+    Algorithm adapted from https://github.com/KoichiYasuoka/deplacy. It was confirmed that
+    this code outputs trees equivalent to deplacy's for a large number of sentences; there
+    were a handful of cases where the trees were different, but in these cases
+    the trees output here were confirmed to be linguistically correct.
     """
 
     # Check sent is really a sentence
@@ -346,18 +351,16 @@ def render_table(
     adj_start_i = doc[adj_start_i].sent.start
     end_i = doc[end_i].sent.end
     for sent in doc[adj_start_i:end_i].sents:
-        if "tree_right" in (c.attribute for c in cols):
-            tree_right = render_dep_tree(sent, True)
-        if "tree_left" in (c.attribute for c in cols):
-            tree_left = render_dep_tree(sent, False)
         widths = []
         for col in cols:
-            # get the values without any color codes
-            if col.attribute == "tree_left":
-                width = len(tree_left[0])  # type: ignore
-            elif col.attribute == "tree_right":
-                width = len(tree_right[0])  # type: ignore
+            if col.attribute == "tree_right":
+                tree_right = render_dep_tree(sent, True)
+                width = len(tree_right[0])
+            elif col.attribute == "tree_left":
+                tree_left = render_dep_tree(sent, False)
+                width = len(tree_left[0])
             else:
+                # get the values without any color codes
                 if len(sent) > 0:
                     width = max(
                         len(col.render(token, ignore_colors=True)) for token in sent
@@ -462,7 +465,7 @@ def _get_token_value(token, attribute: str) -> str:
     """
     Get value *token.x.y.z*.
 
-    token: the token
+    token: the token.
     attribute: the attribute name, e.g. *x.y.z*.
     """
     obj = token
@@ -483,9 +486,9 @@ def _get_adjusted_start_i(
     Get the position at which to start rendering a document, which may be
     adjusted by a search for a specific attribute value.
 
-    doc: the document
-    start_i: the user-specified start index
-    cols: the list of attribute columns being displayed
+    doc: the document.
+    start_i: the user-specified start index.
+    cols: the list of attribute columns being displayed.
     search_attr_name: the name of the attribute for which values are being searched,
         i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed.
     search_attr_value: the attribute value for which to search.
diff --git a/website/docs/api/doc.mdx b/website/docs/api/doc.mdx
index 2a4c5b88d..6a01754f1 100644
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@@ -539,12 +539,12 @@ is displayed.
 > doc.inspect("text", "time", length=20)
 > ```
 
-| Name                | Description                                                                                                                                                                                                                                    |
-| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `search_attr_name`  | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. ~~Optional[str]~~                                                                                                                          |
-| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. ~~Optional[str]~~                                                                                                                           |
-| _keyword-only_      |                                                                                                                                                                                                                                                |
-| `start_i`           | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~                                                                                                                                                |
+| Name                | Description                                                                                                                                                                                                                                     |
+| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `search_attr_name`  | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. A value only has an effect in conjunction with a value for `search_attr_value`. ~~Optional[str]~~                                |
+| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. A value only has an effect in conjunction with a value for `search_attr_name`. ~~Optional[str]~~                                  |
+| _keyword-only_      |                                                                                                                                                                                                                                                 |
+| `start_i`           | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~                                                                                                                                                 |
 | `length`            | The number of tokens to display with their containing sentences. Can be used in isolation to display a portion at the beginning of a document, or in conjunction with the other parameters to display more than one sentence. ~~Optional[int]~~ |
 
 ## Doc.retokenize {id="retokenize",tag="contextmanager",version="2.1"}