From bd00611259e7a4639d732bd96e24d161cf066560 Mon Sep 17 00:00:00 2001
From: Richard Hudson <richard@explosion.ai>
Date: Wed, 8 Dec 2021 17:47:29 +0100
Subject: [PATCH] Add render_text

---
 spacy/tests/conftest.py           |   2 +-
 spacy/tests/test_visualization.py |  42 +++++++++
 spacy/visualization.py            | 141 +++++++++++++++++-------------
 3 files changed, 121 insertions(+), 64 deletions(-)

diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 0cd2727b7..c021788b9 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -549,7 +549,7 @@ def fully_featured_doc_two_sentences(en_vocab):
         "it",
         "."
     ]
-    spaces = [False, True, True, True, True, True, True, True, False, False, True, True, False, False]
+    spaces = [False, True, True, True, True, True, True, True, False, True, True, True, False, False]
     pos = [
         "PROPN",
         "PART",
diff --git a/spacy/tests/test_visualization.py b/spacy/tests/test_visualization.py
index c5366c7c6..5908dbefd 100644
--- a/spacy/tests/test_visualization.py
+++ b/spacy/tests/test_visualization.py
@@ -639,3 +639,45 @@ def test_visualization_rich_render_table_two_sentences(
         if supports_ansi
         else "\n  tree   dep        index   text      lemma     pos     tag   morph                       ent   \n------   --------   -----   -------   -------   -----   ---   -------------------------   ------\n  ╔>╔═   poss           0   Sarah     sarah     PROPN   NNP   NounType=prop|Number=sing   PERSON\n  ║ ╚>   case           1   's        's        PART    POS   Poss=yes                          \n╔>╚═══   nsubj          2   sister    sister    NOUN    NN    Number=sing                       \n╠═════   ROOT           3   flew      fly       VERB    VBD   Tense=past|VerbForm=fin           \n╠>╔═══   prep           4   to        to        ADP     IN                                      \n║ ║ ╔>   compound       5   Silicon   silicon   PROPN   NNP   NounType=prop|Number=sing   GPE   \n║ ╚>╚═   pobj           6   Valley    valley    PROPN   NNP   NounType=prop|Number=sing   GPE   \n╠══>╔═   prep           7   via       via       ADP     IN                                      \n║   ╚>   pobj           8   London    london    PROPN   NNP   NounType=prop|Number=sing   GPE   \n╚════>   punct          9   .         .         PUNCT   .     PunctType=peri                    \n\n\ntree   dep     index   text    lemma   pos     tag   morph                                                    ent\n----   -----   -----   -----   -----   -----   ---   ------------------------------------------------------   ---\n  ╔>   nsubj      10   She     she     PRON    PRP   Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs       \n  ╠═   ROOT       11   loved   love    VERB    VBD   Tense=Past|VerbForm=Fin                                     \n  ╠>   dobj       12   it      it      PRON    PRP   Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs      \n  ╚>   punct      13   .       .       PUNCT   .     PunctType=peri                                              \n\n"
     )
+
+def test_render_text_with_text_format(fully_featured_doc_two_sentences):
+    """Check render_text output when an explicit format for "text" is supplied."""
+    formats = [
+        AttributeFormat(
+            "ent_type_",
+            fg_color=50,
+            value_dependent_fg_colors={"PERSON": 50},
+            value_dependent_bg_colors={"PERSON": 12},
+        ),
+        AttributeFormat(
+            "text",
+            fg_color=50,
+            bg_color=53,
+            value_dependent_fg_colors={"PERSON": 50},
+            value_dependent_bg_colors={"PERSON": 12},
+        ),
+        AttributeFormat(
+            "lemma_",
+            fg_color=50,
+            bg_color=53,
+            permitted_values=("fly", "valley"),
+        ),
+    ]
+    # Parenthesized: otherwise the comparison binds only to the supports_ansi branch.
+    assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == ("\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it.")
+
+def test_render_text_without_text_format(fully_featured_doc_two_sentences):
+    """Check render_text output when no format for "text" is supplied."""
+    formats = [
+        AttributeFormat(
+            "ent_type_",
+            value_dependent_fg_colors={"PERSON": 50},
+            value_dependent_bg_colors={"PERSON": 12},
+        ),
+        AttributeFormat(
+            "lemma_",
+            permitted_values=("fly", "valley"),
+        ),
+    ]
+    # Parenthesized: otherwise the comparison binds only to the supports_ansi branch.
+    assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == ("Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it.")
\ No newline at end of file
diff --git a/spacy/visualization.py b/spacy/visualization.py
index e0796c8b3..c940f4f76 100644
--- a/spacy/visualization.py
+++ b/spacy/visualization.py
@@ -6,63 +6,6 @@ from spacy.tokens import Span, Token, Doc
 from spacy.util import working_dir
 
 
-class AttributeFormat:
-    def __init__(
-        self,
-        attribute: str,
-        *,
-        name: str = "",
-        aligns: str = "l",
-        max_width: int = None,
-        fg_color: Union[str, int] = None,
-        bg_color: Union[str, int] = None,
-        permitted_values: tuple = None,
-        value_dependent_fg_colors: dict[str, Union[str, int]] = None,
-        value_dependent_bg_colors: dict[str, Union[str, int]] = None,
-    ):
-        self.attribute = attribute
-        self.name = name
-        self.aligns = aligns
-        self.max_width = max_width
-        self.fg_color = fg_color
-        self.bg_color = bg_color
-        self.permitted_values = permitted_values
-        self.value_dependent_fg_colors = value_dependent_fg_colors
-        self.value_dependent_bg_colors = value_dependent_bg_colors
-        self.printer = wasabi.Printer(no_print=True)
-
-    def render(
-        self,
-        token: Token,
-        *,
-        ignore_colors: bool = False,
-    ) -> str:
-        obj = token
-        parts = self.attribute.split(".")
-        for part in parts[:-1]:
-            obj = getattr(obj, part)
-        value = str(getattr(obj, parts[-1]))
-        if self.permitted_values is not None and value not in (
-            str(v) for v in self.permitted_values
-        ):
-            return ""
-        if self.max_width is not None:
-            value = value[: self.max_width]
-        fg_color = (
-            self.value_dependent_fg_colors.get(value, None)
-            if not ignore_colors and self.value_dependent_fg_colors is not None
-            else None
-        )
-        bg_color = (
-            self.value_dependent_bg_colors.get(value, None)
-            if not ignore_colors and self.value_dependent_bg_colors is not None
-            else None
-        )
-        if fg_color is not None or bg_color is not None:
-            value = self.printer.text(value, color=fg_color, bg_color=bg_color)
-        return value
-
-
 SPACE = 0
 HALF_HORIZONTAL_LINE = 1  # the half is the half further away from the root
 FULL_HORIZONTAL_LINE = 3
@@ -98,6 +41,65 @@ ROOT_LEFT_CHARS = {
 }
 
 
+class AttributeFormat:  # Settings for rendering one token attribute as a (possibly colored) string.
+    def __init__(
+        self,
+        attribute: str,  # attribute path on the token; dots descend into nested objects (see render)
+        *,
+        name: str = "",
+        aligns: str = "l",
+        max_width: int = None,  # when set, rendered values are cut to this many characters
+        fg_color: Union[str, int] = None,  # fallback foreground, used only if render() is asked for it
+        bg_color: Union[str, int] = None,  # fallback background, used only if render() is asked for it
+        permitted_values: tuple = None,  # when set, any value outside it renders as ""
+        value_dependent_fg_colors: dict[str, Union[str, int]] = None,  # value -> foreground color
+        value_dependent_bg_colors: dict[str, Union[str, int]] = None,  # value -> background color
+    ):
+        self.attribute = attribute
+        self.name = name
+        self.aligns = aligns
+        self.max_width = max_width
+        self.fg_color = fg_color
+        self.bg_color = bg_color
+        self.permitted_values = permitted_values
+        self.value_dependent_fg_colors = value_dependent_fg_colors
+        self.value_dependent_bg_colors = value_dependent_bg_colors
+        self.printer = wasabi.Printer(no_print=True)  # no_print=True: text() output is returned to the caller
+
+    def render(  # Return this attribute's value for `token` as a string, colored where configured.
+        self,
+        token: Token,
+        *,
+        ignore_colors: bool = False,  # True suppresses every color
+        render_all_colors_within_values: bool = False,  # True falls back to self.fg_color / self.bg_color
+    ) -> str:
+        obj = token
+        parts = self.attribute.split(".")
+        for part in parts[:-1]:  # walk down to the object that owns the final attribute
+            obj = getattr(obj, part)
+        value = str(getattr(obj, parts[-1]))
+        if self.permitted_values is not None and value not in (
+            str(v) for v in self.permitted_values
+        ):
+            return ""
+        if self.max_width is not None:
+            value = value[: self.max_width]
+        fg_color = None
+        bg_color = None
+        if not ignore_colors and len(value) > 0:  # empty values are never colored
+            if self.value_dependent_fg_colors is not None:
+                fg_color = self.value_dependent_fg_colors.get(value, None)  # keyed on the possibly truncated value
+            if fg_color is None and render_all_colors_within_values:
+                fg_color = self.fg_color
+            if self.value_dependent_bg_colors is not None:
+                bg_color = self.value_dependent_bg_colors.get(value, None)  # keyed on the possibly truncated value
+            if bg_color is None and render_all_colors_within_values:
+                bg_color = self.bg_color
+        if fg_color is not None or bg_color is not None:  # only involve the printer when a color applies
+            value = self.printer.text(value, color=fg_color, bg_color=bg_color)
+        return value
+
+
 class Visualizer:
     @staticmethod
     def render_dependency_tree(sent: Span, root_right: bool) -> list[str]:
@@ -384,10 +386,23 @@ class Visualizer:
 
     def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str:
         return_string = ""
+        text_attributes = [a for a in attributes if a.attribute == "text"]
+        text_attribute = (  # first "text" format supplied, else a default one with no colors set
+            text_attributes[0] if len(text_attributes) > 0 else AttributeFormat("text")
+        )
         for token in doc:
-            return_string += token.text_with_ws
-            for attribute in attributes:
-                if self.get_entity(
-                    token,
-                ):
-                    pass
+            this_token_strings = [""]  # slot 0 is a placeholder, filled with the token text below
+            for attribute in (a for a in attributes if a.attribute != "text"):
+                attribute_text = attribute.render(
+                    token, render_all_colors_within_values=True
+                )
+                if attribute_text is not None and len(attribute_text) > 0:  # skip attributes that render empty
+                    this_token_strings.append(" " + attribute_text)
+            this_token_strings[0] = (  # the "text" format is applied only if the token got an annotation
+                token.text
+                if len(this_token_strings) == 1
+                else text_attribute.render(token, render_all_colors_within_values=True)
+            )
+            this_token_strings.append(token.whitespace_)  # whitespace goes after the annotations
+            return_string += "".join(this_token_strings)
+        return return_string