From bd00611259e7a4639d732bd96e24d161cf066560 Mon Sep 17 00:00:00 2001 From: Richard Hudson Date: Wed, 8 Dec 2021 17:47:29 +0100 Subject: [PATCH] Add render_text --- spacy/tests/conftest.py | 2 +- spacy/tests/test_visualization.py | 42 +++++++++ spacy/visualization.py | 141 +++++++++++++++++------------- 3 files changed, 121 insertions(+), 64 deletions(-) diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index 0cd2727b7..c021788b9 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -549,7 +549,7 @@ def fully_featured_doc_two_sentences(en_vocab): "it", "." ] - spaces = [False, True, True, True, True, True, True, True, False, False, True, True, False, False] + spaces = [False, True, True, True, True, True, True, True, False, True, True, True, False, False] pos = [ "PROPN", "PART", diff --git a/spacy/tests/test_visualization.py b/spacy/tests/test_visualization.py index c5366c7c6..5908dbefd 100644 --- a/spacy/tests/test_visualization.py +++ b/spacy/tests/test_visualization.py @@ -639,3 +639,45 @@ def test_visualization_rich_render_table_two_sentences( if supports_ansi else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . 
PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n" ) + +def test_render_text_with_text_format( + fully_featured_doc_two_sentences, +): + formats = [ + AttributeFormat( + "ent_type_", + fg_color=50, + value_dependent_fg_colors={"PERSON": 50}, + value_dependent_bg_colors={"PERSON": 12}, + ), + AttributeFormat( + "text", + fg_color=50, + bg_color=53, + value_dependent_fg_colors={"PERSON": 50}, + value_dependent_bg_colors={"PERSON": 12}, + ), + AttributeFormat( + "lemma_", + fg_color=50, + bg_color=53, + permitted_values=("fly", "valley") + ), + ] + assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == ("\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it.")
+ +def test_render_text_without_text_format( + fully_featured_doc_two_sentences, +): + formats = [ + AttributeFormat( + "ent_type_", + value_dependent_fg_colors={"PERSON": 50}, + value_dependent_bg_colors={"PERSON": 12}, + ), + AttributeFormat( + "lemma_", + permitted_values=("fly", "valley") + ), + ] + assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == ("Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it.") \ No newline at end of file diff --git a/spacy/visualization.py b/spacy/visualization.py index e0796c8b3..c940f4f76 100644 --- a/spacy/visualization.py +++ b/spacy/visualization.py @@ -6,63 +6,6 @@ from spacy.tokens import Span, Token, Doc from spacy.util import working_dir -class AttributeFormat: - def __init__( - self, - attribute: str, - *, - name: str = "", - aligns: str = "l", - max_width: int = None, - fg_color: Union[str, int] = None, - bg_color: Union[str, int] = None, - permitted_values: tuple = None, - value_dependent_fg_colors: dict[str, Union[str, int]] = None, - value_dependent_bg_colors: dict[str, Union[str, int]] = None, - ): - self.attribute = attribute - self.name = name - self.aligns = aligns - self.max_width = max_width - self.fg_color = fg_color - self.bg_color = bg_color - self.permitted_values = permitted_values - self.value_dependent_fg_colors = value_dependent_fg_colors - self.value_dependent_bg_colors = value_dependent_bg_colors - self.printer = wasabi.Printer(no_print=True) - - def render( - self, - token: Token, - *, - ignore_colors: bool = False, - ) -> str: - obj = token - parts = self.attribute.split(".") - for part in parts[:-1]: - obj = getattr(obj, part) - value = str(getattr(obj, parts[-1])) - if self.permitted_values is not None and value not in ( - str(v) for v in self.permitted_values - ): - return "" - if
self.max_width is not None: - value = value[: self.max_width] - fg_color = ( - self.value_dependent_fg_colors.get(value, None) - if not ignore_colors and self.value_dependent_fg_colors is not None - else None - ) - bg_color = ( - self.value_dependent_bg_colors.get(value, None) - if not ignore_colors and self.value_dependent_bg_colors is not None - else None - ) - if fg_color is not None or bg_color is not None: - value = self.printer.text(value, color=fg_color, bg_color=bg_color) - return value - - SPACE = 0 HALF_HORIZONTAL_LINE = 1 # the half is the half further away from the root FULL_HORIZONTAL_LINE = 3 @@ -98,6 +41,65 @@ ROOT_LEFT_CHARS = { } +class AttributeFormat: + def __init__( + self, + attribute: str, + *, + name: str = "", + aligns: str = "l", + max_width: int = None, + fg_color: Union[str, int] = None, + bg_color: Union[str, int] = None, + permitted_values: tuple = None, + value_dependent_fg_colors: dict[str, Union[str, int]] = None, + value_dependent_bg_colors: dict[str, Union[str, int]] = None, + ): + self.attribute = attribute + self.name = name + self.aligns = aligns + self.max_width = max_width + self.fg_color = fg_color + self.bg_color = bg_color + self.permitted_values = permitted_values + self.value_dependent_fg_colors = value_dependent_fg_colors + self.value_dependent_bg_colors = value_dependent_bg_colors + self.printer = wasabi.Printer(no_print=True) + + def render( + self, + token: Token, + *, + ignore_colors: bool = False, + render_all_colors_within_values: bool = False, + ) -> str: + obj = token + parts = self.attribute.split(".") + for part in parts[:-1]: + obj = getattr(obj, part) + value = str(getattr(obj, parts[-1])) + if self.permitted_values is not None and value not in ( + str(v) for v in self.permitted_values + ): + return "" + if self.max_width is not None: + value = value[: self.max_width] + fg_color = None + bg_color = None + if not ignore_colors and len(value) > 0: + if self.value_dependent_fg_colors is not None: + fg_color = 
self.value_dependent_fg_colors.get(value, None) + if fg_color is None and render_all_colors_within_values: + fg_color = self.fg_color + if self.value_dependent_bg_colors is not None: + bg_color = self.value_dependent_bg_colors.get(value, None) + if bg_color is None and render_all_colors_within_values: + bg_color = self.bg_color + if fg_color is not None or bg_color is not None: + value = self.printer.text(value, color=fg_color, bg_color=bg_color) + return value + + class Visualizer: @staticmethod def render_dependency_tree(sent: Span, root_right: bool) -> list[str]: @@ -384,10 +386,23 @@ class Visualizer: def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str: return_string = "" + text_attributes = [a for a in attributes if a.attribute == "text"] + text_attribute = ( + text_attributes[0] if len(text_attributes) > 0 else AttributeFormat("text") + ) for token in doc: - return_string += token.text_with_ws - for attribute in attributes: - if self.get_entity( - token, - ): - pass + this_token_strings = [""] + for attribute in (a for a in attributes if a.attribute != "text"): + attribute_text = attribute.render( + token, render_all_colors_within_values=True + ) + if attribute_text is not None and len(attribute_text) > 0: + this_token_strings.append(" " + attribute_text) + this_token_strings[0] = ( + token.text + if len(this_token_strings) == 1 + else text_attribute.render(token, render_all_colors_within_values=True) + ) + this_token_strings.append(token.whitespace_) + return_string += "".join(this_token_strings) + return return_string