mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 12:20:20 +03:00
Add render_text
This commit is contained in:
parent
49f3fd39b9
commit
bd00611259
|
@ -549,7 +549,7 @@ def fully_featured_doc_two_sentences(en_vocab):
|
||||||
"it",
|
"it",
|
||||||
"."
|
"."
|
||||||
]
|
]
|
||||||
spaces = [False, True, True, True, True, True, True, True, False, False, True, True, False, False]
|
spaces = [False, True, True, True, True, True, True, True, False, True, True, True, False, False]
|
||||||
pos = [
|
pos = [
|
||||||
"PROPN",
|
"PROPN",
|
||||||
"PART",
|
"PART",
|
||||||
|
|
|
@ -639,3 +639,45 @@ def test_visualization_rich_render_table_two_sentences(
|
||||||
if supports_ansi
|
if supports_ansi
|
||||||
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
|
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_render_text_with_text_format(
    fully_featured_doc_two_sentences,
):
    """Check render_text when an explicit format for "text" is supplied.

    The expected ANSI string shows the "text" format's colours being applied
    only to tokens that are followed by at least one rendered attribute value.
    """
    formats = [
        AttributeFormat(
            "ent_type_",
            fg_color=50,
            value_dependent_fg_colors={"PERSON": 50},
            value_dependent_bg_colors={"PERSON": 12},
        ),
        AttributeFormat(
            "text",
            fg_color=50,
            bg_color=53,
            value_dependent_fg_colors={"PERSON": 50},
            value_dependent_bg_colors={"PERSON": 12},
        ),
        AttributeFormat(
            "lemma_",
            fg_color=50,
            bg_color=53,
            permitted_values=("fly", "valley"),
        ),
    ]
    # Choose the expected value first. Writing
    # `assert a == b if cond else c` parses as `assert ((a == b) if cond else c)`,
    # which makes the non-ANSI branch assert a non-empty string (always true).
    # NOTE(review): `supports_ansi` is used as a plain truth value here; confirm
    # at its definition that it is a bool and not an uncalled function.
    expected = (
        "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it."
        if supports_ansi
        else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
    )
    rendered = Visualizer().render_text(fully_featured_doc_two_sentences, formats)
    assert rendered == expected
|
||||||
|
|
||||||
|
def test_render_text_without_text_format(
    fully_featured_doc_two_sentences,
):
    """Check render_text when no format for "text" is supplied.

    Token texts are expected to come out uncoloured; only attribute values
    with a value-dependent colour ("PERSON") carry ANSI codes.
    """
    formats = [
        AttributeFormat(
            "ent_type_",
            value_dependent_fg_colors={"PERSON": 50},
            value_dependent_bg_colors={"PERSON": 12},
        ),
        AttributeFormat(
            "lemma_",
            permitted_values=("fly", "valley"),
        ),
    ]
    # Choose the expected value first. Writing
    # `assert a == b if cond else c` parses as `assert ((a == b) if cond else c)`,
    # which makes the non-ANSI branch assert a non-empty string (always true).
    # NOTE(review): `supports_ansi` is used as a plain truth value here; confirm
    # at its definition that it is a bool and not an uncalled function.
    expected = (
        "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
        if supports_ansi
        else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
    )
    rendered = Visualizer().render_text(fully_featured_doc_two_sentences, formats)
    assert rendered == expected
|
|
@ -6,63 +6,6 @@ from spacy.tokens import Span, Token, Doc
|
||||||
from spacy.util import working_dir
|
from spacy.util import working_dir
|
||||||
|
|
||||||
|
|
||||||
class AttributeFormat:
    """Describes how a single token attribute is rendered.

    This is the earlier form of the class: `render` applies only
    value-dependent colours and never reads `fg_color` / `bg_color`.
    """

    def __init__(
        self,
        attribute: str,
        *,
        name: str = "",
        aligns: str = "l",
        max_width: Union[int, None] = None,
        fg_color: Union[str, int, None] = None,
        bg_color: Union[str, int, None] = None,
        permitted_values: Union[tuple, None] = None,
        value_dependent_fg_colors: Union[dict[str, Union[str, int]], None] = None,
        value_dependent_bg_colors: Union[dict[str, Union[str, int]], None] = None,
    ):
        """
        attribute:                  name of the token attribute to render; may be a
                                    dotted path (e.g. "head.lemma_").
        name:                       stored as-is; not read by `render`.
        aligns:                     stored as-is; not read by `render`.
        max_width:                  if set, rendered values are cut to this many characters.
        fg_color / bg_color:        stored as-is; not read by `render`.
        permitted_values:           if set, values whose string form is not among these
                                    (compared as strings) are rendered as "".
        value_dependent_fg_colors:  maps a rendered value to its foreground colour.
        value_dependent_bg_colors:  maps a rendered value to its background colour.
        """
        self.attribute = attribute
        self.name = name
        self.aligns = aligns
        self.max_width = max_width
        self.fg_color = fg_color
        self.bg_color = bg_color
        self.permitted_values = permitted_values
        self.value_dependent_fg_colors = value_dependent_fg_colors
        self.value_dependent_bg_colors = value_dependent_bg_colors
        # Used only to wrap values in colour codes; created with no_print=True.
        self.printer = wasabi.Printer(no_print=True)

    def render(
        self,
        token: "Token",
        *,
        ignore_colors: bool = False,
    ) -> str:
        """Return the string form of this attribute for *token*.

        token:          the object the (possibly dotted) attribute path is read from.
        ignore_colors:  if True, no colour lookup is performed.
        RETURNS:        "" if the value is not permitted, otherwise the value
                        (truncated to `max_width`), passed through
                        `self.printer.text` when a colour applies.
        """
        # Resolve dotted paths one attribute at a time.
        obj = token
        *parents, leaf = self.attribute.split(".")
        for part in parents:
            obj = getattr(obj, part)
        value = str(getattr(obj, leaf))

        if self.permitted_values is not None and value not in (
            str(v) for v in self.permitted_values
        ):
            return ""
        if self.max_width is not None:
            value = value[: self.max_width]

        # Colour lookups use the (possibly truncated) value as the key.
        fg_color = None
        bg_color = None
        if not ignore_colors:
            if self.value_dependent_fg_colors is not None:
                fg_color = self.value_dependent_fg_colors.get(value)
            if self.value_dependent_bg_colors is not None:
                bg_color = self.value_dependent_bg_colors.get(value)

        if fg_color is None and bg_color is None:
            return value
        return self.printer.text(value, color=fg_color, bg_color=bg_color)
|
|
||||||
|
|
||||||
|
|
||||||
SPACE = 0
|
SPACE = 0
|
||||||
HALF_HORIZONTAL_LINE = 1 # the half is the half further away from the root
|
HALF_HORIZONTAL_LINE = 1 # the half is the half further away from the root
|
||||||
FULL_HORIZONTAL_LINE = 3
|
FULL_HORIZONTAL_LINE = 3
|
||||||
|
@ -98,6 +41,65 @@ ROOT_LEFT_CHARS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class AttributeFormat:
    """Describes how a single token attribute is rendered.

    `render` turns the attribute of one token into a string, optionally
    filtered, truncated and wrapped in colour codes.
    """

    def __init__(
        self,
        attribute: str,
        *,
        name: str = "",
        aligns: str = "l",
        max_width: Union[int, None] = None,
        fg_color: Union[str, int, None] = None,
        bg_color: Union[str, int, None] = None,
        permitted_values: Union[tuple, None] = None,
        value_dependent_fg_colors: Union[dict[str, Union[str, int]], None] = None,
        value_dependent_bg_colors: Union[dict[str, Union[str, int]], None] = None,
    ):
        """
        attribute:                  name of the token attribute to render; may be a
                                    dotted path (e.g. "head.lemma_").
        name:                       stored as-is; not read by `render`.
        aligns:                     stored as-is; not read by `render`.
        max_width:                  if set, rendered values are cut to this many characters.
        fg_color:                   fallback foreground colour; only applied by `render`
                                    when *render_all_colors_within_values* is True.
        bg_color:                   fallback background colour; same condition as *fg_color*.
        permitted_values:           if set, values whose string form is not among these
                                    (compared as strings) are rendered as "".
        value_dependent_fg_colors:  maps a rendered value to its foreground colour.
        value_dependent_bg_colors:  maps a rendered value to its background colour.
        """
        self.attribute = attribute
        self.name = name
        self.aligns = aligns
        self.max_width = max_width
        self.fg_color = fg_color
        self.bg_color = bg_color
        self.permitted_values = permitted_values
        self.value_dependent_fg_colors = value_dependent_fg_colors
        self.value_dependent_bg_colors = value_dependent_bg_colors
        # Used only to wrap values in colour codes; created with no_print=True.
        self.printer = wasabi.Printer(no_print=True)

    def render(
        self,
        token: "Token",
        *,
        ignore_colors: bool = False,
        render_all_colors_within_values: bool = False,
    ) -> str:
        """Return the string form of this attribute for *token*.

        token:                              the object the (possibly dotted) attribute
                                            path is read from.
        ignore_colors:                      if True, the value is returned without any colour.
        render_all_colors_within_values:    if True, `self.fg_color` / `self.bg_color` are
                                            used wherever no value-dependent colour matched.
        RETURNS:                            "" if the value is not permitted, otherwise the
                                            value (truncated to `max_width`), passed through
                                            `self.printer.text` when a colour applies.
        """
        # Resolve dotted paths one attribute at a time.
        obj = token
        *parents, leaf = self.attribute.split(".")
        for part in parents:
            obj = getattr(obj, part)
        value = str(getattr(obj, leaf))

        if self.permitted_values is not None and value not in (
            str(v) for v in self.permitted_values
        ):
            return ""
        if self.max_width is not None:
            value = value[: self.max_width]

        # Empty values are never coloured, so they stay empty for callers
        # that test the result for emptiness.
        if ignore_colors or not value:
            return value

        # Colour lookups use the (possibly truncated) value as the key; the
        # general colour is only a fallback for a missing value-specific one.
        fg_color = None
        if self.value_dependent_fg_colors is not None:
            fg_color = self.value_dependent_fg_colors.get(value)
        if fg_color is None and render_all_colors_within_values:
            fg_color = self.fg_color

        bg_color = None
        if self.value_dependent_bg_colors is not None:
            bg_color = self.value_dependent_bg_colors.get(value)
        if bg_color is None and render_all_colors_within_values:
            bg_color = self.bg_color

        if fg_color is None and bg_color is None:
            return value
        return self.printer.text(value, color=fg_color, bg_color=bg_color)
|
||||||
|
|
||||||
|
|
||||||
class Visualizer:
|
class Visualizer:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def render_dependency_tree(sent: Span, root_right: bool) -> list[str]:
|
def render_dependency_tree(sent: Span, root_right: bool) -> list[str]:
|
||||||
|
@ -384,10 +386,23 @@ class Visualizer:
|
||||||
|
|
||||||
def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str:
    """Render *doc* as running text with attribute values placed after tokens.

    doc:        the document whose tokens are rendered in order.
    attributes: the formats to apply. The first format whose attribute is
                "text" controls how the token text itself is rendered; every
                other format contributes " <value>" after the token whenever
                it renders to a non-empty string.
    RETURNS:    the concatenation of, for each token, its text, its rendered
                attribute values and its trailing whitespace.
    """
    text_format = next((a for a in attributes if a.attribute == "text"), None)
    if text_format is None:
        text_format = AttributeFormat("text")
    # The set of non-text formats does not depend on the token.
    annotation_formats = [a for a in attributes if a.attribute != "text"]

    pieces = []
    for token in doc:
        annotations = []
        for attribute in annotation_formats:
            rendered = attribute.render(token, render_all_colors_within_values=True)
            if rendered:
                annotations.append(" " + rendered)
        if annotations:
            # Only tokens that carry at least one annotation get the "text"
            # format applied; all others are emitted as plain text.
            pieces.append(
                text_format.render(token, render_all_colors_within_values=True)
            )
        else:
            pieces.append(token.text)
        pieces.extend(annotations)
        pieces.append(token.whitespace_)
    return "".join(pieces)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user