Add render_instances function

This commit is contained in:
Richard Hudson 2021-12-08 19:24:32 +01:00
parent bd00611259
commit ed788c5def
2 changed files with 250 additions and 15 deletions

View File

@ -486,6 +486,38 @@ def test_visualization_minimal_render_table_one_sentence(
) )
def test_visualization_minimal_render_table_empty_text_no_headers(
    en_vocab,
):
    """A table rendered for a Doc built from the vocab alone, using columns
    that carry no explicit names, must contain nothing but whitespace."""
    attribute_names = (
        "tree_left",
        "dep_",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "morph",
        "ent_type_",
    )
    formats = [AttributeFormat(attribute_name) for attribute_name in attribute_names]
    rendered = Visualizer().render_table(Doc(en_vocab), formats)
    assert rendered.strip() == ""
def test_visualization_minimal_render_table_empty_text_headers(
    en_vocab,
):
    """A table rendered for a Doc built from the vocab alone must contain
    nothing but whitespace even when some columns are given explicit names."""
    # Only these two columns are constructed with a ``name`` argument.
    explicit_names = {"tree_left": "tree", "ent_type_": "ent"}
    formats = []
    for attribute_name in (
        "tree_left",
        "dep_",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "morph",
        "ent_type_",
    ):
        if attribute_name in explicit_names:
            formats.append(
                AttributeFormat(attribute_name, name=explicit_names[attribute_name])
            )
        else:
            formats.append(AttributeFormat(attribute_name))
    rendered = Visualizer().render_table(Doc(en_vocab), formats)
    assert rendered.strip() == ""
def test_visualization_minimal_render_table_permitted_values( def test_visualization_minimal_render_table_permitted_values(
fully_featured_doc_one_sentence, fully_featured_doc_one_sentence,
): ):
@ -640,7 +672,8 @@ def test_visualization_rich_render_table_two_sentences(
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n" else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . 
PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
) )
def test_render_text_with_text_format(
def test_visualization_text_with_text_format(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
): ):
formats = [ formats = [
@ -658,15 +691,18 @@ def test_render_text_with_text_format(
value_dependent_bg_colors={"PERSON": 12}, value_dependent_bg_colors={"PERSON": 12},
), ),
AttributeFormat( AttributeFormat(
"lemma_", "lemma_", fg_color=50, bg_color=53, permitted_values=("fly", "valley")
fg_color=50,
bg_color=53,
permitted_values=("fly", "valley")
), ),
] ]
assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." assert (
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
== "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it."
if supports_ansi
else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
)
def test_render_text_without_text_format(
def test_visualization_render_text_without_text_format(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
): ):
formats = [ formats = [
@ -675,9 +711,143 @@ def test_render_text_without_text_format(
value_dependent_fg_colors={"PERSON": 50}, value_dependent_fg_colors={"PERSON": 50},
value_dependent_bg_colors={"PERSON": 12}, value_dependent_bg_colors={"PERSON": 12},
), ),
AttributeFormat( AttributeFormat("lemma_", permitted_values=("fly", "valley")),
"lemma_",
permitted_values=("fly", "valley")
),
] ]
assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." assert (
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
== "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
if supports_ansi
else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
)
def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
    fully_featured_doc_two_sentences,
):
    """Searching on ``ent_type_`` without grouping lists the matching tokens
    in the order the assertion below spells out."""
    display_columns = [
        AttributeFormat(attribute_name)
        for attribute_name in (
            "dep_",
            "text",
            "lemma_",
            "pos_",
            "tag_",
            "morph",
            "ent_type_",
        )
    ]
    search_attributes = [AttributeFormat("ent_type_")]
    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=search_attributes,
        display_columns=display_columns,
        group=False,
    )
    expected = "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
    assert rendered == expected
def test_visualization_minimal_render_instances_two_sentences_value_non_grouping(
    fully_featured_doc_two_sentences,
):
    """Restricting the ``ent_type_`` search attribute to ``PERSON`` leaves a
    single row in the ungrouped output."""
    display_columns = [
        AttributeFormat(attribute_name)
        for attribute_name in (
            "dep_",
            "text",
            "lemma_",
            "pos_",
            "tag_",
            "morph",
            "ent_type_",
        )
    ]
    search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSON"])]
    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=search_attributes,
        display_columns=display_columns,
        group=False,
    )
    expected = "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    assert rendered == expected
def test_visualization_minimal_render_instances_two_sentences_missing_value_non_grouping(
    fully_featured_doc_two_sentences,
):
    """A permitted value that no token carries (``PERSONN``) produces a table
    consisting of the header and divider only."""
    # The first two columns are named explicitly; the rest are not.
    named_columns = [
        AttributeFormat("dep_", name="dep"),
        AttributeFormat("text", name="text"),
    ]
    unnamed_columns = [
        AttributeFormat(attribute_name)
        for attribute_name in ("lemma_", "pos_", "tag_", "morph", "ent_type_")
    ]
    display_columns = named_columns + unnamed_columns
    search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])]
    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=search_attributes,
        display_columns=display_columns,
        group=False,
    )
    expected = "\ndep text \n--- ---- \n"
    assert rendered == expected
def test_visualization_minimal_render_instances_two_sentences_type_grouping(
    fully_featured_doc_two_sentences,
):
    """With grouping enabled and two search attributes (``ent_type_`` then
    ``lemma_``), the rows come out in the order the assertion spells out."""
    display_columns = [
        AttributeFormat(attribute_name)
        for attribute_name in (
            "dep_",
            "text",
            "lemma_",
            "pos_",
            "tag_",
            "morph",
            "ent_type_",
        )
    ]
    search_attributes = [
        AttributeFormat(attribute_name) for attribute_name in ("ent_type_", "lemma_")
    ]
    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=search_attributes,
        display_columns=display_columns,
        group=True,
    )
    expected = "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    assert rendered == expected
def test_visualization_minimal_render_instances_two_sentences_type_grouping_colors(
    fully_featured_doc_two_sentences,
):
    """Grouped instance rendering with a foreground colour on ``dep_`` and a
    background colour on ``text``.

    The expected output depends on ``supports_ansi``: with ANSI support the
    coloured cells are wrapped in escape sequences, otherwise the plain table
    is expected.
    """
    display_columns = [
        AttributeFormat("dep_", fg_color=20),
        AttributeFormat("text", bg_color=30),
        AttributeFormat("lemma_"),
        AttributeFormat("pos_"),
        AttributeFormat("tag_"),
        AttributeFormat("morph"),
        AttributeFormat("ent_type_"),
    ]
    search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]
    # Choose the expected string *before* comparing. Written inline as
    # ``x == a if cond else b`` the expression parses as
    # ``(x == a) if cond else b``, so the non-ANSI branch would merely assert
    # a non-empty string and could never fail.
    if supports_ansi:
        expected = "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    else:
        # Leading "\n" restored: the original literal began with "npobj".
        expected = "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=search_attributes,
        display_columns=display_columns,
        group=True,
    )
    assert rendered == expected

View File

@ -344,9 +344,12 @@ class Visualizer:
elif column.attribute == "tree_right": elif column.attribute == "tree_right":
width = len(tree_right[0]) width = len(tree_right[0])
else: else:
width = max( if len(sent) > 0:
len(column.render(token, ignore_colors=True)) for token in sent width = max(
) len(column.render(token, ignore_colors=True)) for token in sent
)
else:
width = 0
if column.max_width is not None: if column.max_width is not None:
width = min(width, column.max_width) width = min(width, column.max_width)
width = max(width, len(column.name)) width = max(width, len(column.name))
@ -406,3 +409,65 @@ class Visualizer:
this_token_strings.append(token.whitespace_) this_token_strings.append(token.whitespace_)
return_string += "".join(this_token_strings) return_string += "".join(this_token_strings)
return return_string return return_string
def render_instances(
    self,
    doc: Doc,
    *,
    search_attributes: list[AttributeFormat],
    display_columns: list[AttributeFormat],
    group: bool,
    spacing: int = 3,
) -> str:
    """Render a table with one row per token of *doc* that matches the
    search attributes.

    doc:               the document whose tokens are examined.
    search_attributes: a token is kept only if every one of these attributes
                       renders to a non-empty string for it. (Presumably an
                       attribute with ``permitted_values`` renders an empty
                       string for other values -- confirm against
                       ``AttributeFormat.render``.)
    display_columns:   the attributes shown as table columns, in order.
    group:             if true, rows are sorted by the list of rendered
                       search-attribute values; otherwise document order is
                       kept.
    spacing:           passed through to ``wasabi.table``.
    RETURNS:           the string produced by ``wasabi.table``.
    """

    # Named so as not to shadow the builtin ``filter``.
    def matches_search(token: Token) -> bool:
        return all(
            len(attribute.render(token, ignore_colors=True)) > 0
            for attribute in search_attributes
        )

    def search_values(token: Token) -> list:
        return [
            attribute.render(token, ignore_colors=True)
            for attribute in search_attributes
        ]

    tokens = [token for token in doc if matches_search(token)]
    if group:
        # list.sort is stable, so tokens with equal keys keep document order.
        tokens.sort(key=search_values)

    widths = []
    for column in display_columns:
        # Width is measured on the uncoloured text so that escape sequences
        # do not count towards the column width.
        if len(tokens) > 0:
            width = max(
                len(column.render(token, ignore_colors=True)) for token in tokens
            )
        else:
            width = 0
        if column.max_width is not None:
            width = min(width, column.max_width)
        # The header must always fit, even in a capped or empty column.
        width = max(width, len(column.name))
        widths.append(width)

    data = [[column.render(token) for column in display_columns] for token in tokens]

    # A header row is emitted as soon as at least one column has a name.
    if any(len(column.name) > 0 for column in display_columns):
        header = [column.name for column in display_columns]
    else:
        header = None
    aligns = [column.aligns for column in display_columns]
    fg_colors = [column.fg_color for column in display_columns]
    bg_colors = [column.bg_color for column in display_columns]
    return wasabi.table(
        data,
        header=header,
        divider=True,
        aligns=aligns,
        widths=widths,
        fg_colors=fg_colors,
        bg_colors=bg_colors,
        spacing=spacing,
    )