mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 04:10:20 +03:00
Add render_instances function
This commit is contained in:
parent
bd00611259
commit
ed788c5def
|
@ -486,6 +486,38 @@ def test_visualization_minimal_render_table_one_sentence(
|
|||
)
|
||||
|
||||
|
||||
def test_visualization_minimal_render_table_empty_text_no_headers(
    en_vocab,
):
    """Rendering a table for an empty doc without column names gives no visible output."""
    attribute_names = (
        "tree_left",
        "dep_",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "morph",
        "ent_type_",
    )
    formats = [AttributeFormat(attribute_name) for attribute_name in attribute_names]
    empty_doc = Doc(en_vocab)
    rendered = Visualizer().render_table(empty_doc, formats)
    # Only whitespace may remain when there are no tokens and no headers.
    assert rendered.strip() == ""
|
||||
|
||||
|
||||
def test_visualization_minimal_render_table_empty_text_headers(
    en_vocab,
):
    """Rendering a table for an empty doc gives no visible output even with named columns."""
    unnamed_attributes = ("dep_", "text", "lemma_", "pos_", "tag_", "morph")
    formats = [
        AttributeFormat("tree_left", name="tree"),
        *(AttributeFormat(attribute_name) for attribute_name in unnamed_attributes),
        AttributeFormat("ent_type_", name="ent"),
    ]
    empty_doc = Doc(en_vocab)
    rendered = Visualizer().render_table(empty_doc, formats)
    # The assertion pins that no header text is emitted for a doc without tokens.
    assert rendered.strip() == ""
|
||||
|
||||
|
||||
def test_visualization_minimal_render_table_permitted_values(
|
||||
fully_featured_doc_one_sentence,
|
||||
):
|
||||
|
@ -640,7 +672,8 @@ def test_visualization_rich_render_table_two_sentences(
|
|||
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
|
||||
)
|
||||
|
||||
def test_render_text_with_text_format(
|
||||
|
||||
def test_visualization_text_with_text_format(
|
||||
fully_featured_doc_two_sentences,
|
||||
):
|
||||
formats = [
|
||||
|
@ -658,15 +691,18 @@ def test_render_text_with_text_format(
|
|||
value_dependent_bg_colors={"PERSON": 12},
|
||||
),
|
||||
AttributeFormat(
|
||||
"lemma_",
|
||||
fg_color=50,
|
||||
bg_color=53,
|
||||
permitted_values=("fly", "valley")
|
||||
"lemma_", fg_color=50, bg_color=53, permitted_values=("fly", "valley")
|
||||
),
|
||||
]
|
||||
assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
|
||||
assert (
|
||||
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
|
||||
== "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it."
|
||||
if supports_ansi
|
||||
else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
|
||||
)
|
||||
|
||||
def test_render_text_without_text_format(
|
||||
|
||||
def test_visualization_render_text_without_text_format(
|
||||
fully_featured_doc_two_sentences,
|
||||
):
|
||||
formats = [
|
||||
|
@ -675,9 +711,143 @@ def test_render_text_without_text_format(
|
|||
value_dependent_fg_colors={"PERSON": 50},
|
||||
value_dependent_bg_colors={"PERSON": 12},
|
||||
),
|
||||
AttributeFormat(
|
||||
"lemma_",
|
||||
permitted_values=("fly", "valley")
|
||||
),
|
||||
AttributeFormat("lemma_", permitted_values=("fly", "valley")),
|
||||
]
|
||||
assert Visualizer().render_text(fully_featured_doc_two_sentences, formats) == "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." if supports_ansi else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
|
||||
assert (
|
||||
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
|
||||
== "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
|
||||
if supports_ansi
|
||||
else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
|
||||
)
|
||||
|
||||
|
||||
def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
    fully_featured_doc_two_sentences,
):
    """Searching on ``ent_type_`` without grouping lists the matching tokens in document order."""
    column_attributes = (
        "dep_",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "morph",
        "ent_type_",
    )
    columns = [AttributeFormat(attribute_name) for attribute_name in column_attributes]
    searched = [AttributeFormat("ent_type_")]

    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=searched,
        display_columns=columns,
        group=False,
    )

    expected = "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
    assert rendered == expected
|
||||
|
||||
|
||||
def test_visualization_minimal_render_instances_two_sentences_value_non_grouping(
    fully_featured_doc_two_sentences,
):
    """Restricting ``ent_type_`` to ``PERSON`` leaves only the single matching token."""
    column_attributes = (
        "dep_",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "morph",
        "ent_type_",
    )
    columns = [AttributeFormat(attribute_name) for attribute_name in column_attributes]
    searched = [AttributeFormat("ent_type_", permitted_values=["PERSON"])]

    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=searched,
        display_columns=columns,
        group=False,
    )

    expected = "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    assert rendered == expected
|
||||
|
||||
|
||||
def test_visualization_minimal_render_instances_two_sentences_missing_value_non_grouping(
    fully_featured_doc_two_sentences,
):
    """A permitted value that never occurs yields a table with headers but no rows."""
    unnamed_attributes = ("lemma_", "pos_", "tag_", "morph", "ent_type_")
    columns = [
        AttributeFormat("dep_", name="dep"),
        AttributeFormat("text", name="text"),
        *(AttributeFormat(attribute_name) for attribute_name in unnamed_attributes),
    ]
    # "PERSONN" is deliberately not an entity type present in the fixture doc.
    searched = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])]

    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=searched,
        display_columns=columns,
        group=False,
    )

    expected = "\ndep text \n--- ---- \n"
    assert rendered == expected
|
||||
|
||||
|
||||
def test_visualization_minimal_render_instances_two_sentences_type_grouping(
    fully_featured_doc_two_sentences,
):
    """With grouping, matching tokens are ordered by ``ent_type_`` and then ``lemma_``."""
    column_attributes = (
        "dep_",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "morph",
        "ent_type_",
    )
    columns = [AttributeFormat(attribute_name) for attribute_name in column_attributes]
    searched = [AttributeFormat(attribute_name) for attribute_name in ("ent_type_", "lemma_")]

    rendered = Visualizer().render_instances(
        fully_featured_doc_two_sentences,
        search_attributes=searched,
        display_columns=columns,
        group=True,
    )

    expected = "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    assert rendered == expected
|
||||
|
||||
|
||||
def test_visualization_minimal_render_instances_two_sentences_type_grouping_colors(
    fully_featured_doc_two_sentences,
):
    """Grouped instance rendering applies per-column foreground/background colors.

    The expected output depends on whether the terminal supports ANSI escape
    sequences: with support the colored columns are wrapped in escape codes,
    without it the plain table is expected.
    """
    display_columns = [
        AttributeFormat("dep_", fg_color=20),
        AttributeFormat("text", bg_color=30),
        AttributeFormat("lemma_"),
        AttributeFormat("pos_"),
        AttributeFormat("tag_"),
        AttributeFormat("morph"),
        AttributeFormat("ent_type_"),
    ]

    search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]

    # Select the expected string *before* comparing. Writing
    # ``assert (x == a if cond else b)`` parses as ``(x == a) if cond else b``,
    # which asserts a non-empty (always truthy) string when ``cond`` is false
    # and therefore can never fail in the non-ANSI case.
    if supports_ansi:
        expected = "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
    else:
        # The leading "\n" was previously mistyped as a literal "n".
        expected = "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"

    assert (
        Visualizer().render_instances(
            fully_featured_doc_two_sentences,
            search_attributes=search_attributes,
            display_columns=display_columns,
            group=True,
        )
        == expected
    )
|
||||
|
|
|
@ -344,9 +344,12 @@ class Visualizer:
|
|||
elif column.attribute == "tree_right":
|
||||
width = len(tree_right[0])
|
||||
else:
|
||||
if len(sent) > 0:
|
||||
width = max(
|
||||
len(column.render(token, ignore_colors=True)) for token in sent
|
||||
)
|
||||
else:
|
||||
width = 0
|
||||
if column.max_width is not None:
|
||||
width = min(width, column.max_width)
|
||||
width = max(width, len(column.name))
|
||||
|
@ -406,3 +409,65 @@ class Visualizer:
|
|||
this_token_strings.append(token.whitespace_)
|
||||
return_string += "".join(this_token_strings)
|
||||
return return_string
|
||||
|
||||
def render_instances(
    self,
    doc: Doc,
    *,
    search_attributes: list[AttributeFormat],
    display_columns: list[AttributeFormat],
    group: bool,
    spacing: int = 3,
) -> str:
    """Render a table containing one row per token that matches a search.

    A token matches when every entry of *search_attributes* renders to a
    non-empty string for it.

    doc: the document whose tokens are searched.
    search_attributes: the attribute formats a token must render non-empty
        for in order to be included; when *group* is set they also define
        the sort order.
    display_columns: the attribute formats that make up the table columns.
    group: if True, sort the matching tokens by their rendered search
        attribute values (compared in the order of *search_attributes*);
        if False, keep the order in which the tokens occur in *doc*.
    spacing: forwarded unchanged to ``wasabi.table``.
    RETURNS: the string produced by ``wasabi.table``.
    """

    def is_match(token: Token) -> bool:
        # Every search attribute has to produce some text for this token.
        return all(
            len(attribute.render(token, ignore_colors=True)) > 0
            for attribute in search_attributes
        )

    def sort_key(token: Token) -> list:
        # Lists compare element-wise, so earlier search attributes take
        # precedence over later ones.
        return [
            attribute.render(token, ignore_colors=True)
            for attribute in search_attributes
        ]

    tokens = [token for token in doc if is_match(token)]
    if group:
        tokens.sort(key=sort_key)

    widths = []
    for column in display_columns:
        # Widths are measured on the uncolored text so that escape
        # sequences do not count towards the column width.
        width = max(
            (len(column.render(token, ignore_colors=True)) for token in tokens),
            default=0,
        )
        if column.max_width is not None:
            width = min(width, column.max_width)
        # A column is never narrower than its header.
        width = max(width, len(column.name))
        widths.append(width)

    data = [[column.render(token) for column in display_columns] for token in tokens]

    # Only emit a header row when at least one column has a non-empty name.
    if any(len(column.name) > 0 for column in display_columns):
        header = [column.name for column in display_columns]
    else:
        header = None

    return wasabi.table(
        data,
        header=header,
        divider=True,
        aligns=[column.aligns for column in display_columns],
        widths=widths,
        fg_colors=[column.fg_color for column in display_columns],
        bg_colors=[column.bg_color for column in display_columns],
        spacing=spacing,
    )
|
||||
|
|
Loading…
Reference in New Issue
Block a user