Interim state

This commit is contained in:
richardpaulhudson 2023-01-26 16:39:37 +01:00
parent 37ef9550f2
commit 3002658b9c
2 changed files with 275 additions and 470 deletions

View File

@ -213,28 +213,6 @@ def test_viz_render_colors(horse_doc):
else "dep"
)
# whole row
assert (
AttributeFormat(
"dep_",
).render(horse_doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
if SUPPORTS_ANSI
else "dep"
)
# whole row with value dependent colors
assert (
AttributeFormat(
"dep_",
value_dep_fg_colors={"dep": 2},
value_dep_bg_colors={"dep": 11},
).render(horse_doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
if SUPPORTS_ANSI
else "dep"
)
# foreground only
assert (
AttributeFormat(
@ -273,12 +251,6 @@ def test_viz_render_custom_attributes(horse_doc):
AttributeFormat("._depp").render(horse_doc[2])
def test_viz_render_permitted_values(horse_doc):
    # With permitted_vals=(3, 7), only tokens whose head index is 3 or 7
    # render a value; every other token renders as the empty string.
    # NOTE(review): indentation restored here — the surrounding document is a
    # diff capture with whitespace stripped.
    attribute_format = AttributeFormat("head.i", permitted_vals=(3, 7))
    vals = ["", "", "3", "", "", "7", "7", "3", ""]
    assert [attribute_format.render(token) for token in horse_doc] == vals
def test_viz_minimal_render_table_one_sentence(
fully_featured_doc_one_sentence,
):
@ -293,9 +265,7 @@ def test_viz_minimal_render_table_one_sentence(
AttributeFormat("ent_type_"),
]
assert (
Visualizer()
.render_table(fully_featured_doc_one_sentence, formats, spacing=3)
.strip()
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3).strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes
@ -325,7 +295,7 @@ def test_viz_minimal_render_table_empty_text(
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
# headers
formats = [
@ -338,39 +308,7 @@ def test_viz_minimal_render_table_empty_text(
AttributeFormat("morph"),
AttributeFormat("ent_type_", name="ent"),
]
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
def test_viz_minimal_render_table_permitted_values(
fully_featured_doc_one_sentence,
):
formats = [
AttributeFormat("tree_left"),
AttributeFormat("dep_"),
AttributeFormat("text"),
AttributeFormat("lemma_", permitted_vals=("fly", "to")),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
assert (
Visualizer()
.render_table(fully_featured_doc_one_sentence, formats, spacing=3)
.strip()
== """
> poss Sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's PART POS Poss=yes
> nsubj sister NOUN NN Number=sing
ROOT flew fly VERB VBD Tense=past|VerbForm=fin
> prep to to ADP IN
> compound Silicon PROPN NNP NounType=prop|Number=sing GPE
> pobj Valley PROPN NNP NounType=prop|Number=sing GPE
> prep via ADP IN
> pobj London PROPN NNP NounType=prop|Number=sing GPE
> punct . PUNCT . PunctType=peri
""".strip()
)
assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
def test_viz_minimal_render_table_spacing(
@ -387,9 +325,7 @@ def test_viz_minimal_render_table_spacing(
AttributeFormat("ent_type_"),
]
assert (
Visualizer()
.render_table(fully_featured_doc_one_sentence, formats, spacing=1)
.strip()
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=1).strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes
@ -421,7 +357,7 @@ def test_viz_minimal_render_table_two_sentences(
assert (
Visualizer()
.render_table(fully_featured_doc_two_sentences, formats, spacing=3)
.render(fully_featured_doc_two_sentences, formats, spacing=3)
.strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
@ -465,7 +401,7 @@ def test_viz_rich_render_table_one_sentence(
),
]
assert (
Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3)
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
@ -493,7 +429,7 @@ def test_viz_rich_render_table_one_sentence(
),
]
assert (
Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3)
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
@ -520,253 +456,224 @@ def test_viz_rich_render_table_two_sentences(
value_dep_bg_colors={"PERSON": 12},
),
]
assert (
Visualizer().render_table(fully_featured_doc_two_sentences, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
print(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
print(
repr(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
)
target = (
"\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- ----- --- --------------- ---\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she PRON PRP Case=Nom|Gender \n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love VERB VBD Tense=Past|Verb \n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it PRON PRP Case=Acc|Gender \n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . PUNCT . PunctType=peri \n\n"
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
)
assert (
Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9
)
== target
)
def test_viz_text_with_text_format(
def test_viz_rich_render_table_start(
fully_featured_doc_two_sentences,
):
formats = [
AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
AttributeFormat("dep_", name="dep", fg_color=2),
AttributeFormat("i", name="index", aligns="r"),
AttributeFormat("text", name="text"),
AttributeFormat("lemma_", name="lemma"),
AttributeFormat("pos_", name="pos", fg_color=100),
AttributeFormat("tag_", name="tag", fg_color=100),
AttributeFormat("morph", name="morph", fg_color=100, max_width=15),
AttributeFormat(
"ent_type_",
fg_color=50,
name="ent",
fg_color=196,
value_dep_fg_colors={"PERSON": 50},
value_dep_bg_colors={"PERSON": 12},
),
AttributeFormat(
"text",
fg_color=50,
bg_color=53,
value_dep_fg_colors={"PERSON": 50},
value_dep_bg_colors={"PERSON": 12},
),
AttributeFormat(
"lemma_", fg_color=50, bg_color=53, permitted_vals=("fly", "valley")
),
]
assert (
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
== "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it."
print(
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
)
)
print(
repr(
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
)
)
)
target = (
"\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if SUPPORTS_ANSI
else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
start_i=11,
search_attr_name="pos",
search_attr_value="VERB",
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
start_i=2,
search_attr_name="lemma",
search_attr_value="love",
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
search_attr_name="lemma",
search_attr_value="love",
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
start_i=2,
length=3,
search_attr_name="lemma",
search_attr_value="love",
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
search_attr_name="lemma_",
search_attr_value="love",
)
== target
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
search_attr_name="lemma",
search_attr_value="lovef",
)
== ""
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
search_attr_name="lemma_",
search_attr_value="lovef",
)
== ""
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
search_attr_name="lemmaa",
search_attr_value="love",
)
== ""
)
assert (
Visualizer().render(
fully_featured_doc_two_sentences,
formats,
spacing=3,
start_i=50,
search_attr_name="lemma",
search_attr_value="love",
)
== ""
)
def test_viz_render_text_without_text_format(
def test_viz_rich_render_table_end(
fully_featured_doc_two_sentences,
):
formats = [
AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
AttributeFormat("dep_", name="dep", fg_color=2),
AttributeFormat("i", name="index", aligns="r"),
AttributeFormat("text", name="text"),
AttributeFormat("lemma_", name="lemma"),
AttributeFormat("pos_", name="pos", fg_color=100),
AttributeFormat("tag_", name="tag", fg_color=100),
AttributeFormat("morph", name="morph", fg_color=100, max_width=15),
AttributeFormat(
"ent_type_",
name="ent",
fg_color=196,
value_dep_fg_colors={"PERSON": 50},
value_dep_bg_colors={"PERSON": 12},
),
AttributeFormat("lemma_", permitted_vals=("fly", "valley")),
]
assert (
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
== "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
target = (
"\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if SUPPORTS_ANSI
else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it."
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n"
)
def test_viz_render_instances_two_sentences(
fully_featured_doc_two_sentences,
):
# search on entity type
display_columns = [
AttributeFormat("dep_"),
AttributeFormat("text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_")]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
)
# search on entity type with permitted values
display_columns = [
AttributeFormat("dep_"),
AttributeFormat("text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSON"])]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=2
)
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
== target
)
# include surrounding tokens
display_columns = [
AttributeFormat("dep_"),
AttributeFormat("text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSON"])]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=2,
surrounding_tokens_fg_color=11,
surrounding_tokens_bg_color=None,
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3
)
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;11mcase\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;11mnsubj\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n"
if SUPPORTS_ANSI
else "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \n"
== target
)
# missing permitted value
display_columns = [
AttributeFormat("dep_", name="dep"),
AttributeFormat("text", name="text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSONN"])]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
Visualizer().render(
fully_featured_doc_two_sentences, formats, spacing=3, length=3
)
== "\ndep text \n--- ---- \n"
== target
)
# missing permitted value, include surrounding tokens
display_columns = [
AttributeFormat("dep_", name="dep"),
AttributeFormat("text", name="text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSONN"])]
assert (
Visualizer().render_instances(
Visualizer().render(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=False,
formats,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
search_attr_name="pos",
search_attr_value="VERB",
)
== "\ndep text \n--- ---- \n"
)
# with grouping
display_columns = [
AttributeFormat("dep_"),
AttributeFormat("text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=True,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
)
# with grouping and colors
display_columns = [
AttributeFormat("dep_", fg_color=20),
AttributeFormat("text", bg_color=30),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attrs=search_attributes,
display_cols=display_columns,
group=True,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
if SUPPORTS_ANSI
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
== target
)

View File

@ -1,3 +1,6 @@
from curses import beep
from operator import index
from re import search
from typing import Dict, List, Optional, Union, cast
import wasabi
from wasabi.util import supports_ansi
@ -66,9 +69,6 @@ class AttributeFormat:
max_width: a maximum width to which values of the attribute should be truncated.
fg_color: the foreground color that should be used to display instances of the attribute
bg_color: the background color that should be used to display instances of the attribute
permitted_vals: a tuple of values of the attribute that should be displayed. If
permitted_values is not None and a value of the attribute is not
in permitted_values, the empty string is rendered instead of the value.
value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values.
"""
@ -78,7 +78,6 @@ class AttributeFormat:
self.max_width = max_width
self.fg_color = fg_color
self.bg_color = bg_color
self.permitted_vals = permitted_vals
self.value_dep_fg_colors = value_dep_fg_colors
self.value_dep_bg_colors = value_dep_bg_colors
self.printer = wasabi.Printer(no_print=True)
@ -89,27 +88,12 @@ class AttributeFormat:
*,
right_pad_to_len: Optional[int] = None,
ignore_colors: bool = False,
render_all_colors_in_vals: bool = False,
whole_row_fg_color: Union[int, str, None] = None,
whole_row_bg_color: Union[int, str, None] = None,
) -> str:
"""
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
render_all_colors_in_vals: when rendering a table, self.fg_color and self.bg_color are rendered in Wasabi.
This argument is set to True when rendering a text to signal that colors should be rendered here.
whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dependent_fg_colors.
whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dependent_bg_colors.
"""
obj = token
parts = self.attribute.split(".")
for part in parts[:-1]:
obj = getattr(obj, part)
value = str(getattr(obj, parts[-1]))
if self.permitted_vals is not None and value not in (
str(v) for v in self.permitted_vals
):
return ""
value = get_token_value(token, self.attribute)
if self.max_width is not None:
value = value[: self.max_width]
fg_color = None
@ -119,18 +103,10 @@ class AttributeFormat:
else:
right_padding = ""
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
if whole_row_fg_color is not None:
fg_color = whole_row_fg_color
elif self.value_dep_fg_colors is not None:
if self.value_dep_fg_colors is not None:
fg_color = self.value_dep_fg_colors.get(value, None)
if fg_color is None and render_all_colors_in_vals:
fg_color = self.fg_color
if self.value_dep_bg_colors is not None:
bg_color = self.value_dep_bg_colors.get(value, None)
if whole_row_bg_color is not None:
bg_color = whole_row_bg_color
elif bg_color is None and render_all_colors_in_vals:
bg_color = self.bg_color
if fg_color is not None or bg_color is not None:
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
return value + right_padding
@ -323,18 +299,59 @@ class Visualizer:
for vert_pos in range(sent.end - sent.start)
]
def render_table(self, doc: Doc, cols: List[AttributeFormat], spacing: int) -> str:
def render(
self,
doc: Doc,
cols: List[AttributeFormat],
spacing: int = 2,
start_i: int = 0,
length: Optional[int] = None,
search_attr_name: Optional[str] = None,
search_attr_value: Optional[str] = None,
) -> str:
"""Renders a document as a table.
TODO: specify a specific portion of the document to display.
cols: the attribute formats of the columns to display.
tree_right and tree_left are magic values for the
attributes that render dependency trees where the
roots are on the left or right respectively.
spacing: the number of spaces between each column in the table.
cols: the attribute formats of the columns to display.
tree_right and tree_left are magic values for the
attributes that render dependency trees where the
roots are on the left or right respectively.
spacing: the number of spaces between each column in the table.
start_i: the token index at which to start searching, or at
whose sentence to start rendering. Default: 0.
length: the number of tokens after *start_i* at whose sentence
to stop rendering. If *None*, the rest of the
document is rendered.
search_attr_name: the name of an attribute to search for in order to
determine where to start rendering, e.g. "lemma_",
or *None* if no search is to be carried out. If either
of *search_attr_name* and *search_attr_value* is *None*,
the behaviour is as if both were *None*.
search_attr_value: the value of an attribute to search for in order to
determine where to start rendering, e.g. "be",
or *None* if no search is to be carried out. If either
of *search_attr_name* and *search_attr_value* is *None*,
the behaviour is as if both were *None*.
"""
return_str = ""
for sent in doc.sents:
if search_attr_name is not None and search_attr_value is not None:
adj_start_i = get_adjusted_start_i(
doc, start_i, cols, search_attr_name, search_attr_value
)
else:
adj_start_i = start_i
if adj_start_i >= len(doc):
return return_str
end_i = len(doc) - 1
if length is not None:
end_i = min(end_i, adj_start_i + length)
elif start_i > 0 or (
search_attr_name is not None and search_attr_value is not None
):
end_i = adj_start_i
adj_start_i = doc[adj_start_i].sent.start
end_i = doc[end_i].sent.end
for sent in doc[adj_start_i:end_i].sents:
if "tree_right" in (c.attribute for c in cols):
tree_right = self.render_dep_tree(sent, True)
if "tree_left" in (c.attribute for c in cols):
@ -393,163 +410,44 @@ class Visualizer:
)
return return_str
def render_text(self, doc: Doc, attrs: List[AttributeFormat]) -> str:
"""Renders a text interspersed with attribute labels.
TODO: specify a specific portion of the document to display.
"""
return_str = ""
text_attrs = [a for a in attrs if a.attribute == "text"]
text_attr = text_attrs[0] if len(text_attrs) > 0 else AttributeFormat("text")
for token in doc:
this_token_strs = [""]
for attr in (a for a in attrs if a.attribute != "text"):
attr_text = attr.render(token, render_all_colors_in_vals=True)
if attr_text is not None and len(attr_text) > 0:
this_token_strs.append(" " + attr_text)
if len(this_token_strs) == 1:
this_token_strs[0] = token.text
else:
this_token_strs[0] = text_attr.render(
token, render_all_colors_in_vals=True
)
this_token_strs.append(token.whitespace_)
return_str += "".join(this_token_strs)
return return_str
def get_token_value(token: Token, attribute: str) -> str:
"""
Get value *token.x.y.z*.
token: the token
attribute: the attribute name, e.g. *x.y.z*.
"""
obj = token
parts = attribute.split(".")
for part in parts[:-1]:
obj = getattr(obj, part)
return str(getattr(obj, parts[-1]))
def render_instances(
self,
doc: Doc,
*,
search_attrs: List[AttributeFormat],
display_cols: List[AttributeFormat],
group: bool,
spacing: int,
surrounding_tokens_height: int,
surrounding_tokens_fg_color: Union[str, int],
surrounding_tokens_bg_color: Union[str, int],
) -> str:
"""Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'.
TODO: specify a specific portion of the document to display.
search_attrs: the attribute(s) or attribute value(s) that cause a row to be displayed for a token.
display_cols: the attributes that should be displayed in each row.
group: True if the rows should be ordered by the search attribute values,
False if they should retain their in-document order.
spacing: the number of spaces between each column.
surrounding_tokens_height: a number of rows that should be displayed with information about tokens
before and after matched tokens. Consecutive matching tokens, e.g.
tokens belonging to the same named entity, are rendered together as a single group.
surrounding_tokens_fg_color: a foreground color to use for surrounding token rows.
surrounding_tokens_bg_color: a background color to use for surrounding token rows.
Note that if surrounding_tokens_bg_color is None, any background color defined for the attribute
will be used instead, which is unlikely to be the desired result.
"""
def get_adjusted_start_i(
doc: Doc,
start_i: int,
cols: List[AttributeFormat],
search_attr_name: str,
search_attr_value: str,
):
"""
Get the position at which to start rendering a document, which may be
adjusted by a search for a specific attribute value.
doc: the document
start_i: the user-specified start index
cols: the list of attribute columns being displayed
search_attr_name: the name of the attribute for which values are being searched,
i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed.
search_attr_value: the attribute value for which to search.
def filter(token: Token) -> bool:
for attr in search_attrs:
value = attr.render(token, ignore_colors=True)
if len(value) == 0:
return False
return True
matched_tokens = [token for token in doc if filter(token)]
tokens_to_display_inds: List[int] = []
for token in matched_tokens:
for ind in range(
token.i - surrounding_tokens_height,
token.i + surrounding_tokens_height + 1,
):
if ind >= 0 and ind < len(doc):
tokens_to_display_inds.append(ind)
widths = []
for col in display_cols:
if len(tokens_to_display_inds) > 0:
width = max(
len(col.render(doc[i], ignore_colors=True))
for i in tokens_to_display_inds
)
else:
width = 0
if col.max_width is not None:
width = min(width, col.max_width)
width = max(width, len(col.name))
widths.append(width)
if group:
matched_tokens.sort(
key=(
lambda token: [
attr.render(token, ignore_colors=True) for attr in search_attrs
]
)
)
rows = []
token_ind_to_display = -1
for matched_token_ind, matched_token in enumerate(matched_tokens):
if surrounding_tokens_height > 0:
surrounding_start_ind = max(
0, matched_token.i - surrounding_tokens_height
)
if token_ind_to_display + 1 == matched_token.i:
surrounding_start_ind = token_ind_to_display + 1
surrounding_end_ind = min(
len(doc), matched_token.i + surrounding_tokens_height + 1
)
if (
matched_token_ind + 1 < len(matched_tokens)
and matched_token.i + 1 == matched_tokens[matched_token_ind + 1].i
):
surrounding_end_ind = matched_token.i + 1
else:
surrounding_start_ind = matched_token.i
surrounding_end_ind = surrounding_start_ind + 1
for token_ind_to_display in range(
surrounding_start_ind, surrounding_end_ind
):
if token_ind_to_display == matched_token.i:
rows.append(
[
col.render(
matched_token,
right_pad_to_len=widths[col_ind],
)
for col_ind, col in enumerate(display_cols)
]
)
else:
rows.append(
[
col.render(
doc[token_ind_to_display],
whole_row_fg_color=surrounding_tokens_fg_color,
whole_row_bg_color=surrounding_tokens_bg_color,
right_pad_to_len=widths[col_ind],
)
for col_ind, col in enumerate(display_cols)
]
)
if (
matched_token_ind + 1 < len(matched_tokens)
and token_ind_to_display + 1 != matched_tokens[matched_token_ind + 1].i
):
rows.append([])
header: Optional[List[str]]
if len([1 for c in display_cols if len(c.name) > 0]) > 0:
header = [c.name for c in display_cols]
else:
header = None
aligns = [c.aligns for c in display_cols]
fg_colors = [c.fg_color for c in display_cols]
bg_colors = [c.bg_color for c in display_cols]
return wasabi.table(
rows,
header=header,
divider=True,
aligns=aligns,
widths=widths,
fg_colors=fg_colors,
bg_colors=bg_colors,
spacing=spacing,
)
"""
for col in cols:
if col.name == search_attr_name or col.attribute == search_attr_name:
for token in doc[start_i:]:
if get_token_value(token, col.attribute) == search_attr_value:
return token.i
else:
return len(doc)