From 3002658b9c1661cc7ed112c46801b5466bc7e57e Mon Sep 17 00:00:00 2001 From: richardpaulhudson Date: Thu, 26 Jan 2023 16:39:37 +0100 Subject: [PATCH] Interim state --- spacy/tests/test_visualization.py | 463 ++++++++++++------------------ spacy/visualization.py | 282 ++++++------------ 2 files changed, 275 insertions(+), 470 deletions(-) diff --git a/spacy/tests/test_visualization.py b/spacy/tests/test_visualization.py index 6010a9e9a..7ce37e894 100644 --- a/spacy/tests/test_visualization.py +++ b/spacy/tests/test_visualization.py @@ -213,28 +213,6 @@ def test_viz_render_colors(horse_doc): else "dep" ) - # whole row - assert ( - AttributeFormat( - "dep_", - ).render(horse_doc[2], whole_row_fg_color=8, whole_row_bg_color=9) - == "\x1b[38;5;8;48;5;9mdep\x1b[0m" - if SUPPORTS_ANSI - else "dep" - ) - - # whole row with value dependent colors - assert ( - AttributeFormat( - "dep_", - value_dep_fg_colors={"dep": 2}, - value_dep_bg_colors={"dep": 11}, - ).render(horse_doc[2], whole_row_fg_color=8, whole_row_bg_color=9) - == "\x1b[38;5;8;48;5;9mdep\x1b[0m" - if SUPPORTS_ANSI - else "dep" - ) - # foreground only assert ( AttributeFormat( @@ -273,12 +251,6 @@ def test_viz_render_custom_attributes(horse_doc): AttributeFormat("._depp").render(horse_doc[2]) -def test_viz_render_permitted_values(horse_doc): - attribute_format = AttributeFormat("head.i", permitted_vals=(3, 7)) - vals = ["", "", "3", "", "", "7", "7", "3", ""] - assert [attribute_format.render(token) for token in horse_doc] == vals - - def test_viz_minimal_render_table_one_sentence( fully_featured_doc_one_sentence, ): @@ -293,9 +265,7 @@ def test_viz_minimal_render_table_one_sentence( AttributeFormat("ent_type_"), ] assert ( - Visualizer() - .render_table(fully_featured_doc_one_sentence, formats, spacing=3) - .strip() + Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3).strip() == """ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON ║ ╚> case 's 's PART POS Poss=yes @@ -325,7 +295,7 @@ def test_viz_minimal_render_table_empty_text( AttributeFormat("morph"), AttributeFormat("ent_type_"), ] - assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == "" + assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == "" # headers formats = [ @@ -338,39 +308,7 @@ def test_viz_minimal_render_table_empty_text( AttributeFormat("morph"), AttributeFormat("ent_type_", name="ent"), ] - assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == "" - - -def test_viz_minimal_render_table_permitted_values( - fully_featured_doc_one_sentence, -): - formats = [ - AttributeFormat("tree_left"), - AttributeFormat("dep_"), - AttributeFormat("text"), - AttributeFormat("lemma_", permitted_vals=("fly", "to")), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - assert ( - Visualizer() - .render_table(fully_featured_doc_one_sentence, formats, spacing=3) - .strip() - == """ - ╔>╔═ poss Sarah PROPN NNP NounType=prop|Number=sing PERSON - ║ ╚> case 's PART POS Poss=yes -╔>╚═══ nsubj sister NOUN NN Number=sing -╠═════ ROOT flew fly VERB VBD Tense=past|VerbForm=fin -╠>╔═══ prep to to ADP IN -║ ║ ╔> compound Silicon PROPN NNP NounType=prop|Number=sing GPE -║ ╚>╚═ pobj Valley PROPN NNP NounType=prop|Number=sing GPE -╠══>╔═ prep via ADP IN -║ ╚> pobj London PROPN NNP NounType=prop|Number=sing GPE -╚════> punct . PUNCT . 
PunctType=peri - """.strip() - ) + assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == "" def test_viz_minimal_render_table_spacing( @@ -387,9 +325,7 @@ def test_viz_minimal_render_table_spacing( AttributeFormat("ent_type_"), ] assert ( - Visualizer() - .render_table(fully_featured_doc_one_sentence, formats, spacing=1) - .strip() + Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=1).strip() == """ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON ║ ╚> case 's 's PART POS Poss=yes @@ -421,7 +357,7 @@ def test_viz_minimal_render_table_two_sentences( assert ( Visualizer() - .render_table(fully_featured_doc_two_sentences, formats, spacing=3) + .render(fully_featured_doc_two_sentences, formats, spacing=3) .strip() == """ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON @@ -465,7 +401,7 @@ def test_viz_rich_render_table_one_sentence( ), ] assert ( - Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3) + Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3) == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. 
\x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" if SUPPORTS_ANSI else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n" @@ -493,7 +429,7 @@ def test_viz_rich_render_table_one_sentence( ), ] assert ( - Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3) + Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3) == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. 
\x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n" if SUPPORTS_ANSI else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n" @@ -520,253 +456,224 @@ def test_viz_rich_render_table_two_sentences( value_dep_bg_colors={"PERSON": 12}, ), ] - assert ( - Visualizer().render_table(fully_featured_doc_two_sentences, formats, spacing=3) - == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london 
\x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" + print(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)) + print( + repr(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)) + ) + target = ( + "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . 
\x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" if SUPPORTS_ANSI - else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- ----- --- --------------- ---\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she PRON PRP Case=Nom|Gender \n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love VERB VBD Tense=Past|Verb \n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it PRON PRP Case=Acc|Gender \n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . PUNCT . PunctType=peri \n\n" + else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . 
PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n" + ) + assert ( + Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300 + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9 + ) + == target ) -def test_viz_text_with_text_format( +def test_viz_rich_render_table_start( fully_featured_doc_two_sentences, ): formats = [ + AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2), + AttributeFormat("dep_", name="dep", fg_color=2), + AttributeFormat("i", name="index", aligns="r"), + AttributeFormat("text", name="text"), + AttributeFormat("lemma_", name="lemma"), + AttributeFormat("pos_", name="pos", fg_color=100), + AttributeFormat("tag_", name="tag", fg_color=100), + AttributeFormat("morph", name="morph", fg_color=100, max_width=15), AttributeFormat( "ent_type_", - fg_color=50, + name="ent", + fg_color=196, value_dep_fg_colors={"PERSON": 50}, value_dep_bg_colors={"PERSON": 12}, ), - AttributeFormat( - "text", - fg_color=50, - bg_color=53, - value_dep_fg_colors={"PERSON": 50}, - value_dep_bg_colors={"PERSON": 12}, - ), - AttributeFormat( - "lemma_", fg_color=50, bg_color=53, permitted_vals=("fly", "valley") - ), ] - assert ( - Visualizer().render_text(fully_featured_doc_two_sentences, formats) - == "\x1b[38;5;50;48;5;53mSarah\x1b[0m \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister \x1b[38;5;50;48;5;53mflew\x1b[0m \x1b[38;5;50;48;5;53mfly\x1b[0m to \x1b[38;5;50;48;5;53mSilicon\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mValley\x1b[0m \x1b[38;5;50mGPE\x1b[0m \x1b[38;5;50;48;5;53mvalley\x1b[0m via \x1b[38;5;50;48;5;53mLondon\x1b[0m \x1b[38;5;50mGPE\x1b[0m. She loved it." + print( + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=11 + ) + ) + print( + repr( + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=11 + ) + ) + ) + target = ( + "\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" if SUPPORTS_ANSI - else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." 
+ else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n" + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=11 + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + start_i=11, + search_attr_name="pos", + search_attr_value="VERB", + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + start_i=2, + search_attr_name="lemma", + search_attr_value="love", + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + search_attr_name="lemma", + search_attr_value="love", + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + start_i=2, + length=3, + search_attr_name="lemma", + search_attr_value="love", + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + search_attr_name="lemma_", + search_attr_value="love", + ) + == target + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + search_attr_name="lemma", + search_attr_value="lovef", + ) + == "" + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + search_attr_name="lemma_", + search_attr_value="lovef", + ) + == "" + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + search_attr_name="lemmaa", + search_attr_value="love", + ) + == "" + ) + assert ( + Visualizer().render( + fully_featured_doc_two_sentences, + formats, + spacing=3, + start_i=50, + search_attr_name="lemma", + search_attr_value="love", + ) + == "" ) -def test_viz_render_text_without_text_format( +def test_viz_rich_render_table_end( fully_featured_doc_two_sentences, ): formats = [ + AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2), + AttributeFormat("dep_", name="dep", fg_color=2), + AttributeFormat("i", name="index", aligns="r"), + AttributeFormat("text", name="text"), + AttributeFormat("lemma_", name="lemma"), + AttributeFormat("pos_", name="pos", fg_color=100), + AttributeFormat("tag_", name="tag", fg_color=100), + AttributeFormat("morph", name="morph", fg_color=100, max_width=15), AttributeFormat( "ent_type_", + name="ent", + fg_color=196, value_dep_fg_colors={"PERSON": 50}, value_dep_bg_colors={"PERSON": 12}, ), - AttributeFormat("lemma_", permitted_vals=("fly", "valley")), ] - assert ( - Visualizer().render_text(fully_featured_doc_two_sentences, formats) - == "Sarah \x1b[38;5;50;48;5;12mPERSON\x1b[0m's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." 
+ target = ( + "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" if SUPPORTS_ANSI - else "Sarah PERSON's sister flew fly to Silicon GPE Valley GPE valley via London GPE. She loved it." + else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . 
PunctType=peri \n\n" ) - -def test_viz_render_instances_two_sentences( - fully_featured_doc_two_sentences, -): - # search on entity type - display_columns = [ - AttributeFormat("dep_"), - AttributeFormat("text"), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_")] - assert ( - Visualizer().render_instances( - fully_featured_doc_two_sentences, - search_attrs=search_attributes, - display_cols=display_columns, - group=False, - spacing=3, - surrounding_tokens_height=0, - surrounding_tokens_fg_color=None, - surrounding_tokens_bg_color=None, - ) - == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n" - ) - - # search on entity type with permitted values - display_columns = [ - AttributeFormat("dep_"), - AttributeFormat("text"), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSON"])] - assert ( - Visualizer().render_instances( - fully_featured_doc_two_sentences, - search_attrs=search_attributes, - display_cols=display_columns, - group=False, - spacing=3, - surrounding_tokens_height=0, - surrounding_tokens_fg_color=None, - surrounding_tokens_bg_color=None, + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=2 ) - == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" + == target ) - - # include surrounding tokens - display_columns = [ - AttributeFormat("dep_"), - AttributeFormat("text"), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSON"])] - assert ( - Visualizer().render_instances( - fully_featured_doc_two_sentences, - search_attrs=search_attributes, - display_cols=display_columns, - group=False, - spacing=3, - surrounding_tokens_height=2, - surrounding_tokens_fg_color=11, - surrounding_tokens_bg_color=None, + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3 ) - == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;11mcase\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;11mnsubj\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n" - if SUPPORTS_ANSI - else "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \n" + == target ) - - # missing permitted value - display_columns = [ - AttributeFormat("dep_", name="dep"), - AttributeFormat("text", name="text"), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSONN"])] - assert ( - Visualizer().render_instances( - fully_featured_doc_two_sentences, - search_attrs=search_attributes, - 
display_cols=display_columns, - group=False, - spacing=3, - surrounding_tokens_height=0, - surrounding_tokens_fg_color=None, - surrounding_tokens_bg_color=None, + Visualizer().render( + fully_featured_doc_two_sentences, formats, spacing=3, length=3 ) - == "\ndep text \n--- ---- \n" + == target ) - - # missing permitted value, include surrounding tokens - display_columns = [ - AttributeFormat("dep_", name="dep"), - AttributeFormat("text", name="text"), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSONN"])] - assert ( - Visualizer().render_instances( + Visualizer().render( fully_featured_doc_two_sentences, - search_attrs=search_attributes, - display_cols=display_columns, - group=False, + formats, spacing=3, - surrounding_tokens_height=0, - surrounding_tokens_fg_color=None, - surrounding_tokens_bg_color=None, + search_attr_name="pos", + search_attr_value="VERB", ) - == "\ndep text \n--- ---- \n" - ) - - # with grouping - display_columns = [ - AttributeFormat("dep_"), - AttributeFormat("text"), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")] - - assert ( - Visualizer().render_instances( - fully_featured_doc_two_sentences, - search_attrs=search_attributes, - display_cols=display_columns, - group=True, - spacing=3, - surrounding_tokens_height=0, - surrounding_tokens_fg_color=None, - surrounding_tokens_bg_color=None, - ) - == "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" - ) - - # with grouping and colors - display_columns = [ - AttributeFormat("dep_", fg_color=20), - AttributeFormat("text", bg_color=30), - AttributeFormat("lemma_"), - AttributeFormat("pos_"), - AttributeFormat("tag_"), - AttributeFormat("morph"), - AttributeFormat("ent_type_"), - ] - - search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")] - - assert ( - Visualizer().render_instances( - fully_featured_doc_two_sentences, - search_attrs=search_attributes, - display_cols=display_columns, - group=True, - spacing=3, - surrounding_tokens_height=0, - surrounding_tokens_fg_color=None, - surrounding_tokens_bg_color=None, - ) - == "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n" - if SUPPORTS_ANSI - else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" + == target ) diff --git a/spacy/visualization.py b/spacy/visualization.py index ac0de959a..75954a8e2 100644 --- a/spacy/visualization.py +++ b/spacy/visualization.py @@ -1,3 +1,6 @@ +from curses import beep +from operator import index 
+from re import search from typing import Dict, List, Optional, Union, cast import wasabi from wasabi.util import supports_ansi @@ -66,9 +69,6 @@ class AttributeFormat: max_width: a maximum width to which values of the attribute should be truncated. fg_color: the foreground color that should be used to display instances of the attribute bg_color: the background color that should be used to display instances of the attribute - permitted_vals: a tuple of values of the attribute that should be displayed. If - permitted_values is not None and a value of the attribute is not - in permitted_values, the empty string is rendered instead of the value. value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values. value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values. """ @@ -78,7 +78,6 @@ class AttributeFormat: self.max_width = max_width self.fg_color = fg_color self.bg_color = bg_color - self.permitted_vals = permitted_vals self.value_dep_fg_colors = value_dep_fg_colors self.value_dep_bg_colors = value_dep_bg_colors self.printer = wasabi.Printer(no_print=True) @@ -89,27 +88,12 @@ class AttributeFormat: *, right_pad_to_len: Optional[int] = None, ignore_colors: bool = False, - render_all_colors_in_vals: bool = False, - whole_row_fg_color: Union[int, str, None] = None, - whole_row_bg_color: Union[int, str, None] = None, ) -> str: """ right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding. ignore_colors: no colors should be rendered, typically because the values are required to calculate widths - render_all_colors_in_vals: when rendering a table, self.fg_color and self.bg_color are rendered in Wasabi. - This argument is set to True when rendering a text to signal that colors should be rendered here. - whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dependent_fg_colors. - whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dependent_bg_colors. 
""" - obj = token - parts = self.attribute.split(".") - for part in parts[:-1]: - obj = getattr(obj, part) - value = str(getattr(obj, parts[-1])) - if self.permitted_vals is not None and value not in ( - str(v) for v in self.permitted_vals - ): - return "" + value = get_token_value(token, self.attribute) if self.max_width is not None: value = value[: self.max_width] fg_color = None @@ -119,18 +103,10 @@ class AttributeFormat: else: right_padding = "" if SUPPORTS_ANSI and not ignore_colors and len(value) > 0: - if whole_row_fg_color is not None: - fg_color = whole_row_fg_color - elif self.value_dep_fg_colors is not None: + if self.value_dep_fg_colors is not None: fg_color = self.value_dep_fg_colors.get(value, None) - if fg_color is None and render_all_colors_in_vals: - fg_color = self.fg_color if self.value_dep_bg_colors is not None: bg_color = self.value_dep_bg_colors.get(value, None) - if whole_row_bg_color is not None: - bg_color = whole_row_bg_color - elif bg_color is None and render_all_colors_in_vals: - bg_color = self.bg_color if fg_color is not None or bg_color is not None: value = self.printer.text(value, color=fg_color, bg_color=bg_color) return value + right_padding @@ -323,18 +299,59 @@ class Visualizer: for vert_pos in range(sent.end - sent.start) ] - def render_table(self, doc: Doc, cols: List[AttributeFormat], spacing: int) -> str: + def render( + self, + doc: Doc, + cols: List[AttributeFormat], + spacing: int = 2, + start_i: int = 0, + length: Optional[int] = None, + search_attr_name: Optional[str] = None, + search_attr_value: Optional[str] = None, + ) -> str: """Renders a document as a table. TODO: specify a specific portion of the document to display. - cols: the attribute formats of the columns to display. - tree_right and tree_left are magic values for the - attributes that render dependency trees where the - roots are on the left or right respectively. - spacing: the number of spaces between each column in the table. + cols: the attribute formats of the columns to display. + tree_right and tree_left are magic values for the + attributes that render dependency trees where the + roots are on the left or right respectively. + spacing: the number of spaces between each column in the table. + start_i: the token index at which to start searching, or at + whose sentence to start rendering. Default: 0. + length: the number of tokens after *start_i* at whose sentence + to stop rendering. If *None*, the rest of the + document is rendered. + search_attr_name: the name of an attribute to search for in order to + determine where to start rendering, e.g. "lemma_", + or *None* if no search is to be carried out. If either + of *search_attr_name* and *search_attr_value* is *None*, + the behaviour is as if both were *None*. + search_attr_value: the value of an attribute to search for in order to + determine where to start rendering, e.g. "be", + or *None* if no search is to be carried out. If either + of *search_attr_name* and *search_attr_value* is *None*, + the behaviour is as if both were *None*. 
""" return_str = "" - for sent in doc.sents: + if search_attr_name is not None and search_attr_value is not None: + adj_start_i = get_adjusted_start_i( + doc, start_i, cols, search_attr_name, search_attr_value + ) + else: + adj_start_i = start_i + if adj_start_i >= len(doc): + return return_str + end_i = len(doc) - 1 + if length is not None: + end_i = min(end_i, adj_start_i + length) + elif start_i > 0 or ( + search_attr_name is not None and search_attr_value is not None + ): + end_i = adj_start_i + adj_start_i = doc[adj_start_i].sent.start + end_i = doc[end_i].sent.end + for sent in doc[adj_start_i:end_i].sents: if "tree_right" in (c.attribute for c in cols): tree_right = self.render_dep_tree(sent, True) if "tree_left" in (c.attribute for c in cols): @@ -393,163 +410,44 @@ class Visualizer: ) return return_str - def render_text(self, doc: Doc, attrs: List[AttributeFormat]) -> str: - """Renders a text interspersed with attribute labels. - TODO: specify a specific portion of the document to display. - """ - return_str = "" - text_attrs = [a for a in attrs if a.attribute == "text"] - text_attr = text_attrs[0] if len(text_attrs) > 0 else AttributeFormat("text") - for token in doc: - this_token_strs = [""] - for attr in (a for a in attrs if a.attribute != "text"): - attr_text = attr.render(token, render_all_colors_in_vals=True) - if attr_text is not None and len(attr_text) > 0: - this_token_strs.append(" " + attr_text) - if len(this_token_strs) == 1: - this_token_strs[0] = token.text - else: - this_token_strs[0] = text_attr.render( - token, render_all_colors_in_vals=True - ) - this_token_strs.append(token.whitespace_) - return_str += "".join(this_token_strs) - return return_str +def get_token_value(token: Token, attribute: str) -> str: + """ + Get value *token.x.y.z*. + + token: the token + attribute: the attribute name, e.g. *x.y.z*. + """ + obj = token + parts = attribute.split(".") + for part in parts[:-1]: + obj = getattr(obj, part) + return str(getattr(obj, parts[-1])) - def render_instances( - self, - doc: Doc, - *, - search_attrs: List[AttributeFormat], - display_cols: List[AttributeFormat], - group: bool, - spacing: int, - surrounding_tokens_height: int, - surrounding_tokens_fg_color: Union[str, int], - surrounding_tokens_bg_color: Union[str, int], - ) -> str: - """Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'. - TODO: specify a specific portion of the document to display. - search_attrs: the attribute(s) or attribute value(s) that cause a row to be displayed for a token. - display_cols: the attributes that should be displayed in each row. - group: True if the rows should be ordered by the search attribute values, - False if they should retain their in-document order. - spacing: the number of spaces between each column. - surrounding_tokens_height: a number of rows that should be displayed with information about tokens - before and after matched tokens. Consecutive matching tokens, e.g. - tokens belonging to the same named entity, are rendered together as a single group. - surrounding_tokens_fg_color: a foreground color to use for surrounding token rows. - surrounding_tokens_bg_color: a background color to use for surrounding token rows. - Note that if surrounding_tokens_bg_color is None, any background color defined for the attribute - will be used instead, which is unlikely to be the desired result. 
- """ +def get_adjusted_start_i( + doc: Doc, + start_i: int, + cols: List[AttributeFormat], + search_attr_name: str, + search_attr_value: str, +): + """ + Get the position at which to start rendering a document, which may be + adjusted by a search for a specific attribute value. + + doc: the document + start_i: the user-specified start index + cols: the list of attribute columns being displayed + search_attr_name: the name of the attribute for which values are being searched, + i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed. + search_attr_value: the attribute value for which to search. - def filter(token: Token) -> bool: - for attr in search_attrs: - value = attr.render(token, ignore_colors=True) - if len(value) == 0: - return False - return True - - matched_tokens = [token for token in doc if filter(token)] - tokens_to_display_inds: List[int] = [] - for token in matched_tokens: - for ind in range( - token.i - surrounding_tokens_height, - token.i + surrounding_tokens_height + 1, - ): - if ind >= 0 and ind < len(doc): - tokens_to_display_inds.append(ind) - widths = [] - for col in display_cols: - if len(tokens_to_display_inds) > 0: - width = max( - len(col.render(doc[i], ignore_colors=True)) - for i in tokens_to_display_inds - ) - else: - width = 0 - if col.max_width is not None: - width = min(width, col.max_width) - width = max(width, len(col.name)) - widths.append(width) - if group: - matched_tokens.sort( - key=( - lambda token: [ - attr.render(token, ignore_colors=True) for attr in search_attrs - ] - ) - ) - - rows = [] - token_ind_to_display = -1 - for matched_token_ind, matched_token in enumerate(matched_tokens): - if surrounding_tokens_height > 0: - surrounding_start_ind = max( - 0, matched_token.i - surrounding_tokens_height - ) - if token_ind_to_display + 1 == matched_token.i: - surrounding_start_ind = token_ind_to_display + 1 - surrounding_end_ind = min( - len(doc), matched_token.i + surrounding_tokens_height + 1 - ) - if ( - matched_token_ind + 1 < len(matched_tokens) - and matched_token.i + 1 == matched_tokens[matched_token_ind + 1].i - ): - surrounding_end_ind = matched_token.i + 1 - - else: - surrounding_start_ind = matched_token.i - surrounding_end_ind = surrounding_start_ind + 1 - for token_ind_to_display in range( - surrounding_start_ind, surrounding_end_ind - ): - if token_ind_to_display == matched_token.i: - rows.append( - [ - col.render( - matched_token, - right_pad_to_len=widths[col_ind], - ) - for col_ind, col in enumerate(display_cols) - ] - ) - else: - rows.append( - [ - col.render( - doc[token_ind_to_display], - whole_row_fg_color=surrounding_tokens_fg_color, - whole_row_bg_color=surrounding_tokens_bg_color, - right_pad_to_len=widths[col_ind], - ) - for col_ind, col in enumerate(display_cols) - ] - ) - if ( - matched_token_ind + 1 < len(matched_tokens) - and token_ind_to_display + 1 != matched_tokens[matched_token_ind + 1].i - ): - rows.append([]) - header: Optional[List[str]] - if len([1 for c in display_cols if len(c.name) > 0]) > 0: - header = [c.name for c in display_cols] - else: - header = None - aligns = [c.aligns for c in display_cols] - fg_colors = [c.fg_color for c in display_cols] - bg_colors = [c.bg_color for c in display_cols] - return wasabi.table( - rows, - header=header, - divider=True, - aligns=aligns, - widths=widths, - fg_colors=fg_colors, - bg_colors=bg_colors, - spacing=spacing, - ) + """ + for col in cols: + if col.name == search_attr_name or col.attribute == search_attr_name: + for token in 
doc[start_i:]:
+                if get_token_value(token, col.attribute) == search_attr_value:
+                    return token.i
+    # No matching column or value: return a position past the end so render() emits nothing.
+    return len(doc)
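
A usage sketch of the consolidated Visualizer.render() API that replaces render_table(), render_text() and render_instances(). The calls mirror the tests above; the module path spacy.visualization and running a loaded pipeline such as en_core_web_sm over the example text are assumptions made for illustration (the tests construct their Doc objects by hand).

    # Sketch of the new unified API, assuming spacy.visualization exposes
    # AttributeFormat and Visualizer as added in this patch and that a
    # parser/NER pipeline such as en_core_web_sm is installed.
    import spacy
    from spacy.visualization import AttributeFormat, Visualizer

    nlp = spacy.load("en_core_web_sm")
    doc = nlp("Sarah's sister flew to Silicon Valley via London. She loved it.")

    cols = [
        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
        AttributeFormat("dep_", name="dep", fg_color=2),
        AttributeFormat("i", name="index", aligns="r"),
        AttributeFormat("text", name="text"),
        AttributeFormat("lemma_", name="lemma"),
        AttributeFormat("pos_", name="pos", fg_color=100),
        AttributeFormat("morph", name="morph", max_width=15),
        AttributeFormat("ent_type_", name="ent", value_dep_fg_colors={"PERSON": 50}),
    ]
    visualizer = Visualizer()

    # Whole document, one table per sentence.
    print(visualizer.render(doc, cols, spacing=3))

    # Only the sentence containing token 11 ("loved").
    print(visualizer.render(doc, cols, spacing=3, start_i=11))

    # Start at the first token (from start_i onwards) whose lemma is "love";
    # a search that matches nothing renders the empty string.
    print(
        visualizer.render(
            doc, cols, spacing=3, search_attr_name="lemma_", search_attr_value="love"
        )
    )

As in the tests, search_attr_name is matched against either a column's display name or its attribute, so "lemma" and "lemma_" behave the same here.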
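
The dotted attribute lookup that previously lived inline in AttributeFormat.render() is factored out into the module-level helper get_token_value(); a minimal sketch of what it resolves, reusing the doc from the sketch above:

    # get_token_value() walks dotted attribute paths and stringifies the result,
    # so "head.i" is resolved as str(token.head.i).
    from spacy.visualization import get_token_value

    silicon = doc[5]
    print(get_token_value(silicon, "lemma_"))  # the token's lemma as a string
    print(get_token_value(silicon, "head.i"))  # the index of its syntactic head, as a string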