From e713aa0938f69236c83801a01d47ebfde75f5a8c Mon Sep 17 00:00:00 2001 From: Richard Hudson Date: Thu, 23 Dec 2021 16:13:40 +0100 Subject: [PATCH] Add surrounding tokens functionality --- setup.cfg | 2 +- spacy/tests/test_visualization.py | 266 +++++++++++++++++++++++++++--- spacy/visualization.py | 195 ++++++++++++++++++---- 3 files changed, 408 insertions(+), 55 deletions(-) diff --git a/setup.cfg b/setup.cfg index 72f4b39da..d4b0fc54e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -48,7 +48,7 @@ install_requires = preshed>=3.0.2,<3.1.0 thinc>=8.0.12,<8.1.0 blis>=0.4.0,<0.8.0 - wasabi>=0.8.1,<1.1.0 + wasabi>=0.9.0,<1.1.0 srsly>=2.4.1,<3.0.0 catalogue>=2.0.6,<2.1.0 typer>=0.3.0,<0.5.0 diff --git a/spacy/tests/test_visualization.py b/spacy/tests/test_visualization.py index 32e9547fc..7d3661f8b 100644 --- a/spacy/tests/test_visualization.py +++ b/spacy/tests/test_visualization.py @@ -227,6 +227,26 @@ def test_visualization_render_native_attribute_int(en_vocab): assert AttributeFormat("head.i").render(doc[2]) == "3" +def test_visualization_render_native_attribute_int_with_right_padding(en_vocab): + doc = Doc( + en_vocab, + words=[ + "I", + "saw", + "a", + "horse", + "yesterday", + "that", + "was", + "injured", + ".", + ], + heads=[1, None, 3, 1, 1, 7, 7, 3, 1], + deps=["dep"] * 9, + ) + assert AttributeFormat("head.i").render(doc[2], right_pad_to_length=3) == "3 " + + def test_visualization_render_native_attribute_str(en_vocab): doc = Doc( en_vocab, @@ -278,6 +298,64 @@ def test_visualization_render_colors(en_vocab): ) +def test_visualization_render_whole_row_colors(en_vocab): + doc = Doc( + en_vocab, + words=[ + "I", + "saw", + "a", + "horse", + "yesterday", + "that", + "was", + "injured", + ".", + ], + heads=[1, None, 3, 1, 1, 7, 7, 3, 1], + deps=["dep"] * 9, + ) + + assert ( + AttributeFormat( + "dep_", + ).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9) + == "\x1b[38;5;8;48;5;9mdep\x1b[0m" + if supports_ansi + else "dep" + ) + + +def 
test_visualization_render_whole_row_colors_with_value_dependent_colors(en_vocab): + doc = Doc( + en_vocab, + words=[ + "I", + "saw", + "a", + "horse", + "yesterday", + "that", + "was", + "injured", + ".", + ], + heads=[1, None, 3, 1, 1, 7, 7, 3, 1], + deps=["dep"] * 9, + ) + + assert ( + AttributeFormat( + "dep_", + value_dependent_fg_colors={"dep": 2}, + value_dependent_bg_colors={"dep": 11}, + ).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9) + == "\x1b[38;5;8;48;5;9mdep\x1b[0m" + if supports_ansi + else "dep" + ) + + def test_visualization_render_colors_only_fg(en_vocab): doc = Doc( en_vocab, @@ -470,7 +548,9 @@ def test_visualization_minimal_render_table_one_sentence( AttributeFormat("ent_type_"), ] assert ( - Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip() + Visualizer() + .render_table(fully_featured_doc_one_sentence, formats, spacing=3) + .strip() == """ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON ║ ╚> case 's 's PART POS Poss=yes @@ -499,7 +579,7 @@ def test_visualization_minimal_render_table_empty_text_no_headers( AttributeFormat("morph"), AttributeFormat("ent_type_"), ] - assert Visualizer().render_table(Doc(en_vocab), formats).strip() == "" + assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == "" def test_visualization_minimal_render_table_empty_text_headers( @@ -515,7 +595,7 @@ def test_visualization_minimal_render_table_empty_text_headers( AttributeFormat("morph"), AttributeFormat("ent_type_", name="ent"), ] - assert Visualizer().render_table(Doc(en_vocab), formats).strip() == "" + assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == "" def test_visualization_minimal_render_table_permitted_values( @@ -532,7 +612,9 @@ def test_visualization_minimal_render_table_permitted_values( AttributeFormat("ent_type_"), ] assert ( - Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip() + Visualizer() + 
.render_table(fully_featured_doc_one_sentence, formats, spacing=3) + .strip() == """ ╔>╔═ poss Sarah PROPN NNP NounType=prop|Number=sing PERSON ║ ╚> case 's PART POS Poss=yes @@ -595,7 +677,9 @@ def test_visualization_minimal_render_table_two_sentences( ] assert ( - Visualizer().render_table(fully_featured_doc_two_sentences, formats).strip() + Visualizer() + .render_table(fully_featured_doc_two_sentences, formats, spacing=3) + .strip() == """ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON ║ ╚> case 's 's PART POS Poss=yes @@ -638,10 +722,41 @@ def test_visualization_rich_render_table_one_sentence( ), ] assert ( - Visualizer().render_table(fully_featured_doc_one_sentence, formats) - == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m 
\x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" + Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3) + == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m 
\x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" if supports_ansi - else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . 
PunctType=peri \n\n" + else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . 
PunctType=peri \n\n" + ) + + +def test_visualization_rich_render_table_one_sentence_trigger_value_shorter_than_maximum( + fully_featured_doc_one_sentence, +): + formats = [ + AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2), + AttributeFormat("dep_", name="dep", fg_color=2), + AttributeFormat("i", name="index", aligns="r"), + AttributeFormat( + "text", + name="text", + fg_color=196, + value_dependent_fg_colors={"'s": 50}, + value_dependent_bg_colors={"'s": 12}, + ), + AttributeFormat("lemma_", name="lemma"), + AttributeFormat("pos_", name="pos", fg_color=100), + AttributeFormat("tag_", name="tag", fg_color=100), + AttributeFormat("morph", name="morph", fg_color=100, max_width=15), + AttributeFormat( + "ent_type_", + name="ent", + ), + ] + assert ( + Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3) + == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m 
\x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n" + if supports_ansi + else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ 
╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n" ) @@ -666,10 +781,10 @@ def test_visualization_rich_render_table_two_sentences( ), ] assert ( - Visualizer().render_table(fully_featured_doc_two_sentences, formats) - == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m 
\x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. 
\x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" + Visualizer().render_table(fully_featured_doc_two_sentences, formats, spacing=3) + == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 
London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. 
\x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" if supports_ansi - else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . 
PunctType=peri \n\n" + else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- ----- --- --------------- ---\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she PRON PRP Case=Nom|Gender \n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love VERB VBD Tense=Past|Verb \n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it PRON PRP Case=Acc|Gender \n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . PUNCT . 
PunctType=peri \n\n" ) @@ -735,15 +850,18 @@ def test_visualization_minimal_render_instances_two_sentences_type_non_grouping( ] search_attributes = [AttributeFormat("ent_type_")] - assert ( Visualizer().render_instances( fully_featured_doc_two_sentences, search_attributes=search_attributes, display_columns=display_columns, group=False, + spacing=3, + surrounding_tokens_height=0, + surrounding_tokens_fg_color=None, + surrounding_tokens_bg_color=None, ) - == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \npobj London london PROPN NNP NounType=prop|Number=sing GPE \n" + == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n" ) @@ -768,12 +886,48 @@ def test_visualization_minimal_render_instances_two_sentences_value_non_grouping search_attributes=search_attributes, display_columns=display_columns, group=False, + spacing=3, + surrounding_tokens_height=0, + surrounding_tokens_fg_color=None, + surrounding_tokens_bg_color=None, ) == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" ) -def test_visualization_minimal_render_instances_two_sentences_missing_value_non_grouping( +def test_visualization_minimal_render_instances_two_sentences_value_surrounding_sentences_non_grouping( + fully_featured_doc_two_sentences, +): + display_columns = [ + AttributeFormat("dep_"), + AttributeFormat("text"), + AttributeFormat("lemma_"), + AttributeFormat("pos_"), + AttributeFormat("tag_"), + AttributeFormat("morph"), + AttributeFormat("ent_type_"), + ] + + search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSON"])] + + assert ( + Visualizer().render_instances( + fully_featured_doc_two_sentences, + 
search_attributes=search_attributes, + display_columns=display_columns, + group=False, + spacing=3, + surrounding_tokens_height=2, + surrounding_tokens_fg_color=11, + surrounding_tokens_bg_color=None, + ) + == "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;11mcase\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;11mnsubj\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n" + if supports_ansi + else "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \n" + ) + + +def test_visualization_render_instances_two_sentences_missing_value_non_grouping( fully_featured_doc_two_sentences, ): display_columns = [ @@ -794,12 +948,46 @@ def test_visualization_minimal_render_instances_two_sentences_missing_value_non_ search_attributes=search_attributes, display_columns=display_columns, group=False, + spacing=3, + surrounding_tokens_height=0, + surrounding_tokens_fg_color=None, + surrounding_tokens_bg_color=None, ) == "\ndep text \n--- ---- \n" ) -def test_visualization_minimal_render_instances_two_sentences_type_grouping( +def test_visualization_render_instances_two_sentences_missing_value_surrounding_sentences_non_grouping( + fully_featured_doc_two_sentences, +): + display_columns = [ + AttributeFormat("dep_", name="dep"), + AttributeFormat("text", name="text"), + AttributeFormat("lemma_"), + AttributeFormat("pos_"), + AttributeFormat("tag_"), + AttributeFormat("morph"), + AttributeFormat("ent_type_"), + ] + + search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])] + + assert ( + Visualizer().render_instances( + fully_featured_doc_two_sentences, + search_attributes=search_attributes, + display_columns=display_columns, + group=False, + spacing=3, + 
surrounding_tokens_height=0, + surrounding_tokens_fg_color=None, + surrounding_tokens_bg_color=None, + ) + == "\ndep text \n--- ---- \n" + ) + + +def test_visualization_render_instances_two_sentences_type_grouping( fully_featured_doc_two_sentences, ): display_columns = [ @@ -820,12 +1008,16 @@ def test_visualization_minimal_render_instances_two_sentences_type_grouping( search_attributes=search_attributes, display_columns=display_columns, group=True, + spacing=3, + surrounding_tokens_height=0, + surrounding_tokens_fg_color=None, + surrounding_tokens_bg_color=None, ) - == "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" + == "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" ) -def test_visualization_minimal_render_instances_two_sentences_type_grouping_colors( +def test_visualization_render_instances_two_sentences_type_grouping_colors( fully_featured_doc_two_sentences, ): display_columns = [ @@ -846,8 +1038,44 @@ def test_visualization_minimal_render_instances_two_sentences_type_grouping_colo search_attributes=search_attributes, display_columns=display_columns, group=True, + spacing=3, + surrounding_tokens_height=0, + surrounding_tokens_fg_color=None, + surrounding_tokens_bg_color=None, ) - == "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah 
PROPN NNP NounType=prop|Number=sing PERSON\n" + == "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n" if supports_ansi - else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" + else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n" + ) + + +def test_visualization_render_instances_two_sentences_type_grouping_colors_with_surrounding_sentences( + fully_featured_doc_two_sentences, +): + display_columns = [ + AttributeFormat("dep_", fg_color=20), + AttributeFormat("text", bg_color=30), + AttributeFormat("lemma_"), + AttributeFormat("pos_"), + AttributeFormat("tag_"), + AttributeFormat("morph"), + AttributeFormat("ent_type_"), + ] + + search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")] + + assert ( + Visualizer().render_instances( + fully_featured_doc_two_sentences, + search_attributes=search_attributes, + display_columns=display_columns, + group=True, + spacing=3, + surrounding_tokens_height=3, + surrounding_tokens_fg_color=11, + surrounding_tokens_bg_color=None, + ) + == "\n\x1b[38;5;20m\x1b[38;5;11mcompound\x1b[0m\x1b[0m \x1b[48;5;30m\x1b[38;5;11mSilicon\x1b[0m\x1b[0m \x1b[38;5;11msilicon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m 
\x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mValley\x1b[0m \x1b[0m \x1b[38;5;11mvalley\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mShe\x1b[0m \x1b[0m \x1b[38;5;11mshe\x1b[0m \x1b[38;5;11mPRON\x1b[0m \x1b[38;5;11mPRP\x1b[0m \x1b[38;5;11mCase=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mloved\x1b[0m \x1b[0m \x1b[38;5;11mlove\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=Past|VerbForm=Fin\x1b[0m \n\n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mto\x1b[0m \x1b[0m \x1b[38;5;11mto\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP 
NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mLondon\x1b[0m \x1b[0m \x1b[38;5;11mlondon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;20m\x1b[38;5;11mcase\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m's\x1b[0m \x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n" + if supports_ansi + else "\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . 
PunctType=peri \nnsubj She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \nROOT loved love VERB VBD Tense=Past|VerbForm=Fin \n\nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \nprep to to ADP IN \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \n" ) diff --git a/spacy/visualization.py b/spacy/visualization.py index 3e0b6e6ae..0075d9f41 100644 --- a/spacy/visualization.py +++ b/spacy/visualization.py @@ -1,9 +1,7 @@ -from os import linesep, truncate +from os import linesep from typing import Union import wasabi -from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS from spacy.tokens import Span, Token, Doc -from spacy.util import working_dir SPACE = 0 @@ -42,6 +40,10 @@ ROOT_LEFT_CHARS = { class AttributeFormat: + """ + Instructions for rendering information about a token property, e.g. lemma_, ent_type_. + """ + def __init__( self, attribute: str, @@ -55,6 +57,20 @@ class AttributeFormat: value_dependent_fg_colors: dict[str, Union[str, int]] = None, value_dependent_bg_colors: dict[str, Union[str, int]] = None, ): + """ + attribute: the token attribute, e.g. lemma_, ._.holmes.lemma + name: the name to display e.g. in column headers + aligns: where appropriate the column alignment 'l' (left, + default), 'r' (right) or 'c' (center). + max_width: a maximum width to which values of the attribute should be truncated. 
+ fg_color: the foreground color that should be used to display instances of the attribute + bg_color: the background color that should be used to display instances of the attribute + permitted_values: a tuple of values of the attribute that should be displayed. If + permitted_values is not None and a value of the attribute is not + in permitted_values, the empty string is rendered instead of the value. + value_dependent_fg_colors: a dictionary from values to foreground colors that should be used to display those values. + value_dependent_bg_colors: a dictionary from values to background colors that should be used to display those values. + """ self.attribute = attribute self.name = name self.aligns = aligns @@ -70,9 +86,19 @@ class AttributeFormat: self, token: Token, *, + right_pad_to_length: int = None, ignore_colors: bool = False, render_all_colors_within_values: bool = False, + whole_row_fg_color: Union[int, str] = None, + whole_row_bg_color: Union[int, str] = None, ) -> str: + """ + ignore_colors: no colors should be rendered, typically because the values are required to calculate widths + render_all_colors_within_values: when rendering a table, self.fg_color and self.bg_color are rendered in Wasabi. + This argument is set to True when rendering a text to signal that colors should be rendered here. + whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dependent_fg_colors. + whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dependent_bg_colors.
+ """ obj = token parts = self.attribute.split(".") for part in parts[:-1]: @@ -86,18 +112,26 @@ class AttributeFormat: value = value[: self.max_width] fg_color = None bg_color = None + if right_pad_to_length is not None: + right_padding = " " * (right_pad_to_length - len(value)) + else: + right_padding = "" if not ignore_colors and len(value) > 0: - if self.value_dependent_fg_colors is not None: + if whole_row_fg_color is not None: + fg_color = whole_row_fg_color + elif self.value_dependent_fg_colors is not None: fg_color = self.value_dependent_fg_colors.get(value, None) if fg_color is None and render_all_colors_within_values: fg_color = self.fg_color if self.value_dependent_bg_colors is not None: bg_color = self.value_dependent_bg_colors.get(value, None) - if bg_color is None and render_all_colors_within_values: + if whole_row_bg_color is not None: + bg_color = whole_row_bg_color + elif bg_color is None and render_all_colors_within_values: bg_color = self.bg_color if fg_color is not None or bg_color is not None: value = self.printer.text(value, color=fg_color, bg_color=bg_color) - return value + return value + right_padding class Visualizer: @@ -111,7 +145,7 @@ class Visualizer: root_right: True if the tree should be rendered with the root on the right-hand side, False if the tree should be rendered with the root on the left-hand side. - Adapted from https://github.com/KoichiYasuoka/deplacy + Algorithm adapted from https://github.com/KoichiYasuoka/deplacy """ # Check sent is really a sentence @@ -328,8 +362,17 @@ class Visualizer: ] def render_table( - self, doc: Doc, columns: list[AttributeFormat], spacing: int = 3 + self, doc: Doc, columns: list[AttributeFormat], spacing: int ) -> str: + """Renders a document as a table. + TODO: specify a specific portion of the document to display. + + columns: the attribute formats of the columns to display. 
+ tree_right and tree_left are magic values for the + attributes that render dependency trees where the + roots are on the right or left respectively. + spacing: the number of spaces between each column in the table. + """ return_string = "" for sent in doc.sents: if "tree_right" in (c.attribute for c in columns): @@ -346,7 +389,8 @@ class Visualizer: else: if len(sent) > 0: width = max( - len(column.render(token, ignore_colors=True)) for token in sent + len(column.render(token, ignore_colors=True)) + for token in sent ) else: width = 0 @@ -360,7 +404,7 @@ class Visualizer: if column.attribute == "tree_right" else tree_left[token_index] if column.attribute == "tree_left" - else column.render(token) + else column.render(token, right_pad_to_length=widths[column_index]) for column_index, column in enumerate(columns) ] for token_index, token in enumerate(sent) @@ -388,6 +432,10 @@ class Visualizer: return return_string def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str: + """Renders a text interspersed with attribute labels. + TODO: specify a specific portion of the document to display. + + """ return_string = "" text_attributes = [a for a in attributes if a.attribute == "text"] text_attribute = ( @@ -417,8 +465,28 @@ class Visualizer: search_attributes: list[AttributeFormat], display_columns: list[AttributeFormat], group: bool, - spacing: int = 3, + spacing: int, + surrounding_tokens_height: int, + surrounding_tokens_fg_color: Union[str, int], + surrounding_tokens_bg_color: Union[str, int], ) -> str: + """Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'. + TODO: specify a specific portion of the document to display. + + search_attributes: the attribute(s) or attribute value(s) that cause a row to be displayed for a token. + display_columns: the attributes that should be displayed in each row.
+ group: True if the rows should be ordered by the search attribute values, + False if they should retain their in-document order. + spacing: the number of spaces between each column. + surrounding_tokens_height: a number of rows that should be displayed with information about tokens + before and after matched tokens. Consecutive matching tokens, e.g. + tokens belonging to the same named entity, are rendered together as a single group. + surrounding_tokens_fg_color: a foreground color to use for surrounding token rows. + surrounding_tokens_bg_color: a background color to use for surrounding token rows. + Note that if surrounding_tokens_bg_color is None, any background color defined for the attribute + will be used instead, which is unlikely to be the desired result. + """ + def filter(token: Token) -> bool: for attribute in search_attributes: value = attribute.render(token, ignore_colors=True) @@ -426,20 +494,22 @@ class Visualizer: return False return True - tokens = [token for token in doc if filter(token)] - if group: - tokens.sort( - key=( - lambda token: [attribute.render(token, ignore_colors=True) - for attribute in search_attributes] - ) + matched_tokens = [token for token in doc if filter(token)] + tokens_to_display_indices = [ + index + for token in matched_tokens + for index in range( + token.i - surrounding_tokens_height, + token.i + surrounding_tokens_height + 1, ) - + if index >= 0 and index < len(doc) + ] widths = [] for column in display_columns: - if len(tokens) > 0: + if len(tokens_to_display_indices) > 0: width = max( - len(column.render(token, ignore_colors=True)) for token in tokens + len(column.render(doc[i], ignore_colors=True)) + for i in tokens_to_display_indices ) else: width = 0 @@ -447,22 +517,77 @@ class Visualizer: width = min(width, column.max_width) width = max(width, len(column.name)) widths.append(width) - data = [ - [ - column.render(token) - for column_index, column in enumerate(display_columns) - ] - for token in tokens - ] - if 
len([1 for c in display_columns if len(c.name) > 0]) > 0: - header = [c.name for c in display_columns] + if group: + matched_tokens.sort( + key=( + lambda token: [ + attribute.render(token, ignore_colors=True) + for attribute in search_attributes + ] + ) + ) + + rows = [] + token_index_to_display = -1 + for matched_token_index, matched_token in enumerate(matched_tokens): + if surrounding_tokens_height > 0: + surrounding_start_index = max( + 0, matched_token.i - surrounding_tokens_height + ) + if token_index_to_display + 1 == matched_token.i: + surrounding_start_index = token_index_to_display + 1 + surrounding_end_index = min( + len(doc), matched_token.i + surrounding_tokens_height + 1 + ) + if ( + matched_token_index + 1 < len(matched_tokens) + and matched_token.i + 1 == matched_tokens[matched_token_index + 1].i + ): + surrounding_end_index = matched_token.i + 1 + else: - header = None - aligns = [c.aligns for c in display_columns] - fg_colors = [c.fg_color for c in display_columns] - bg_colors = [c.bg_color for c in display_columns] + surrounding_start_index = matched_token.i + surrounding_end_index = surrounding_start_index + 1 + for token_index_to_display in range( + surrounding_start_index, surrounding_end_index + ): + if token_index_to_display == matched_token.i: + rows.append( + [ + column.render( + matched_token, + right_pad_to_length=widths[column_index], + ) + for column_index, column in enumerate(display_columns) + ] + ) + else: + rows.append( + [ + column.render( + doc[token_index_to_display], + whole_row_fg_color=surrounding_tokens_fg_color, + whole_row_bg_color=surrounding_tokens_bg_color, + right_pad_to_length=widths[column_index], + ) + for column_index, column in enumerate(display_columns) + ] + ) + if ( + matched_token_index + 1 < len(matched_tokens) + and token_index_to_display + 1 + != matched_tokens[matched_token_index + 1].i + ): + rows.append([]) + if len([1 for c in display_columns if len(c.name) > 0]) > 0: + header = [c.name for c in 
display_columns] + else: + header = None + aligns = [c.aligns for c in display_columns] + fg_colors = [c.fg_color for c in display_columns] + bg_colors = [c.bg_color for c in display_columns] return wasabi.table( - data, + rows, header=header, divider=True, aligns=aligns,