First working version

This commit is contained in:
richardpaulhudson 2023-01-26 19:21:27 +01:00
parent 0ea623990e
commit 9243341f74
3 changed files with 432 additions and 340 deletions

View File

@ -1,6 +1,6 @@
import pytest import pytest
from wasabi.util import supports_ansi from wasabi.util import supports_ansi
from spacy.visualization import AttributeFormat, Visualizer from spacy.visualization import AttributeFormat, render_dep_tree, render_table
from spacy.tokens import Span, Doc, Token from spacy.tokens import Span, Doc, Token
@ -45,7 +45,7 @@ def test_viz_dep_tree_basic(en_vocab):
heads=[2, 2, 3, None, 6, 6, 3, 3, 3], heads=[2, 2, 3, None, 6, 6, 3, 3, 3],
deps=["dep"] * 9, deps=["dep"] * 9,
) )
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True) dep_tree = render_dep_tree(doc[0 : len(doc)], True)
assert dep_tree == [ assert dep_tree == [
"<╗ ", "<╗ ",
"<╣ ", "<╣ ",
@ -57,7 +57,7 @@ def test_viz_dep_tree_basic(en_vocab):
"<══╣", "<══╣",
"<══╝", "<══╝",
] ]
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False) dep_tree = render_dep_tree(doc[0 : len(doc)], False)
assert dep_tree == [ assert dep_tree == [
" ╔>", " ╔>",
" ╠>", " ╠>",
@ -92,7 +92,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6], heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6],
deps=["dep"] * 12, deps=["dep"] * 12,
) )
dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], True) dep_tree = render_dep_tree(doc[3 : len(doc)], True)
assert dep_tree == [ assert dep_tree == [
"<╗ ", "<╗ ",
"<╣ ", "<╣ ",
@ -104,7 +104,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
"<══╣", "<══╣",
"<══╝", "<══╝",
] ]
dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], False) dep_tree = render_dep_tree(doc[3 : len(doc)], False)
assert dep_tree == [ assert dep_tree == [
" ╔>", " ╔>",
" ╠>", " ╠>",
@ -120,7 +120,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
def test_viz_dep_tree_non_projective(horse_doc): def test_viz_dep_tree_non_projective(horse_doc):
"""Test dependency tree display with a non-projective dependency.""" """Test dependency tree display with a non-projective dependency."""
dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], True) dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], True)
assert dep_tree == [ assert dep_tree == [
"<╗ ", "<╗ ",
"═╩═══╗", "═╩═══╗",
@ -132,7 +132,7 @@ def test_viz_dep_tree_non_projective(horse_doc):
"═╝<╝ ║", "═╝<╝ ║",
"<════╝", "<════╝",
] ]
dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], False) dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], False)
assert dep_tree == [ assert dep_tree == [
" ╔>", " ╔>",
"╔═══╩═", "╔═══╩═",
@ -163,7 +163,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
heads=[5, 5, 0, 5, 5, None, 4, 5], heads=[5, 5, 0, 5, 5, None, 4, 5],
deps=["dep"] * 8, deps=["dep"] * 8,
) )
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True) dep_tree = render_dep_tree(doc[0 : len(doc)], True)
assert dep_tree == [ assert dep_tree == [
"═╗<╗", "═╗<╗",
" ║<╣", " ║<╣",
@ -174,7 +174,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
"<╝ ║", "<╝ ║",
"<══╝", "<══╝",
] ]
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False) dep_tree = render_dep_tree(doc[0 : len(doc)], False)
assert dep_tree == [ assert dep_tree == [
"╔>╔═", "╔>╔═",
"╠>║ ", "╠>║ ",
@ -190,7 +190,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
def test_viz_dep_tree_input_not_span(horse_doc): def test_viz_dep_tree_input_not_span(horse_doc):
"""Test dependency tree display behaviour when the input is not a Span.""" """Test dependency tree display behaviour when the input is not a Span."""
with pytest.raises(ValueError): with pytest.raises(ValueError):
Visualizer.render_dep_tree(horse_doc[1:3], True) render_dep_tree(horse_doc[1:3], True)
def test_viz_render_native_attributes(horse_doc): def test_viz_render_native_attributes(horse_doc):
@ -199,7 +199,10 @@ def test_viz_render_native_attributes(horse_doc):
assert AttributeFormat("dep_").render(horse_doc[2]) == "dep" assert AttributeFormat("dep_").render(horse_doc[2]) == "dep"
with pytest.raises(AttributeError): with pytest.raises(AttributeError):
AttributeFormat("depp").render(horse_doc[2]) AttributeFormat("depp").render(horse_doc[2])
with pytest.raises(AttributeError):
AttributeFormat("tree_left").render(horse_doc[2])
with pytest.raises(AttributeError):
AttributeFormat("tree_right").render(horse_doc[2])
def test_viz_render_colors(horse_doc): def test_viz_render_colors(horse_doc):
assert ( assert (
@ -265,7 +268,7 @@ def test_viz_minimal_render_table_one_sentence(
AttributeFormat("ent_type_"), AttributeFormat("ent_type_"),
] ]
assert ( assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3).strip() render_table(fully_featured_doc_one_sentence, formats, spacing=3).strip()
== """ == """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON > poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes > case 's 's PART POS Poss=yes
@ -295,7 +298,7 @@ def test_viz_minimal_render_table_empty_text(
AttributeFormat("morph"), AttributeFormat("morph"),
AttributeFormat("ent_type_"), AttributeFormat("ent_type_"),
] ]
assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == "" assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
# headers # headers
formats = [ formats = [
@ -308,7 +311,7 @@ def test_viz_minimal_render_table_empty_text(
AttributeFormat("morph"), AttributeFormat("morph"),
AttributeFormat("ent_type_", name="ent"), AttributeFormat("ent_type_", name="ent"),
] ]
assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == "" assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
def test_viz_minimal_render_table_spacing( def test_viz_minimal_render_table_spacing(
@ -325,7 +328,7 @@ def test_viz_minimal_render_table_spacing(
AttributeFormat("ent_type_"), AttributeFormat("ent_type_"),
] ]
assert ( assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=1).strip() render_table(fully_featured_doc_one_sentence, formats, spacing=1).strip()
== """ == """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON > poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes > case 's 's PART POS Poss=yes
@ -356,8 +359,7 @@ def test_viz_minimal_render_table_two_sentences(
] ]
assert ( assert (
Visualizer() render_table(fully_featured_doc_two_sentences, formats, spacing=3)
.render(fully_featured_doc_two_sentences, formats, spacing=3)
.strip() .strip()
== """ == """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON > poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
@ -401,7 +403,7 @@ def test_viz_rich_render_table_one_sentence(
), ),
] ]
assert ( assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3) render_table(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london 
\x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if SUPPORTS_ANSI if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . 
PunctType=peri \n\n" else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
@ -429,7 +431,7 @@ def test_viz_rich_render_table_one_sentence(
), ),
] ]
assert ( assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3) render_table(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n" == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 
\x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
if SUPPORTS_ANSI if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . 
\x1b[38;5;100mPunctType=peri \x1b[0m \n\n" else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
@ -456,9 +458,9 @@ def test_viz_rich_render_table_two_sentences(
value_dep_bg_colors={"PERSON": 12}, value_dep_bg_colors={"PERSON": 12},
), ),
] ]
print(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)) print(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
print( print(
repr(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)) repr(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
) )
target = ( target = (
"\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. 
\x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n" "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m 
\x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
@ -466,17 +468,17 @@ def test_viz_rich_render_table_two_sentences(
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n" else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
) )
assert ( assert (
Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3) render_table(fully_featured_doc_two_sentences, formats, spacing=3)
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300 fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300
) )
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9 fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9
) )
== target == target
@ -504,13 +506,13 @@ def test_viz_rich_render_table_start(
), ),
] ]
print( print(
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11 fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
) )
) )
print( print(
repr( repr(
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11 fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
) )
) )
@ -521,13 +523,13 @@ def test_viz_rich_render_table_start(
else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n" else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11 fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
) )
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -538,7 +540,7 @@ def test_viz_rich_render_table_start(
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -549,7 +551,7 @@ def test_viz_rich_render_table_start(
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -559,7 +561,7 @@ def test_viz_rich_render_table_start(
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -571,7 +573,7 @@ def test_viz_rich_render_table_start(
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -581,7 +583,7 @@ def test_viz_rich_render_table_start(
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -591,7 +593,7 @@ def test_viz_rich_render_table_start(
== "" == ""
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -601,7 +603,7 @@ def test_viz_rich_render_table_start(
== "" == ""
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -611,7 +613,7 @@ def test_viz_rich_render_table_start(
== "" == ""
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,
@ -650,25 +652,25 @@ def test_viz_rich_render_table_end(
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=2 fully_featured_doc_two_sentences, formats, spacing=3, start_i=2
) )
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3 fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3
) )
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, formats, spacing=3, length=3 fully_featured_doc_two_sentences, formats, spacing=3, length=3
) )
== target == target
) )
assert ( assert (
Visualizer().render( render_table(
fully_featured_doc_two_sentences, fully_featured_doc_two_sentences,
formats, formats,
spacing=3, spacing=3,

View File

@ -38,6 +38,7 @@ from .underscore import Underscore, get_ext_args
from ._retokenize import Retokenizer from ._retokenize import Retokenizer
from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS
from ..util import get_words_and_spaces from ..util import get_words_and_spaces
from ..visualization import render_document
DEF PADDING = 5 DEF PADDING = 5
@ -1751,6 +1752,45 @@ cdef class Doc:
attrs.extend(intify_attr(x) for x in DOCBIN_ALL_ATTRS) attrs.extend(intify_attr(x) for x in DOCBIN_ALL_ATTRS)
return tuple(attrs) return tuple(attrs)
def inspect(
    self,
    search_attr_name=None,
    search_attr_value=None,
    *,
    start_i=0,
    length=None
):
    """Print the document, or a part of it, as a table.

    The table text is obtained from *render_document* and passed to
    *print*. If any of the four optional parameters is used to pick out a
    part of the document, the sentences surrounding that part are rendered;
    if none of them is specified, the whole document is rendered.

    search_attr_name: the name of an attribute to search for in order to
        determine where to start rendering, e.g. "lemma_", or *None* if no
        search is to be carried out. If either of *search_attr_name* and
        *search_attr_value* is *None*, the behaviour is as if both were
        *None*.
    search_attr_value: the value of an attribute to search for in order to
        determine where to start rendering, e.g. "be", or *None* if no
        search is to be carried out. If either of *search_attr_name* and
        *search_attr_value* is *None*, the behaviour is as if both were
        *None*.
    start_i: the token index at which to start searching, or at whose
        sentence to start rendering. Default: 0.
    length: the number of tokens after *start_i* at whose sentence to stop
        rendering. If *None*, the rest of the document is rendered.
    """
    # Build the table first, then emit it in a single print call.
    table = render_document(
        self,
        search_attr_name,
        search_attr_value,
        start_i=start_i,
        length=length,
    )
    print(table)
cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2: cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2:
cdef int i = token_by_char(tokens, length, start_char) cdef int i = token_by_char(tokens, length, start_char)

View File

@ -4,7 +4,6 @@ from re import search
from typing import Dict, List, Optional, Union, cast from typing import Dict, List, Optional, Union, cast
import wasabi import wasabi
from wasabi.util import supports_ansi from wasabi.util import supports_ansi
from spacy.tokens import Span, Token, Doc
SUPPORTS_ANSI = supports_ansi() SUPPORTS_ANSI = supports_ansi()
@ -57,7 +56,6 @@ class AttributeFormat:
max_width: Optional[int] = None, max_width: Optional[int] = None,
fg_color: Optional[Union[str, int]] = None, fg_color: Optional[Union[str, int]] = None,
bg_color: Optional[Union[str, int]] = None, bg_color: Optional[Union[str, int]] = None,
permitted_vals: Optional[tuple] = None,
value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None, value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None,
value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None, value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
): ):
@ -78,13 +76,17 @@ class AttributeFormat:
self.max_width = max_width self.max_width = max_width
self.fg_color = fg_color self.fg_color = fg_color
self.bg_color = bg_color self.bg_color = bg_color
self.value_dep_fg_colors = value_dep_fg_colors self.value_dep_fg_colors = (
self.value_dep_bg_colors = value_dep_bg_colors value_dep_fg_colors if value_dep_fg_colors is not None else {}
)
self.value_dep_bg_colors = (
value_dep_bg_colors if value_dep_bg_colors is not None else {}
)
self.printer = wasabi.Printer(no_print=True) self.printer = wasabi.Printer(no_print=True)
def render( def render(
self, self,
token: Token, token,
*, *,
right_pad_to_len: Optional[int] = None, right_pad_to_len: Optional[int] = None,
ignore_colors: bool = False, ignore_colors: bool = False,
@ -93,7 +95,7 @@ class AttributeFormat:
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding. right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
""" """
value = get_token_value(token, self.attribute) value = _get_token_value(token, self.attribute)
if self.max_width is not None: if self.max_width is not None:
value = value[: self.max_width] value = value[: self.max_width]
fg_color = None fg_color = None
@ -103,18 +105,16 @@ class AttributeFormat:
else: else:
right_padding = "" right_padding = ""
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0: if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
if self.value_dep_fg_colors is not None: if len(self.value_dep_fg_colors) > 0:
fg_color = self.value_dep_fg_colors.get(value, None) fg_color = self.value_dep_fg_colors.get(value, None)
if self.value_dep_bg_colors is not None: if len(self.value_dep_bg_colors) > 0:
bg_color = self.value_dep_bg_colors.get(value, None) bg_color = self.value_dep_bg_colors.get(value, None)
if fg_color is not None or bg_color is not None: if fg_color is not None or bg_color is not None:
value = self.printer.text(value, color=fg_color, bg_color=bg_color) value = self.printer.text(value, color=fg_color, bg_color=bg_color)
return value + right_padding return value + right_padding
class Visualizer: def render_dep_tree(sent, root_right: bool) -> List[str]:
@staticmethod
def render_dep_tree(sent: Span, root_right: bool) -> List[str]:
""" """
Returns an ASCII rendering of the document with a dependency tree for each sentence. The Returns an ASCII rendering of the document with a dependency tree for each sentence. The
dependency tree output for a given token has the same index within the output list of dependency tree output for a given token has the same index within the output list of
@ -192,12 +192,7 @@ class Visualizer:
# children's lines. # children's lines.
if len(children_lists[working_token_ind]) > 0: if len(children_lists[working_token_ind]) > 0:
horiz_line_lens[working_token_ind] = ( horiz_line_lens[working_token_ind] = (
max( max([horiz_line_lens[i] for i in children_lists[working_token_ind]])
[
horiz_line_lens[i]
for i in children_lists[working_token_ind]
]
)
+ 1 + 1
) )
else: else:
@ -209,8 +204,7 @@ class Visualizer:
): ):
alt_ind: int alt_ind: int
if ( if (
inbetween_ind inbetween_ind in children_lists[cast(int, heads[working_token_ind])]
in children_lists[cast(int, heads[working_token_ind])]
and inbetween_ind not in children_lists[working_token_ind] and inbetween_ind not in children_lists[working_token_ind]
): ):
alt_ind = horiz_line_lens[inbetween_ind] alt_ind = horiz_line_lens[inbetween_ind]
@ -241,9 +235,7 @@ class Visualizer:
# Draw the horizontal line for the governing token # Draw the horizontal line for the governing token
for working_horiz_pos in range(char_horiz_line_len - 1): for working_horiz_pos in range(char_horiz_line_len - 1):
if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE: if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE:
char_matrix[head_token_ind][ char_matrix[head_token_ind][working_horiz_pos] |= FULL_HORIZONTAL_LINE
working_horiz_pos
] |= FULL_HORIZONTAL_LINE
# Draw the vertical line for the relation # Draw the vertical line for the relation
for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel): for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel):
@ -299,29 +291,25 @@ class Visualizer:
for vert_pos in range(sent.end - sent.start) for vert_pos in range(sent.end - sent.start)
] ]
def render(
self, def render_table(
doc: Doc, doc,
cols: List[AttributeFormat], cols: List[AttributeFormat],
spacing: int = 2, spacing: int = 3,
start_i: int = 0,
length: Optional[int] = None,
search_attr_name: Optional[str] = None, search_attr_name: Optional[str] = None,
search_attr_value: Optional[str] = None, search_attr_value: Optional[str] = None,
start_i: int = 0,
length: Optional[int] = None,
) -> str: ) -> str:
"""Renders a document as a table. """Renders a document as a table, allowing the caller to specify various
TODO: specify a specific portion of the document to display. display options.
doc: the document.
cols: the attribute formats of the columns to display. cols: the attribute formats of the columns to display.
tree_right and tree_left are magic values for the tree_right and tree_left are magic values for the
attributes that render dependency trees where the attributes that render dependency trees where the
roots are on the left or right respectively. roots are on the left or right respectively.
spacing: the number of spaces between each column in the table. spacing: the number of spaces between each column in the table.
start_i: the token index at which to start searching, or at
whose sentence to start rendering. Default: 0.
length: the number of tokens after *start_i* at whose sentence
to stop rendering. If *None*, the rest of the
document is rendered.
search_attr_name: the name of an attribute to search for in order to search_attr_name: the name of an attribute to search for in order to
determine where to start rendering, e.g. "lemma_", determine where to start rendering, e.g. "lemma_",
or *None* if no search is to be carried out. If either or *None* if no search is to be carried out. If either
@ -332,10 +320,19 @@ class Visualizer:
or *None* if no search is to be carried out. If either or *None* if no search is to be carried out. If either
of *search_attr_name* and *search_attr_value* is *None*, of *search_attr_name* and *search_attr_value* is *None*,
the behaviour is as if both were *None*. the behaviour is as if both were *None*.
start_i: the token index at which to start searching, or at
whose sentence to start rendering. Default: 0.
length: the number of tokens after *start_i* at whose sentence
to stop rendering. If *None*, the rest of the
document is rendered.
""" """
return_str = "" return_str = ""
if search_attr_name is not None and search_attr_value is not None: if (
adj_start_i = get_adjusted_start_i( search_attr_name is not None
and search_attr_name not in ("tree_right", "tree_left")
and search_attr_value is not None
):
adj_start_i = _get_adjusted_start_i(
doc, start_i, cols, search_attr_name, search_attr_value doc, start_i, cols, search_attr_name, search_attr_value
) )
else: else:
@ -353,9 +350,9 @@ class Visualizer:
end_i = doc[end_i].sent.end end_i = doc[end_i].sent.end
for sent in doc[adj_start_i:end_i].sents: for sent in doc[adj_start_i:end_i].sents:
if "tree_right" in (c.attribute for c in cols): if "tree_right" in (c.attribute for c in cols):
tree_right = self.render_dep_tree(sent, True) tree_right = render_dep_tree(sent, True)
if "tree_left" in (c.attribute for c in cols): if "tree_left" in (c.attribute for c in cols):
tree_left = self.render_dep_tree(sent, False) tree_left = render_dep_tree(sent, False)
widths = [] widths = []
for col in cols: for col in cols:
# get the values without any color codes # get the values without any color codes
@ -411,7 +408,60 @@ class Visualizer:
return return_str return return_str
def render_document(
    doc,
    search_attr_name: Optional[str] = None,
    search_attr_value: Optional[str] = None,
    *,
    start_i: int = 0,
    length: Optional[int] = None,
) -> str:
    """Render a document as a table with a fixed, standard column layout.

    The columns are, in order: a left-rooted dependency tree, the dependency
    label, the entity type, the token index, the text, the lemma, the
    coarse-grained and fine-grained tags and the morphology. When a search
    is requested, every column whose attribute or display name equals
    *search_attr_name* gets foreground colour code 1 registered for cells
    equal to *search_attr_value*. The actual rendering is delegated to
    *render_table* with a column spacing of 3.

    doc: the document.
    search_attr_name: the name of an attribute to search for in order to
        determine where to start rendering, e.g. "lemma_", or *None* if no
        search is to be carried out. If either of *search_attr_name* and
        *search_attr_value* is *None*, the behaviour is as if both were
        *None*.
    search_attr_value: the value of an attribute to search for in order to
        determine where to start rendering, e.g. "be", or *None* if no
        search is to be carried out. If either of *search_attr_name* and
        *search_attr_value* is *None*, the behaviour is as if both were
        *None*.
    start_i: the token index at which to start searching, or at whose
        sentence to start rendering. Default: 0.
    length: the number of tokens after *start_i* at whose sentence to stop
        rendering. If *None*, the rest of the document is rendered.
    """
    columns = [
        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=4),
        AttributeFormat("dep_", name="dep_"),
        AttributeFormat("ent_type_", name="ent_type_"),
        AttributeFormat("i", name="index", aligns="r"),
        AttributeFormat("text", name="text", max_width=20),
        AttributeFormat("lemma_", name="lemma_", max_width=20),
        AttributeFormat("pos_", name="pos_"),
        AttributeFormat("tag_", name="tag_"),
        AttributeFormat("morph", name="morph_", max_width=60),
    ]

    # A search only takes place when both the name and the value are given.
    search_requested = not (search_attr_name is None or search_attr_value is None)
    if search_requested:
        for column in columns:
            if search_attr_name in (column.attribute, column.name):
                # Highlight the searched-for value wherever it occurs in
                # the matching column.
                column.value_dep_fg_colors[search_attr_value] = 1

    return render_table(
        doc=doc,
        cols=columns,
        spacing=3,
        search_attr_name=search_attr_name,
        search_attr_value=search_attr_value,
        start_i=start_i,
        length=length,
    )
def _get_token_value(token, attribute: str) -> str:
""" """
Get value *token.x.y.z*. Get value *token.x.y.z*.
@ -422,11 +472,11 @@ def get_token_value(token: Token, attribute: str) -> str:
parts = attribute.split(".") parts = attribute.split(".")
for part in parts[:-1]: for part in parts[:-1]:
obj = getattr(obj, part) obj = getattr(obj, part)
return str(getattr(obj, parts[-1])) return str(getattr(obj, parts[-1])).strip()
def get_adjusted_start_i( def _get_adjusted_start_i(
doc: Doc, doc,
start_i: int, start_i: int,
cols: List[AttributeFormat], cols: List[AttributeFormat],
search_attr_name: str, search_attr_name: str,
@ -447,7 +497,7 @@ def get_adjusted_start_i(
for col in cols: for col in cols:
if col.name == search_attr_name or col.attribute == search_attr_name: if col.name == search_attr_name or col.attribute == search_attr_name:
for token in doc[start_i:]: for token in doc[start_i:]:
if get_token_value(token, col.attribute) == search_attr_value: if _get_token_value(token, col.attribute) == search_attr_value:
return token.i return token.i
else: else:
return len(doc) return len(doc)