First working version

Author: richardpaulhudson
Date:   2023-01-26 19:21:27 +01:00
Commit: 9243341f74 (parent 0ea623990e)

3 changed files with 432 additions and 340 deletions

File 1 of 3:

@@ -1,6 +1,6 @@
import pytest
from wasabi.util import supports_ansi
-from spacy.visualization import AttributeFormat, Visualizer
+from spacy.visualization import AttributeFormat, render_dep_tree, render_table
from spacy.tokens import Span, Doc, Token
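The import change above is the heart of this commit: render_dep_tree and render_table are now module-level functions in spacy.visualization rather than methods on a Visualizer class, so the tests call them directly. A minimal sketch of the new entry point, assuming this branch of spaCy is installed; the sentence and parse are made up for illustration:

    # Sketch of the module-level API introduced here (hypothetical data, not a
    # test fixture). render_dep_tree returns one string per token of the span.
    from spacy.tokens import Doc
    from spacy.vocab import Vocab
    from spacy.visualization import render_dep_tree

    doc = Doc(
        Vocab(),
        words=["I", "saw", "a", "horse"],
        heads=[1, 1, 3, 1],
        deps=["nsubj", "ROOT", "det", "dobj"],
    )
    # True places the root on the right-hand side of the diagram.
    for token, line in zip(doc, render_dep_tree(doc[0 : len(doc)], True)):
        print(f"{line} {token.text}")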
@@ -45,7 +45,7 @@ def test_viz_dep_tree_basic(en_vocab):
        heads=[2, 2, 3, None, 6, 6, 3, 3, 3],
        deps=["dep"] * 9,
    )
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], True)
    assert dep_tree == [
        "<╗ ",
        "<╣ ",

@@ -57,7 +57,7 @@ def test_viz_dep_tree_basic(en_vocab):
        "<══╣",
        "<══╝",
    ]
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], False)
    assert dep_tree == [
        " ╔>",
        " ╠>",

@@ -92,7 +92,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
        heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6],
        deps=["dep"] * 12,
    )
-    dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], True)
+    dep_tree = render_dep_tree(doc[3 : len(doc)], True)
    assert dep_tree == [
        "<╗ ",
        "<╣ ",

@@ -104,7 +104,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
        "<══╣",
        "<══╝",
    ]
-    dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], False)
+    dep_tree = render_dep_tree(doc[3 : len(doc)], False)
    assert dep_tree == [
        " ╔>",
        " ╠>",

@@ -120,7 +120,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
def test_viz_dep_tree_non_projective(horse_doc):
    """Test dependency tree display with a non-projective dependency."""
-    dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], True)
+    dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], True)
    assert dep_tree == [
        "<╗ ",
        "═╩═══╗",

@@ -132,7 +132,7 @@ def test_viz_dep_tree_non_projective(horse_doc):
        "═╝<╝ ║",
        "<════╝",
    ]
-    dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], False)
+    dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], False)
    assert dep_tree == [
        " ╔>",
        "╔═══╩═",

@@ -163,7 +163,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
        heads=[5, 5, 0, 5, 5, None, 4, 5],
        deps=["dep"] * 8,
    )
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], True)
    assert dep_tree == [
        "═╗<╗",
        " ║<╣",

@@ -174,7 +174,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
        "<╝ ║",
        "<══╝",
    ]
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], False)
    assert dep_tree == [
        "╔>╔═",
        "╠>║ ",

@@ -190,7 +190,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
def test_viz_dep_tree_input_not_span(horse_doc):
    """Test dependency tree display behaviour when the input is not a Span."""
    with pytest.raises(ValueError):
-        Visualizer.render_dep_tree(horse_doc[1:3], True)
+        render_dep_tree(horse_doc[1:3], True)


def test_viz_render_native_attributes(horse_doc):

@@ -199,7 +199,10 @@ def test_viz_render_native_attributes(horse_doc):
    assert AttributeFormat("dep_").render(horse_doc[2]) == "dep"
    with pytest.raises(AttributeError):
        AttributeFormat("depp").render(horse_doc[2])
+    with pytest.raises(AttributeError):
+        AttributeFormat("tree_left").render(horse_doc[2])
+    with pytest.raises(AttributeError):
+        AttributeFormat("tree_right").render(horse_doc[2])


def test_viz_render_colors(horse_doc):
    assert (
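The hunk above extends test_viz_render_native_attributes: an AttributeFormat renders one token attribute, unknown names raise AttributeError, and the two newly tested tree pseudo-attributes are rejected at token level because only the table renderer can resolve them. A small sketch of that behaviour, again assuming this branch is installed and using a made-up one-token doc in place of the horse_doc fixture:

    from spacy.tokens import Doc
    from spacy.vocab import Vocab
    from spacy.visualization import AttributeFormat

    doc = Doc(Vocab(), words=["horses"], heads=[0], deps=["ROOT"])

    print(AttributeFormat("dep_").render(doc[0]))  # "ROOT"
    for bad_name in ("depp", "tree_left", "tree_right"):
        try:
            AttributeFormat(bad_name).render(doc[0])
        except AttributeError:
            print(f"{bad_name!r} is not a real token attribute")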
@@ -265,7 +268,7 @@ def test_viz_minimal_render_table_one_sentence(
        AttributeFormat("ent_type_"),
    ]
    assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3).strip()
+        render_table(fully_featured_doc_one_sentence, formats, spacing=3).strip()
        == """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes

@@ -295,7 +298,7 @@ def test_viz_minimal_render_table_empty_text(
        AttributeFormat("morph"),
        AttributeFormat("ent_type_"),
    ]
-    assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
+    assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""

    # headers
    formats = [

@@ -308,7 +311,7 @@ def test_viz_minimal_render_table_empty_text(
        AttributeFormat("morph"),
        AttributeFormat("ent_type_", name="ent"),
    ]
-    assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
+    assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""


def test_viz_minimal_render_table_spacing(

@@ -325,7 +328,7 @@ def test_viz_minimal_render_table_spacing(
        AttributeFormat("ent_type_"),
    ]
    assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=1).strip()
+        render_table(fully_featured_doc_one_sentence, formats, spacing=1).strip()
        == """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes

@@ -356,8 +359,7 @@ def test_viz_minimal_render_table_two_sentences(
    ]
    assert (
-        Visualizer()
-        .render(fully_featured_doc_two_sentences, formats, spacing=3)
+        render_table(fully_featured_doc_two_sentences, formats, spacing=3)
        .strip()
        == """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON

@@ -401,7 +403,7 @@ def test_viz_rich_render_table_one_sentence(
        ),
    ]
    assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
+        render_table(fully_featured_doc_one_sentence, formats, spacing=3)
        == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
        if SUPPORTS_ANSI
        else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"

@@ -429,7 +431,7 @@ def test_viz_rich_render_table_one_sentence(
        ),
    ]
    assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
+        render_table(fully_featured_doc_one_sentence, formats, spacing=3)
        == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
        if SUPPORTS_ANSI
        else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"

@@ -456,9 +458,9 @@ def test_viz_rich_render_table_two_sentences(
            value_dep_bg_colors={"PERSON": 12},
        ),
    ]
-    print(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
+    print(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
    print(
-        repr(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
+        repr(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
    )
    target = (
        "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"

@@ -466,17 +468,17 @@ def test_viz_rich_render_table_two_sentences(
        else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
    )
    assert (
-        Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)
+        render_table(fully_featured_doc_two_sentences, formats, spacing=3)
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300
        )
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9
        )
        == target

@@ -504,13 +506,13 @@ def test_viz_rich_render_table_start(
        ),
    ]
    print(
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
        )
    )
    print(
        repr(
-            Visualizer().render(
+            render_table(
                fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
            )
        )

@@ -521,13 +523,13 @@ def test_viz_rich_render_table_start(
        else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
        )
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -538,7 +540,7 @@ def test_viz_rich_render_table_start(
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -549,7 +551,7 @@ def test_viz_rich_render_table_start(
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -559,7 +561,7 @@ def test_viz_rich_render_table_start(
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -571,7 +573,7 @@ def test_viz_rich_render_table_start(
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -581,7 +583,7 @@ def test_viz_rich_render_table_start(
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -591,7 +593,7 @@ def test_viz_rich_render_table_start(
        == ""
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -601,7 +603,7 @@ def test_viz_rich_render_table_start(
        == ""
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -611,7 +613,7 @@ def test_viz_rich_render_table_start(
        == ""
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,

@@ -650,25 +652,25 @@ def test_viz_rich_render_table_end(
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, start_i=2
        )
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3
        )
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences, formats, spacing=3, length=3
        )
        == target
    )
    assert (
-        Visualizer().render(
+        render_table(
            fully_featured_doc_two_sentences,
            formats,
            spacing=3,
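The long expected strings in the rich-table tests above are raw ANSI output: \x1b[38;5;Nm switches the foreground to 256-colour N, \x1b[48;5;Nm the background, and \x1b[0m resets, so every coloured cell is wrapped in escape codes, and the uncoloured variants are compared when SUPPORTS_ANSI is false. The colouring itself comes from wasabi, as this quick sketch shows (colour numbers taken from the tests; the call pattern matches the one used in the formatter source below):

    # How the escape sequences in the expected strings are produced: the
    # formatter delegates to wasabi's no-print Printer. Colour 2 is the green
    # of the tree/dep columns; 50 on 12 is the PERSON entity highlight.
    import wasabi

    printer = wasabi.Printer(no_print=True)
    print(repr(printer.text("poss", color=2)))                  # '\x1b[38;5;2mposs\x1b[0m'
    print(repr(printer.text("PERSON", color=50, bg_color=12)))  # adds '48;5;12' for the background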

File 2 of 3:

@@ -38,6 +38,7 @@ from .underscore import Underscore, get_ext_args
from ._retokenize import Retokenizer
from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS
from ..util import get_words_and_spaces
+from ..visualization import render_document

DEF PADDING = 5

@@ -1751,6 +1752,45 @@ cdef class Doc:
        attrs.extend(intify_attr(x) for x in DOCBIN_ALL_ATTRS)
        return tuple(attrs)

+    def inspect(
+        self,
+        search_attr_name=None,
+        search_attr_value=None,
+        *,
+        start_i=0,
+        length=None
+    ):
+        """Prints a tabular representation of the document or part of the document.
+        If part of the document is specified using any of the four optional
+        parameters, the sentences surrounding that part of the document are rendered;
+        if none of the four optional parameters is specified, the whole document is
+        rendered.
+
+        search_attr_name: the name of an attribute to search for in order to
+            determine where to start rendering, e.g. "lemma_",
+            or *None* if no search is to be carried out. If either
+            of *search_attr_name* and *search_attr_value* is *None*,
+            the behaviour is as if both were *None*.
+        search_attr_value: the value of an attribute to search for in order to
+            determine where to start rendering, e.g. "be",
+            or *None* if no search is to be carried out. If either
+            of *search_attr_name* and *search_attr_value* is *None*,
+            the behaviour is as if both were *None*.
+        start_i: the token index at which to start searching, or at
+            whose sentence to start rendering. Default: 0.
+        length: the number of tokens after *start_i* at whose sentence
+            to stop rendering. If *None*, the rest of the
+            document is rendered.
+        """
+        print(
+            render_document(
+                self,
+                search_attr_name,
+                search_attr_value,
+                start_i=start_i,
+                length=length
+            )
+        )

cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2:
    cdef int i = token_by_char(tokens, length, start_char)
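The new Doc.inspect() is deliberately thin: it resolves to a single print(render_document(...)) call, so all rendering logic lives in the visualization module. A hedged usage sketch (the pipeline name is illustrative; any pipeline with a parser would do):

    # Hypothetical usage of the new Doc.inspect() convenience method; it needs
    # sentence boundaries and a dependency parse, hence the parser pipeline.
    import spacy

    nlp = spacy.load("en_core_web_sm")
    doc = nlp("Sarah's sister flew to Silicon Valley via London. She loved it.")

    doc.inspect()                      # whole document, one table per sentence
    doc.inspect("lemma_", "fly")       # start at the sentence containing the match
    doc.inspect(start_i=10, length=3)  # only sentences covering tokens 10-13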

File 3 of 3:

@@ -4,7 +4,6 @@ from re import search
from typing import Dict, List, Optional, Union, cast
import wasabi
from wasabi.util import supports_ansi
-from spacy.tokens import Span, Token, Doc

SUPPORTS_ANSI = supports_ansi()

@@ -57,7 +56,6 @@ class AttributeFormat:
        max_width: Optional[int] = None,
        fg_color: Optional[Union[str, int]] = None,
        bg_color: Optional[Union[str, int]] = None,
-        permitted_vals: Optional[tuple] = None,
        value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None,
        value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
    ):

@@ -78,13 +76,17 @@ class AttributeFormat:
        self.max_width = max_width
        self.fg_color = fg_color
        self.bg_color = bg_color
-        self.value_dep_fg_colors = value_dep_fg_colors
-        self.value_dep_bg_colors = value_dep_bg_colors
+        self.value_dep_fg_colors = (
+            value_dep_fg_colors if value_dep_fg_colors is not None else {}
+        )
+        self.value_dep_bg_colors = (
+            value_dep_bg_colors if value_dep_bg_colors is not None else {}
+        )
        self.printer = wasabi.Printer(no_print=True)

    def render(
        self,
-        token: Token,
+        token,
        *,
        right_pad_to_len: Optional[int] = None,
        ignore_colors: bool = False,

@@ -93,7 +95,7 @@ class AttributeFormat:
        right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
        ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
        """
-        value = get_token_value(token, self.attribute)
+        value = _get_token_value(token, self.attribute)
        if self.max_width is not None:
            value = value[: self.max_width]
        fg_color = None
@@ -103,315 +105,363 @@ class AttributeFormat:
        else:
            right_padding = ""
        if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
-            if self.value_dep_fg_colors is not None:
+            if len(self.value_dep_fg_colors) > 0:
                fg_color = self.value_dep_fg_colors.get(value, None)
-            if self.value_dep_bg_colors is not None:
+            if len(self.value_dep_bg_colors) > 0:
                bg_color = self.value_dep_bg_colors.get(value, None)
            if fg_color is not None or bg_color is not None:
                value = self.printer.text(value, color=fg_color, bg_color=bg_color)
        return value + right_padding


-class Visualizer:
-    @staticmethod
-    def render_dep_tree(sent: Span, root_right: bool) -> List[str]:
+def render_dep_tree(sent, root_right: bool) -> List[str]:
    """
    Returns an ASCII rendering of the document with a dependency tree for each sentence. The
    dependency tree output for a given token has the same index within the output list of
    strings as that token within the input document.

    root_right: True if the tree should be rendered with the root on the right-hand side,
        False if the tree should be rendered with the root on the left-hand side.

    Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
    """
    # Check sent is really a sentence
    if sent.start != sent[0].sent.start or sent.end != sent[0].sent.end:
        raise ValueError(f"Span is not a sentence: '{sent}'")
    heads: List[Optional[int]] = []
    for token in sent:
        if token.dep_.lower() == "root" or token.head.i == token.i:
            heads.append(None)
        else:
            heads.append(token.head.i - sent.start)
    # Check there are no head references outside the sentence
    heads_outside_sent = [
        1 for h in heads if h is not None and (h < 0 or h > sent.end - sent.start)
    ]
    if len(heads_outside_sent) > 0:
        raise ValueError(f"Head reference outside sentence in sentence '{sent}'")
    children_lists: List[List[int]] = [[] for _ in range(sent.end - sent.start)]
    for child, head in enumerate(heads):
        if head is not None:
            children_lists[head].append(child)
    all_ind_ord_by_col: List[int] = []
    # start with the root column
    inds_in_this_col = [i for i, h in enumerate(heads) if h is None]
    while len(inds_in_this_col) > 0:
        all_ind_ord_by_col = inds_in_this_col + all_ind_ord_by_col
        inds_in_next_col = []
        # The calculation order of the horizontal lengths of the children
        # on either given side of a head must ensure that children
        # closer to the head are processed first.
        for ind_in_this_col in inds_in_this_col:
            following_child_inds = [
                i for i in children_lists[ind_in_this_col] if i > ind_in_this_col
            ]
            inds_in_next_col.extend(following_child_inds)
            preceding_child_inds = [
                i for i in children_lists[ind_in_this_col] if i < ind_in_this_col
            ]
            preceding_child_inds.reverse()
            inds_in_next_col.extend(preceding_child_inds)
        inds_in_this_col = inds_in_next_col
    horiz_line_lens: List[int] = []
    for i in range(sent.end - sent.start):
        if heads[i] is None:
            horiz_line_lens.append(-1)
        elif len(children_lists[i]) == 0 and abs(cast(int, heads[i]) - i) == 1:
            # governed by direct neighbour and has no children itself
            horiz_line_lens.append(1)
        else:
            horiz_line_lens.append(0)
    while 0 in horiz_line_lens:
        for working_token_ind in (
            i for i in all_ind_ord_by_col if horiz_line_lens[i] == 0
        ):
            # render relation between this token and its head
            first_ind_in_rel = min(
                working_token_ind,
                cast(int, heads[working_token_ind]),
            )
            second_ind_in_rel = max(
                working_token_ind,
                cast(int, heads[working_token_ind]),
            )
            # If this token has children, they will already have been rendered.
            # The line needs to be one character longer than the longest of the
            # children's lines.
            if len(children_lists[working_token_ind]) > 0:
                horiz_line_lens[working_token_ind] = (
                    max([horiz_line_lens[i] for i in children_lists[working_token_ind]])
                    + 1
                )
            else:
                horiz_line_lens[working_token_ind] = 1
            for inbetween_ind in (
                i
                for i in range(first_ind_in_rel + 1, second_ind_in_rel)
                if horiz_line_lens[i] != 0
            ):
                alt_ind: int
                if (
                    inbetween_ind in children_lists[cast(int, heads[working_token_ind])]
                    and inbetween_ind not in children_lists[working_token_ind]
                ):
                    alt_ind = horiz_line_lens[inbetween_ind]
                else:
                    alt_ind = horiz_line_lens[inbetween_ind] + 1
                if alt_ind > horiz_line_lens[working_token_ind]:
                    horiz_line_lens[working_token_ind] = alt_ind
    max_horiz_line_len = max(horiz_line_lens)
    char_matrix = [
        [SPACE] * max_horiz_line_len * 2 for _ in range(sent.start, sent.end)
    ]
    for working_token_ind in range(sent.end - sent.start):
        head_token_ind = heads[working_token_ind]
        if head_token_ind is None:
            continue
        first_ind_in_rel = min(working_token_ind, head_token_ind)
        second_ind_in_rel = max(working_token_ind, head_token_ind)
        char_horiz_line_len = 2 * horiz_line_lens[working_token_ind]
        # Draw the corners of the relation
        char_matrix[first_ind_in_rel][char_horiz_line_len - 1] |= (
            HALF_HORIZONTAL_LINE + LOWER_HALF_VERTICAL_LINE
        )
        char_matrix[second_ind_in_rel][char_horiz_line_len - 1] |= (
            HALF_HORIZONTAL_LINE + UPPER_HALF_VERTICAL_LINE
        )
        # Draw the horizontal line for the governing token
        for working_horiz_pos in range(char_horiz_line_len - 1):
            if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE:
                char_matrix[head_token_ind][working_horiz_pos] |= FULL_HORIZONTAL_LINE
        # Draw the vertical line for the relation
        for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel):
            if (
                char_matrix[working_vert_pos][char_horiz_line_len - 1]
                != FULL_HORIZONTAL_LINE
            ):
                char_matrix[working_vert_pos][
                    char_horiz_line_len - 1
                ] |= FULL_VERTICAL_LINE
    for working_token_ind in (
        i for i in range(sent.end - sent.start) if heads[i] is not None
    ):
        for working_horiz_pos in range(
            2 * horiz_line_lens[working_token_ind] - 2, -1, -1
        ):
            if (
                (
                    char_matrix[working_token_ind][working_horiz_pos]
                    == FULL_VERTICAL_LINE
                )
                and working_horiz_pos > 1
                and char_matrix[working_token_ind][working_horiz_pos - 2] == SPACE
            ):
                # Cross over the existing vertical line, which is owing to a non-projective tree
                continue
            if char_matrix[working_token_ind][working_horiz_pos] != SPACE:
                # Draw the arrowhead to the right of what is already there
                char_matrix[working_token_ind][working_horiz_pos + 1] = ARROWHEAD
                break
            if working_horiz_pos == 0:
                # Draw the arrowhead at the boundary of the diagram
                char_matrix[working_token_ind][working_horiz_pos] = ARROWHEAD
            else:
                # Fill in the horizontal line for the governed token
                char_matrix[working_token_ind][
                    working_horiz_pos
                ] |= FULL_HORIZONTAL_LINE
    if root_right:
        return [
            "".join(
                ROOT_RIGHT_CHARS[char_matrix[vert_pos][horiz_pos]]
                for horiz_pos in range((max_horiz_line_len * 2))
            )
            for vert_pos in range(sent.end - sent.start)
        ]
    else:
        return [
            "".join(
                ROOT_LEFT_CHARS[char_matrix[vert_pos][horiz_pos]]
                for horiz_pos in range((max_horiz_line_len * 2))
            )[::-1]
            for vert_pos in range(sent.end - sent.start)
        ]
-    def render(
-        self,
-        doc: Doc,
-        cols: List[AttributeFormat],
-        spacing: int = 2,
-        start_i: int = 0,
-        length: Optional[int] = None,
-        search_attr_name: Optional[str] = None,
-        search_attr_value: Optional[str] = None,
-    ) -> str:
-        """Renders a document as a table.
-        TODO: specify a specific portion of the document to display.
+def render_table(
+    doc,
+    cols: List[AttributeFormat],
+    spacing: int = 3,
+    search_attr_name: Optional[str] = None,
+    search_attr_value: Optional[str] = None,
+    start_i: int = 0,
+    length: Optional[int] = None,
+) -> str:
+    """Renders a document as a table, allowing the caller to specify various
+    display options.

+    doc: the document.
    cols: the attribute formats of the columns to display.
        tree_right and tree_left are magic values for the
        attributes that render dependency trees where the
        roots are on the left or right respectively.
    spacing: the number of spaces between each column in the table.
    search_attr_name: the name of an attribute to search for in order to
        determine where to start rendering, e.g. "lemma_",
        or *None* if no search is to be carried out. If either
        of *search_attr_name* and *search_attr_value* is *None*,
        the behaviour is as if both were *None*.
    search_attr_value: the value of an attribute to search for in order to
        determine where to start rendering, e.g. "be",
        or *None* if no search is to be carried out. If either
        of *search_attr_name* and *search_attr_value* is *None*,
        the behaviour is as if both were *None*.
    start_i: the token index at which to start searching, or at
        whose sentence to start rendering. Default: 0.
    length: the number of tokens after *start_i* at whose sentence
        to stop rendering. If *None*, the rest of the
        document is rendered.
    """
    return_str = ""
-    if search_attr_name is not None and search_attr_value is not None:
-        adj_start_i = get_adjusted_start_i(
+    if (
+        search_attr_name is not None
+        and search_attr_name not in ("tree_right", "tree_left")
+        and search_attr_value is not None
+    ):
+        adj_start_i = _get_adjusted_start_i(
            doc, start_i, cols, search_attr_name, search_attr_value
        )
    else:
        adj_start_i = start_i
    if adj_start_i >= len(doc):
        return return_str
    end_i = len(doc) - 1
    if length is not None:
        end_i = min(end_i, adj_start_i + length)
    elif start_i > 0 or (
        search_attr_name is not None and search_attr_value is not None
    ):
        end_i = adj_start_i
    adj_start_i = doc[adj_start_i].sent.start
    end_i = doc[end_i].sent.end
    for sent in doc[adj_start_i:end_i].sents:
        if "tree_right" in (c.attribute for c in cols):
-            tree_right = self.render_dep_tree(sent, True)
+            tree_right = render_dep_tree(sent, True)
        if "tree_left" in (c.attribute for c in cols):
-            tree_left = self.render_dep_tree(sent, False)
+            tree_left = render_dep_tree(sent, False)
        widths = []
        for col in cols:
            # get the values without any color codes
            if col.attribute == "tree_left":
                width = len(tree_left[0])  # type: ignore
            elif col.attribute == "tree_right":
                width = len(tree_right[0])  # type: ignore
            else:
                if len(sent) > 0:
                    width = max(
                        len(col.render(token, ignore_colors=True)) for token in sent
                    )
                else:
                    width = 0
                if col.max_width is not None:
                    width = min(width, col.max_width)
            width = max(width, len(col.name))
            widths.append(width)
        data: List[List[str]] = []
        for token_index, token in enumerate(sent):
            inner_data: List[str] = []
            for col_index, col in enumerate(cols):
                if col.attribute == "tree_right":
                    inner_data.append(tree_right[token_index])
                elif col.attribute == "tree_left":
                    inner_data.append(tree_left[token_index])
                else:
                    inner_data.append(
                        col.render(token, right_pad_to_len=widths[col_index])
                    )
            data.append(inner_data)
        header: Optional[List[str]]
        if len([1 for c in cols if len(c.name) > 0]) > 0:
            header = [c.name for c in cols]
        else:
            header = None
        aligns = [c.aligns for c in cols]
        fg_colors = [c.fg_color for c in cols]
        bg_colors = [c.bg_color for c in cols]
        return_str += (
            wasabi.table(
                data,
                header=header,
                divider=True,
                aligns=aligns,
                widths=widths,
                fg_colors=fg_colors,
                bg_colors=bg_colors,
                spacing=spacing,
            )
            + "\n"
        )
    return return_str
-def get_token_value(token: Token, attribute: str) -> str:
+def render_document(
+    doc,
+    search_attr_name: Optional[str] = None,
+    search_attr_value: Optional[str] = None,
+    *,
+    start_i: int = 0,
+    length: Optional[int] = None,
+) -> str:
+    """Renders a document as a table using standard display options.
+
+    doc: the document.
+    search_attr_name: the name of an attribute to search for in order to
+        determine where to start rendering, e.g. "lemma_",
+        or *None* if no search is to be carried out. If either
+        of *search_attr_name* and *search_attr_value* is *None*,
+        the behaviour is as if both were *None*.
+    search_attr_value: the value of an attribute to search for in order to
+        determine where to start rendering, e.g. "be",
+        or *None* if no search is to be carried out. If either
+        of *search_attr_name* and *search_attr_value* is *None*,
+        the behaviour is as if both were *None*.
+    start_i: the token index at which to start searching, or at
+        whose sentence to start rendering. Default: 0.
+    length: the number of tokens after *start_i* at whose sentence
+        to stop rendering. If *None*, the rest of the
+        document is rendered.
+    """
+    cols = [
+        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=4),
+        AttributeFormat("dep_", name="dep_"),
+        AttributeFormat("ent_type_", name="ent_type_"),
+        AttributeFormat("i", name="index", aligns="r"),
+        AttributeFormat("text", name="text", max_width=20),
+        AttributeFormat("lemma_", name="lemma_", max_width=20),
+        AttributeFormat("pos_", name="pos_"),
+        AttributeFormat("tag_", name="tag_"),
+        AttributeFormat("morph", name="morph_", max_width=60),
+    ]
+    if search_attr_name is not None and search_attr_value is not None:
+        for col in cols:
+            if col.attribute == search_attr_name or col.name == search_attr_name:
+                col.value_dep_fg_colors[search_attr_value] = 1
+    return render_table(
+        doc=doc,
+        cols=cols,
+        spacing=3,
+        search_attr_name=search_attr_name,
+        search_attr_value=search_attr_value,
+        start_i=start_i,
+        length=length,
+    )
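render_document is thus just a canned column list handed to render_table, with tree_left as the magic attribute that makes the first column a dependency tree and any search hit highlighted through value_dep_fg_colors (colour 1). A hedged sketch of both entry points, assuming this branch is installed and reusing the parsed doc from the Doc.inspect() sketch earlier:

    # Sketch of the two rendering entry points defined above; column choices
    # are illustrative, not the fixed set render_document uses.
    from spacy.visualization import AttributeFormat, render_document, render_table

    print(render_document(doc, "lemma_", "fly"))  # standard columns, match highlighted

    cols = [
        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
        AttributeFormat("dep_", name="dep"),
        AttributeFormat("text", name="text", max_width=20),
    ]
    print(render_table(doc, cols, spacing=3))     # caller-defined columns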
+def _get_token_value(token, attribute: str) -> str:
    """
    Get value *token.x.y.z*.

@@ -422,11 +472,11 @@ def get_token_value(token: Token, attribute: str) -> str:
    parts = attribute.split(".")
    for part in parts[:-1]:
        obj = getattr(obj, part)
-    return str(getattr(obj, parts[-1]))
+    return str(getattr(obj, parts[-1])).strip()


-def get_adjusted_start_i(
-    doc: Doc,
+def _get_adjusted_start_i(
+    doc,
    start_i: int,
    cols: List[AttributeFormat],
    search_attr_name: str,

@@ -447,7 +497,7 @@ def get_adjusted_start_i(
    for col in cols:
        if col.name == search_attr_name or col.attribute == search_attr_name:
            for token in doc[start_i:]:
-                if get_token_value(token, col.attribute) == search_attr_value:
+                if _get_token_value(token, col.attribute) == search_attr_value:
                    return token.i
            else:
                return len(doc)
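_get_token_value is what lets a column name be a dotted path: every segment but the last is resolved with getattr, and the final value is stringified and stripped, so a format like "head.lemma_" displays a property of the token's governor rather than of the token itself. A last sketch under the same installed-branch assumption:

    # Sketch of the dotted-path lookup implemented by _get_token_value: a
    # column can show an attribute of a related token, e.g. each token's
    # head's lemma.
    from spacy.visualization import AttributeFormat

    head_lemma = AttributeFormat("head.lemma_", name="head lemma")
    for token in doc:  # doc as in the earlier sketches
        print(head_lemma.render(token))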