First working version

This commit is contained in:
richardpaulhudson 2023-01-26 19:21:27 +01:00
parent 0ea623990e
commit 9243341f74
3 changed files with 432 additions and 340 deletions

View File

@ -1,6 +1,6 @@
import pytest
from wasabi.util import supports_ansi
from spacy.visualization import AttributeFormat, Visualizer
from spacy.visualization import AttributeFormat, render_dep_tree, render_table
from spacy.tokens import Span, Doc, Token
@ -45,7 +45,7 @@ def test_viz_dep_tree_basic(en_vocab):
heads=[2, 2, 3, None, 6, 6, 3, 3, 3],
deps=["dep"] * 9,
)
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
dep_tree = render_dep_tree(doc[0 : len(doc)], True)
assert dep_tree == [
"<╗ ",
"<╣ ",
@ -57,7 +57,7 @@ def test_viz_dep_tree_basic(en_vocab):
"<══╣",
"<══╝",
]
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
dep_tree = render_dep_tree(doc[0 : len(doc)], False)
assert dep_tree == [
" ╔>",
" ╠>",
@ -92,7 +92,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6],
deps=["dep"] * 12,
)
dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], True)
dep_tree = render_dep_tree(doc[3 : len(doc)], True)
assert dep_tree == [
"<╗ ",
"<╣ ",
@ -104,7 +104,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
"<══╣",
"<══╝",
]
dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], False)
dep_tree = render_dep_tree(doc[3 : len(doc)], False)
assert dep_tree == [
" ╔>",
" ╠>",
@ -120,7 +120,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
def test_viz_dep_tree_non_projective(horse_doc):
"""Test dependency tree display with a non-projective dependency."""
dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], True)
dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], True)
assert dep_tree == [
"<╗ ",
"═╩═══╗",
@ -132,7 +132,7 @@ def test_viz_dep_tree_non_projective(horse_doc):
"═╝<╝ ║",
"<════╝",
]
dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], False)
dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], False)
assert dep_tree == [
" ╔>",
"╔═══╩═",
@ -163,7 +163,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
heads=[5, 5, 0, 5, 5, None, 4, 5],
deps=["dep"] * 8,
)
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
dep_tree = render_dep_tree(doc[0 : len(doc)], True)
assert dep_tree == [
"═╗<╗",
" ║<╣",
@ -174,7 +174,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
"<╝ ║",
"<══╝",
]
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
dep_tree = render_dep_tree(doc[0 : len(doc)], False)
assert dep_tree == [
"╔>╔═",
"╠>║ ",
@ -190,7 +190,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
def test_viz_dep_tree_input_not_span(horse_doc):
"""Test dependency tree display behaviour when the input is not a Span."""
with pytest.raises(ValueError):
Visualizer.render_dep_tree(horse_doc[1:3], True)
render_dep_tree(horse_doc[1:3], True)
def test_viz_render_native_attributes(horse_doc):
@ -199,7 +199,10 @@ def test_viz_render_native_attributes(horse_doc):
assert AttributeFormat("dep_").render(horse_doc[2]) == "dep"
with pytest.raises(AttributeError):
AttributeFormat("depp").render(horse_doc[2])
with pytest.raises(AttributeError):
AttributeFormat("tree_left").render(horse_doc[2])
with pytest.raises(AttributeError):
AttributeFormat("tree_right").render(horse_doc[2])
def test_viz_render_colors(horse_doc):
assert (
@ -265,7 +268,7 @@ def test_viz_minimal_render_table_one_sentence(
AttributeFormat("ent_type_"),
]
assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3).strip()
render_table(fully_featured_doc_one_sentence, formats, spacing=3).strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes
@ -295,7 +298,7 @@ def test_viz_minimal_render_table_empty_text(
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
# headers
formats = [
@ -308,7 +311,7 @@ def test_viz_minimal_render_table_empty_text(
AttributeFormat("morph"),
AttributeFormat("ent_type_", name="ent"),
]
assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
def test_viz_minimal_render_table_spacing(
@ -325,7 +328,7 @@ def test_viz_minimal_render_table_spacing(
AttributeFormat("ent_type_"),
]
assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=1).strip()
render_table(fully_featured_doc_one_sentence, formats, spacing=1).strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes
@ -356,8 +359,7 @@ def test_viz_minimal_render_table_two_sentences(
]
assert (
Visualizer()
.render(fully_featured_doc_two_sentences, formats, spacing=3)
render_table(fully_featured_doc_two_sentences, formats, spacing=3)
.strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
@ -401,7 +403,7 @@ def test_viz_rich_render_table_one_sentence(
),
]
assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
render_table(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
@ -429,7 +431,7 @@ def test_viz_rich_render_table_one_sentence(
),
]
assert (
Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
render_table(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
if SUPPORTS_ANSI
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
@ -456,9 +458,9 @@ def test_viz_rich_render_table_two_sentences(
value_dep_bg_colors={"PERSON": 12},
),
]
print(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
print(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
print(
repr(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
repr(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
)
target = (
"\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
@ -466,17 +468,17 @@ def test_viz_rich_render_table_two_sentences(
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
)
assert (
Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)
render_table(fully_featured_doc_two_sentences, formats, spacing=3)
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300
)
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9
)
== target
@ -504,13 +506,13 @@ def test_viz_rich_render_table_start(
),
]
print(
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
)
)
print(
repr(
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
)
)
@ -521,13 +523,13 @@ def test_viz_rich_render_table_start(
else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
)
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -538,7 +540,7 @@ def test_viz_rich_render_table_start(
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -549,7 +551,7 @@ def test_viz_rich_render_table_start(
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -559,7 +561,7 @@ def test_viz_rich_render_table_start(
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -571,7 +573,7 @@ def test_viz_rich_render_table_start(
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -581,7 +583,7 @@ def test_viz_rich_render_table_start(
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -591,7 +593,7 @@ def test_viz_rich_render_table_start(
== ""
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -601,7 +603,7 @@ def test_viz_rich_render_table_start(
== ""
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -611,7 +613,7 @@ def test_viz_rich_render_table_start(
== ""
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,
@ -650,25 +652,25 @@ def test_viz_rich_render_table_end(
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=2
)
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3
)
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences, formats, spacing=3, length=3
)
== target
)
assert (
Visualizer().render(
render_table(
fully_featured_doc_two_sentences,
formats,
spacing=3,

View File

@ -38,6 +38,7 @@ from .underscore import Underscore, get_ext_args
from ._retokenize import Retokenizer
from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS
from ..util import get_words_and_spaces
from ..visualization import render_document
DEF PADDING = 5
@ -1751,6 +1752,45 @@ cdef class Doc:
attrs.extend(intify_attr(x) for x in DOCBIN_ALL_ATTRS)
return tuple(attrs)
def inspect(
    self,
    search_attr_name=None,
    search_attr_value=None,
    *,
    start_i=0,
    length=None
):
    """Print a table describing this document, or a selected part of it.

    When all four optional parameters keep their default values the whole
    document is shown. As soon as any of them narrows the selection, the
    sentences surrounding the selected part of the document are shown instead.
    The table itself is produced by *render_document*; this method only
    prints it.

    search_attr_name:  name of the attribute to search on to decide where
                       rendering starts, e.g. "lemma_". *None* means no
                       search. A search only takes place when both
                       *search_attr_name* and *search_attr_value* are given;
                       if either is *None* both are treated as *None*.
    search_attr_value: value of the attribute to search for to decide where
                       rendering starts, e.g. "be". *None* means no search.
                       A search only takes place when both
                       *search_attr_name* and *search_attr_value* are given;
                       if either is *None* both are treated as *None*.
    start_i:           token index at which the search begins or, without a
                       search, the token whose sentence is rendered first.
                       Default: 0.
    length:            number of tokens after *start_i*; rendering stops at
                       the sentence containing that token. *None* renders
                       through to the end of the document.
    """
    # Build the complete table first, then emit it with a single print call.
    table = render_document(
        self,
        search_attr_name,
        search_attr_value,
        start_i=start_i,
        length=length,
    )
    print(table)
cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2:
cdef int i = token_by_char(tokens, length, start_char)

View File

@ -4,7 +4,6 @@ from re import search
from typing import Dict, List, Optional, Union, cast
import wasabi
from wasabi.util import supports_ansi
from spacy.tokens import Span, Token, Doc
SUPPORTS_ANSI = supports_ansi()
@ -57,7 +56,6 @@ class AttributeFormat:
max_width: Optional[int] = None,
fg_color: Optional[Union[str, int]] = None,
bg_color: Optional[Union[str, int]] = None,
permitted_vals: Optional[tuple] = None,
value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None,
value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
):
@ -78,13 +76,17 @@ class AttributeFormat:
self.max_width = max_width
self.fg_color = fg_color
self.bg_color = bg_color
self.value_dep_fg_colors = value_dep_fg_colors
self.value_dep_bg_colors = value_dep_bg_colors
self.value_dep_fg_colors = (
value_dep_fg_colors if value_dep_fg_colors is not None else {}
)
self.value_dep_bg_colors = (
value_dep_bg_colors if value_dep_bg_colors is not None else {}
)
self.printer = wasabi.Printer(no_print=True)
def render(
self,
token: Token,
token,
*,
right_pad_to_len: Optional[int] = None,
ignore_colors: bool = False,
@ -93,7 +95,7 @@ class AttributeFormat:
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
"""
value = get_token_value(token, self.attribute)
value = _get_token_value(token, self.attribute)
if self.max_width is not None:
value = value[: self.max_width]
fg_color = None
@ -103,18 +105,16 @@ class AttributeFormat:
else:
right_padding = ""
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
if self.value_dep_fg_colors is not None:
if len(self.value_dep_fg_colors) > 0:
fg_color = self.value_dep_fg_colors.get(value, None)
if self.value_dep_bg_colors is not None:
if len(self.value_dep_bg_colors) > 0:
bg_color = self.value_dep_bg_colors.get(value, None)
if fg_color is not None or bg_color is not None:
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
return value + right_padding
class Visualizer:
@staticmethod
def render_dep_tree(sent: Span, root_right: bool) -> List[str]:
def render_dep_tree(sent, root_right: bool) -> List[str]:
"""
Returns an ASCII rendering of the document with a dependency tree for each sentence. The
dependency tree output for a given token has the same index within the output list of
@ -192,12 +192,7 @@ class Visualizer:
# children's lines.
if len(children_lists[working_token_ind]) > 0:
horiz_line_lens[working_token_ind] = (
max(
[
horiz_line_lens[i]
for i in children_lists[working_token_ind]
]
)
max([horiz_line_lens[i] for i in children_lists[working_token_ind]])
+ 1
)
else:
@ -209,8 +204,7 @@ class Visualizer:
):
alt_ind: int
if (
inbetween_ind
in children_lists[cast(int, heads[working_token_ind])]
inbetween_ind in children_lists[cast(int, heads[working_token_ind])]
and inbetween_ind not in children_lists[working_token_ind]
):
alt_ind = horiz_line_lens[inbetween_ind]
@ -241,9 +235,7 @@ class Visualizer:
# Draw the horizontal line for the governing token
for working_horiz_pos in range(char_horiz_line_len - 1):
if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE:
char_matrix[head_token_ind][
working_horiz_pos
] |= FULL_HORIZONTAL_LINE
char_matrix[head_token_ind][working_horiz_pos] |= FULL_HORIZONTAL_LINE
# Draw the vertical line for the relation
for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel):
@ -299,29 +291,25 @@ class Visualizer:
for vert_pos in range(sent.end - sent.start)
]
def render(
self,
doc: Doc,
def render_table(
doc,
cols: List[AttributeFormat],
spacing: int = 2,
start_i: int = 0,
length: Optional[int] = None,
spacing: int = 3,
search_attr_name: Optional[str] = None,
search_attr_value: Optional[str] = None,
) -> str:
"""Renders a document as a table.
TODO: specify a specific portion of the document to display.
start_i: int = 0,
length: Optional[int] = None,
) -> str:
"""Renders a document as a table, allowing the caller to specify various
display options.
doc: the document.
cols: the attribute formats of the columns to display.
tree_right and tree_left are magic values for the
attributes that render dependency trees where the
roots are on the left or right respectively.
spacing: the number of spaces between each column in the table.
start_i: the token index at which to start searching, or at
whose sentence to start rendering. Default: 0.
length: the number of tokens after *start_i* at whose sentence
to stop rendering. If *None*, the rest of the
document is rendered.
search_attr_name: the name of an attribute to search for in order to
determine where to start rendering, e.g. "lemma_",
or *None* if no search is to be carried out. If either
@ -332,10 +320,19 @@ class Visualizer:
or *None* if no search is to be carried out. If either
of *search_attr_name* and *search_attr_value* is *None*,
the behaviour is as if both were *None*.
start_i: the token index at which to start searching, or at
whose sentence to start rendering. Default: 0.
length: the number of tokens after *start_i* at whose sentence
to stop rendering. If *None*, the rest of the
document is rendered.
"""
return_str = ""
if search_attr_name is not None and search_attr_value is not None:
adj_start_i = get_adjusted_start_i(
if (
search_attr_name is not None
and search_attr_name not in ("tree_right", "tree_left")
and search_attr_value is not None
):
adj_start_i = _get_adjusted_start_i(
doc, start_i, cols, search_attr_name, search_attr_value
)
else:
@ -353,9 +350,9 @@ class Visualizer:
end_i = doc[end_i].sent.end
for sent in doc[adj_start_i:end_i].sents:
if "tree_right" in (c.attribute for c in cols):
tree_right = self.render_dep_tree(sent, True)
tree_right = render_dep_tree(sent, True)
if "tree_left" in (c.attribute for c in cols):
tree_left = self.render_dep_tree(sent, False)
tree_left = render_dep_tree(sent, False)
widths = []
for col in cols:
# get the values without any color codes
@ -411,7 +408,60 @@ class Visualizer:
return return_str
def get_token_value(token: Token, attribute: str) -> str:
def render_document(
    doc,
    search_attr_name: Optional[str] = None,
    search_attr_value: Optional[str] = None,
    *,
    start_i: int = 0,
    length: Optional[int] = None,
) -> str:
    """Render a document as a table with a fixed, standard column layout.

    The standard layout is built here and handed to *render_table* together
    with the search and range arguments. When a search is requested, the
    searched-for value is additionally given a value-dependent foreground
    color in every standard column that the search name refers to.

    doc:               the document.
    search_attr_name:  name of the attribute to search on to decide where
                       rendering starts, e.g. "lemma_". *None* means no
                       search. A search only takes place when both
                       *search_attr_name* and *search_attr_value* are given;
                       if either is *None* both are treated as *None*.
    search_attr_value: value of the attribute to search for to decide where
                       rendering starts, e.g. "be". *None* means no search.
                       A search only takes place when both
                       *search_attr_name* and *search_attr_value* are given;
                       if either is *None* both are treated as *None*.
    start_i:           token index at which the search begins or, without a
                       search, the token whose sentence is rendered first.
                       Default: 0.
    length:            number of tokens after *start_i*; rendering stops at
                       the sentence containing that token. *None* renders
                       through to the end of the document.
    RETURNS:           the table as produced by *render_table*.
    """
    standard_cols = [
        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=4),
        AttributeFormat("dep_", name="dep_"),
        AttributeFormat("ent_type_", name="ent_type_"),
        AttributeFormat("i", name="index", aligns="r"),
        AttributeFormat("text", name="text", max_width=20),
        AttributeFormat("lemma_", name="lemma_", max_width=20),
        AttributeFormat("pos_", name="pos_"),
        AttributeFormat("tag_", name="tag_"),
        AttributeFormat("morph", name="morph_", max_width=60),
    ]

    # A search is only active when both the name and the value were supplied.
    search_requested = not (search_attr_name is None or search_attr_value is None)
    if search_requested:
        for column in standard_cols:
            # The search name may refer to a column either by its underlying
            # attribute or by its display name.
            if column.attribute != search_attr_name and column.name != search_attr_name:
                continue
            # Register foreground color code 1 for cells holding the
            # searched-for value in this column.
            column.value_dep_fg_colors[search_attr_value] = 1

    table = render_table(
        doc=doc,
        cols=standard_cols,
        spacing=3,
        search_attr_name=search_attr_name,
        search_attr_value=search_attr_value,
        start_i=start_i,
        length=length,
    )
    return table
def _get_token_value(token, attribute: str) -> str:
"""
Get value *token.x.y.z*.
@ -422,11 +472,11 @@ def get_token_value(token: Token, attribute: str) -> str:
parts = attribute.split(".")
for part in parts[:-1]:
obj = getattr(obj, part)
return str(getattr(obj, parts[-1]))
return str(getattr(obj, parts[-1])).strip()
def get_adjusted_start_i(
doc: Doc,
def _get_adjusted_start_i(
doc,
start_i: int,
cols: List[AttributeFormat],
search_attr_name: str,
@ -447,7 +497,7 @@ def get_adjusted_start_i(
for col in cols:
if col.name == search_attr_name or col.attribute == search_attr_name:
for token in doc[start_i:]:
if get_token_value(token, col.attribute) == search_attr_value:
if _get_token_value(token, col.attribute) == search_attr_value:
return token.i
else:
return len(doc)