Add surrounding tokens functionality

This commit is contained in:
Richard Hudson 2021-12-23 16:13:40 +01:00
parent ed788c5def
commit e713aa0938
3 changed files with 408 additions and 55 deletions

View File

@ -48,7 +48,7 @@ install_requires =
preshed>=3.0.2,<3.1.0
thinc>=8.0.12,<8.1.0
blis>=0.4.0,<0.8.0
wasabi>=0.8.1,<1.1.0
wasabi>=0.9.0,<1.1.0
srsly>=2.4.1,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.5.0

View File

@ -227,6 +227,26 @@ def test_visualization_render_native_attribute_int(en_vocab):
assert AttributeFormat("head.i").render(doc[2]) == "3"
def test_visualization_render_native_attribute_int_with_right_padding(en_vocab):
doc = Doc(
en_vocab,
words=[
"I",
"saw",
"a",
"horse",
"yesterday",
"that",
"was",
"injured",
".",
],
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
deps=["dep"] * 9,
)
assert AttributeFormat("head.i").render(doc[2], right_pad_to_length=3) == "3 "
def test_visualization_render_native_attribute_str(en_vocab):
doc = Doc(
en_vocab,
@ -278,6 +298,64 @@ def test_visualization_render_colors(en_vocab):
)
def test_visualization_render_whole_row_colors(en_vocab):
doc = Doc(
en_vocab,
words=[
"I",
"saw",
"a",
"horse",
"yesterday",
"that",
"was",
"injured",
".",
],
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
deps=["dep"] * 9,
)
assert (
AttributeFormat(
"dep_",
).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
if supports_ansi
else "dep"
)
def test_visualization_render_whole_row_colors_with_value_dependent_colors(en_vocab):
doc = Doc(
en_vocab,
words=[
"I",
"saw",
"a",
"horse",
"yesterday",
"that",
"was",
"injured",
".",
],
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
deps=["dep"] * 9,
)
assert (
AttributeFormat(
"dep_",
value_dependent_fg_colors={"dep": 2},
value_dependent_bg_colors={"dep": 11},
).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
if supports_ansi
else "dep"
)
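
The expected strings in these color tests are 256-color ANSI escape sequences: \x1b[38;5;<n>m selects foreground color <n>, \x1b[48;5;<n>m selects the background, and \x1b[0m resets. A minimal sketch of how such a string is composed (helper name hypothetical, not part of the commit):

    def colorize(text: str, fg: int, bg: int) -> str:
        # Combine foreground and background codes in one escape sequence, then reset.
        return f"\x1b[38;5;{fg};48;5;{bg}m{text}\x1b[0m"

    assert colorize("dep", 8, 9) == "\x1b[38;5;8;48;5;9mdep\x1b[0m"
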
def test_visualization_render_colors_only_fg(en_vocab):
doc = Doc(
en_vocab,
@ -470,7 +548,9 @@ def test_visualization_minimal_render_table_one_sentence(
AttributeFormat("ent_type_"),
]
assert (
Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip()
Visualizer()
.render_table(fully_featured_doc_one_sentence, formats, spacing=3)
.strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes
@ -499,7 +579,7 @@ def test_visualization_minimal_render_table_empty_text_no_headers(
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
assert Visualizer().render_table(Doc(en_vocab), formats).strip() == ""
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
def test_visualization_minimal_render_table_empty_text_headers(
@ -515,7 +595,7 @@ def test_visualization_minimal_render_table_empty_text_headers(
AttributeFormat("morph"),
AttributeFormat("ent_type_", name="ent"),
]
assert Visualizer().render_table(Doc(en_vocab), formats).strip() == ""
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
def test_visualization_minimal_render_table_permitted_values(
@ -532,7 +612,9 @@ def test_visualization_minimal_render_table_permitted_values(
AttributeFormat("ent_type_"),
]
assert (
Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip()
Visualizer()
.render_table(fully_featured_doc_one_sentence, formats, spacing=3)
.strip()
== """
> poss Sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's PART POS Poss=yes
@ -595,7 +677,9 @@ def test_visualization_minimal_render_table_two_sentences(
]
assert (
Visualizer().render_table(fully_featured_doc_two_sentences, formats).strip()
Visualizer()
.render_table(fully_featured_doc_two_sentences, formats, spacing=3)
.strip()
== """
> poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
> case 's 's PART POS Poss=yes
@ -638,10 +722,41 @@ def test_visualization_rich_render_table_one_sentence(
),
]
assert (
Visualizer().render_table(fully_featured_doc_one_sentence, formats)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if supports_ansi
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n"
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
)
def test_visualization_rich_render_table_one_sentence_trigger_value_shorter_than_maximum(
fully_featured_doc_one_sentence,
):
formats = [
AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
AttributeFormat("dep_", name="dep", fg_color=2),
AttributeFormat("i", name="index", aligns="r"),
AttributeFormat(
"text",
name="text",
fg_color=196,
value_dependent_fg_colors={"'s": 50},
value_dependent_bg_colors={"'s": 12},
),
AttributeFormat("lemma_", name="lemma"),
AttributeFormat("pos_", name="pos", fg_color=100),
AttributeFormat("tag_", name="tag", fg_color=100),
AttributeFormat("morph", name="morph", fg_color=100, max_width=15),
AttributeFormat(
"ent_type_",
name="ent",
),
]
assert (
Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
if supports_ansi
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
)
@ -666,10 +781,10 @@ def test_visualization_rich_render_table_two_sentences(
),
]
assert (
Visualizer().render_table(fully_featured_doc_two_sentences, formats)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
Visualizer().render_table(fully_featured_doc_two_sentences, formats, spacing=3)
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
if supports_ansi
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- ----- --- --------------- ---\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she PRON PRP Case=Nom|Gender \n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love VERB VBD Tense=Past|Verb \n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it PRON PRP Case=Acc|Gender \n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . PUNCT . PunctType=peri \n\n"
)
@ -735,15 +850,18 @@ def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
]
search_attributes = [AttributeFormat("ent_type_")]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attributes=search_attributes,
display_columns=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
)
@ -768,12 +886,48 @@ def test_visualization_minimal_render_instances_two_sentences_value_non_grouping
search_attributes=search_attributes,
display_columns=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
)
def test_visualization_minimal_render_instances_two_sentences_missing_value_non_grouping(
def test_visualization_minimal_render_instances_two_sentences_value_surrounding_sentences_non_grouping(
fully_featured_doc_two_sentences,
):
display_columns = [
AttributeFormat("dep_"),
AttributeFormat("text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSON"])]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attributes=search_attributes,
display_columns=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=2,
surrounding_tokens_fg_color=11,
surrounding_tokens_bg_color=None,
)
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;11mcase\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;11mnsubj\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n"
if supports_ansi
else "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \n"
)
def test_visualization_render_instances_two_sentences_missing_value_non_grouping(
fully_featured_doc_two_sentences,
):
display_columns = [
@ -794,12 +948,46 @@ def test_visualization_minimal_render_instances_two_sentences_missing_value_non_
search_attributes=search_attributes,
display_columns=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\ndep text \n--- ---- \n"
)
def test_visualization_minimal_render_instances_two_sentences_type_grouping(
def test_visualization_render_instances_two_sentences_missing_value_surrounding_sentences_non_grouping(
fully_featured_doc_two_sentences,
):
display_columns = [
AttributeFormat("dep_", name="dep"),
AttributeFormat("text", name="text"),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attributes=search_attributes,
display_columns=display_columns,
group=False,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\ndep text \n--- ---- \n"
)
def test_visualization_render_instances_two_sentences_type_grouping(
fully_featured_doc_two_sentences,
):
display_columns = [
@ -820,12 +1008,16 @@ def test_visualization_minimal_render_instances_two_sentences_type_grouping(
search_attributes=search_attributes,
display_columns=display_columns,
group=True,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
)
def test_visualization_minimal_render_instances_two_sentences_type_grouping_colors(
def test_visualization_render_instances_two_sentences_type_grouping_colors(
fully_featured_doc_two_sentences,
):
display_columns = [
@ -846,8 +1038,44 @@ def test_visualization_minimal_render_instances_two_sentences_type_grouping_colo
search_attributes=search_attributes,
display_columns=display_columns,
group=True,
spacing=3,
surrounding_tokens_height=0,
surrounding_tokens_fg_color=None,
surrounding_tokens_bg_color=None,
)
== "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
== "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
if supports_ansi
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
)
def test_visualization_render_instances_two_sentences_type_grouping_colors_with_surrounding_sentences(
fully_featured_doc_two_sentences,
):
display_columns = [
AttributeFormat("dep_", fg_color=20),
AttributeFormat("text", bg_color=30),
AttributeFormat("lemma_"),
AttributeFormat("pos_"),
AttributeFormat("tag_"),
AttributeFormat("morph"),
AttributeFormat("ent_type_"),
]
search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]
assert (
Visualizer().render_instances(
fully_featured_doc_two_sentences,
search_attributes=search_attributes,
display_columns=display_columns,
group=True,
spacing=3,
surrounding_tokens_height=3,
surrounding_tokens_fg_color=11,
surrounding_tokens_bg_color=None,
)
== "\n\x1b[38;5;20m\x1b[38;5;11mcompound\x1b[0m\x1b[0m \x1b[48;5;30m\x1b[38;5;11mSilicon\x1b[0m\x1b[0m \x1b[38;5;11msilicon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mValley\x1b[0m \x1b[0m \x1b[38;5;11mvalley\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mShe\x1b[0m \x1b[0m \x1b[38;5;11mshe\x1b[0m \x1b[38;5;11mPRON\x1b[0m \x1b[38;5;11mPRP\x1b[0m \x1b[38;5;11mCase=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mloved\x1b[0m \x1b[0m \x1b[38;5;11mlove\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=Past|VerbForm=Fin\x1b[0m \n\n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mto\x1b[0m \x1b[0m \x1b[38;5;11mto\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mLondon\x1b[0m \x1b[0m \x1b[38;5;11mlondon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;20m\x1b[38;5;11mcase\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m's\x1b[0m \x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n"
if supports_ansi
else "\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \nnsubj She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \nROOT loved love VERB VBD Tense=Past|VerbForm=Fin \n\nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \nprep to to ADP IN \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \n"
)
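
A sketch of the surrounding-token window these tests expect: for each matched token, rows are shown for indices from token.i - surrounding_tokens_height to token.i + surrounding_tokens_height, clamped to the document bounds, and windows around consecutive matches run together rather than repeating rows. A hypothetical helper, not part of the commit:

    def surrounding_window(i: int, height: int, doc_len: int) -> range:
        # Clamp [i - height, i + height] to the valid token indices of the document.
        return range(max(0, i - height), min(doc_len, i + height + 1))

    # In the two-sentence fixture (14 tokens), the PERSON match "Sarah" at index 0
    # with surrounding_tokens_height=2 shows rows 0, 1 and 2 ("Sarah", "'s", "sister").
    assert list(surrounding_window(0, 2, 14)) == [0, 1, 2]
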

View File

@ -1,9 +1,7 @@
from os import linesep, truncate
from os import linesep
from typing import Union
import wasabi
from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS
from spacy.tokens import Span, Token, Doc
from spacy.util import working_dir
SPACE = 0
@ -42,6 +40,10 @@ ROOT_LEFT_CHARS = {
class AttributeFormat:
"""
Instructions for rendering information about a token property, e.g. lemma_, ent_type_.
"""
def __init__(
self,
attribute: str,
@ -55,6 +57,20 @@ class AttributeFormat:
value_dependent_fg_colors: dict[str, Union[str, int]] = None,
value_dependent_bg_colors: dict[str, Union[str, int]] = None,
):
"""
attribute: the token attribute, e.g. lemma_, ._.holmes.lemma
name: the name to display, e.g. in column headers.
aligns: the column alignment where applicable: 'l' (left, the default), 'r' (right) or 'c' (center).
max_width: a maximum width to which values of the attribute should be truncated.
fg_color: the foreground color that should be used to display instances of the attribute
bg_color: the background color that should be used to display instances of the attribute
permitted_values: a tuple of values of the attribute that should be displayed. If
permitted_values is not None and a value of the attribute is not
in permitted_values, the empty string is rendered instead of the value.
value_dependent_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
value_dependent_bg_colors: a dictionary from values to background colors that should be used to display those values.
"""
self.attribute = attribute
self.name = name
self.aligns = aligns
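
For reference, the tests above construct formats along these lines; a short sketch using the AttributeFormat class defined in this file, with illustrative values:

    # Dependency-label column rendered in 256-color palette color 2.
    dep_format = AttributeFormat("dep_", name="dep", fg_color=2)

    # Truncate long morphology strings to 15 characters.
    morph_format = AttributeFormat("morph", name="morph", max_width=15)

    # Render only the value "PERSON"; other entity labels appear as the empty string.
    # The value-dependent colors here are illustrative, not taken from the commit.
    ent_format = AttributeFormat(
        "ent_type_",
        name="ent",
        permitted_values=["PERSON"],
        value_dependent_fg_colors={"PERSON": 50},
        value_dependent_bg_colors={"PERSON": 12},
    )
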
@ -70,9 +86,19 @@ class AttributeFormat:
self,
token: Token,
*,
right_pad_to_length: int = None,
ignore_colors: bool = False,
render_all_colors_within_values: bool = False,
whole_row_fg_color: Union[int, str] = None,
whole_row_bg_color: Union[int, str] = None,
) -> str:
"""
ignore_colors: if True, no colors are rendered, typically because the values are only needed to calculate column widths.
render_all_colors_within_values: when rendering a table, self.fg_color and self.bg_color are applied by Wasabi;
this argument is set to True when rendering a text to signal that those colors should be applied within the value itself.
whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dependent_fg_colors.
whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dependent_bg_colors.
"""
obj = token
parts = self.attribute.split(".")
for part in parts[:-1]:
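
The new keyword arguments are exercised directly in the tests above; a short call sketch (doc and formats as constructed in those tests):

    # Right-pad the rendered value to a fixed width for column layout: returns "3  ".
    AttributeFormat("head.i").render(doc[2], right_pad_to_length=3)

    # Whole-row colors take precedence over value-dependent colors; on an ANSI terminal
    # this returns "\x1b[38;5;8;48;5;9mdep\x1b[0m", otherwise just "dep".
    AttributeFormat("dep_").render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
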
@ -86,18 +112,26 @@ class AttributeFormat:
value = value[: self.max_width]
fg_color = None
bg_color = None
if right_pad_to_length is not None:
right_padding = " " * (right_pad_to_length - len(value))
else:
right_padding = ""
if not ignore_colors and len(value) > 0:
if self.value_dependent_fg_colors is not None:
if whole_row_fg_color is not None:
fg_color = whole_row_fg_color
elif self.value_dependent_fg_colors is not None:
fg_color = self.value_dependent_fg_colors.get(value, None)
if fg_color is None and render_all_colors_within_values:
fg_color = self.fg_color
if self.value_dependent_bg_colors is not None:
bg_color = self.value_dependent_bg_colors.get(value, None)
if bg_color is None and render_all_colors_within_values:
if whole_row_bg_color is not None:
bg_color = whole_row_bg_color
elif bg_color is None and render_all_colors_within_values:
bg_color = self.bg_color
if fg_color is not None or bg_color is not None:
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
return value
return value + right_padding
class Visualizer:
@ -111,7 +145,7 @@ class Visualizer:
root_right: True if the tree should be rendered with the root on the right-hand side,
False if the tree should be rendered with the root on the left-hand side.
Adapted from https://github.com/KoichiYasuoka/deplacy
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
"""
# Check sent is really a sentence
@ -328,8 +362,17 @@ class Visualizer:
]
def render_table(
self, doc: Doc, columns: list[AttributeFormat], spacing: int = 3
self, doc: Doc, columns: list[AttributeFormat], spacing: int
) -> str:
"""Renders a document as a table.
TODO: allow the caller to specify a portion of the document to display.
columns: the attribute formats of the columns to display. tree_right and
tree_left are magic attribute values that render dependency trees with
the roots on the left or right respectively.
spacing: the number of spaces between each column in the table.
"""
return_string = ""
for sent in doc.sents:
if "tree_right" in (c.attribute for c in columns):
@ -346,7 +389,8 @@ class Visualizer:
else:
if len(sent) > 0:
width = max(
len(column.render(token, ignore_colors=True)) for token in sent
len(column.render(token, ignore_colors=True))
for token in sent
)
else:
width = 0
@ -360,7 +404,7 @@ class Visualizer:
if column.attribute == "tree_right"
else tree_left[token_index]
if column.attribute == "tree_left"
else column.render(token)
else column.render(token, right_pad_to_length=widths[column_index])
for column_index, column in enumerate(columns)
]
for token_index, token in enumerate(sent)
@ -388,6 +432,10 @@ class Visualizer:
return return_string
def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str:
"""Renders a text interspersed with attribute labels.
TODO: allow the caller to specify a portion of the document to display.
"""
return_string = ""
text_attributes = [a for a in attributes if a.attribute == "text"]
text_attribute = (
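
render_text takes the same attribute formats; a minimal call sketch (doc as above, attribute choice illustrative):

    # Render the document text interspersed with part-of-speech labels.
    print(Visualizer().render_text(doc, [AttributeFormat("text"), AttributeFormat("pos_")]))
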
@ -417,8 +465,28 @@ class Visualizer:
search_attributes: list[AttributeFormat],
display_columns: list[AttributeFormat],
group: bool,
spacing: int = 3,
spacing: int,
surrounding_tokens_height: int,
surrounding_tokens_fg_color: Union[str, int],
surrounding_tokens_bg_color: Union[str, int],
) -> str:
"""Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'.
TODO: specify a specific portion of the document to display.
search_attributes: the attribute(s) or attribute value(s) that cause a row to be displayed for a token.
display_columns: the attributes that should be displayed in each row.
group: True if the rows should be ordered by the search attribute values,
False if they should retain their in-document order.
spacing: the number of spaces between each column.
surrounding_tokens_height: the number of rows of context to display for tokens
before and after each matched token. Consecutive matched tokens, e.g.
tokens belonging to the same named entity, are rendered together as a single group.
surrounding_tokens_fg_color: a foreground color to use for surrounding token rows.
surrounding_tokens_bg_color: a background color to use for surrounding token rows.
Note that if surrounding_tokens_bg_color is None, any background color defined for the attribute
will be used instead, which is unlikely to be the desired result.
"""
def filter(token: Token) -> bool:
for attribute in search_attributes:
value = attribute.render(token, ignore_colors=True)
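
A minimal usage sketch mirroring the surrounding-sentences test above (columns abbreviated; doc as constructed there):

    print(
        Visualizer().render_instances(
            doc,
            search_attributes=[AttributeFormat("ent_type_", permitted_values=["PERSON"])],
            display_columns=[
                AttributeFormat("dep_"),
                AttributeFormat("text"),
                AttributeFormat("ent_type_"),
            ],
            group=False,
            spacing=3,
            surrounding_tokens_height=2,     # two rows of context before and after each match
            surrounding_tokens_fg_color=11,  # context rows rendered in palette color 11
            surrounding_tokens_bg_color=None,
        )
    )
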
@ -426,20 +494,22 @@ class Visualizer:
return False
return True
tokens = [token for token in doc if filter(token)]
if group:
tokens.sort(
key=(
lambda token: [attribute.render(token, ignore_colors=True)
for attribute in search_attributes]
)
matched_tokens = [token for token in doc if filter(token)]
tokens_to_display_indices = [
index
for token in matched_tokens
for index in range(
token.i - surrounding_tokens_height,
token.i + surrounding_tokens_height + 1,
)
if index >= 0 and index < len(doc)
]
widths = []
for column in display_columns:
if len(tokens) > 0:
if len(tokens_to_display_indices) > 0:
width = max(
len(column.render(token, ignore_colors=True)) for token in tokens
len(column.render(doc[i], ignore_colors=True))
for i in tokens_to_display_indices
)
else:
width = 0
@ -447,22 +517,77 @@ class Visualizer:
width = min(width, column.max_width)
width = max(width, len(column.name))
widths.append(width)
data = [
[
column.render(token)
for column_index, column in enumerate(display_columns)
]
for token in tokens
]
if len([1 for c in display_columns if len(c.name) > 0]) > 0:
header = [c.name for c in display_columns]
if group:
matched_tokens.sort(
key=(
lambda token: [
attribute.render(token, ignore_colors=True)
for attribute in search_attributes
]
)
)
rows = []
token_index_to_display = -1
for matched_token_index, matched_token in enumerate(matched_tokens):
if surrounding_tokens_height > 0:
surrounding_start_index = max(
0, matched_token.i - surrounding_tokens_height
)
if token_index_to_display + 1 == matched_token.i:
surrounding_start_index = token_index_to_display + 1
surrounding_end_index = min(
len(doc), matched_token.i + surrounding_tokens_height + 1
)
if (
matched_token_index + 1 < len(matched_tokens)
and matched_token.i + 1 == matched_tokens[matched_token_index + 1].i
):
surrounding_end_index = matched_token.i + 1
else:
header = None
aligns = [c.aligns for c in display_columns]
fg_colors = [c.fg_color for c in display_columns]
bg_colors = [c.bg_color for c in display_columns]
surrounding_start_index = matched_token.i
surrounding_end_index = surrounding_start_index + 1
for token_index_to_display in range(
surrounding_start_index, surrounding_end_index
):
if token_index_to_display == matched_token.i:
rows.append(
[
column.render(
matched_token,
right_pad_to_length=widths[column_index],
)
for column_index, column in enumerate(display_columns)
]
)
else:
rows.append(
[
column.render(
doc[token_index_to_display],
whole_row_fg_color=surrounding_tokens_fg_color,
whole_row_bg_color=surrounding_tokens_bg_color,
right_pad_to_length=widths[column_index],
)
for column_index, column in enumerate(display_columns)
]
)
if (
matched_token_index + 1 < len(matched_tokens)
and token_index_to_display + 1
!= matched_tokens[matched_token_index + 1].i
):
rows.append([])
if len([1 for c in display_columns if len(c.name) > 0]) > 0:
header = [c.name for c in display_columns]
else:
header = None
aligns = [c.aligns for c in display_columns]
fg_colors = [c.fg_color for c in display_columns]
bg_colors = [c.bg_color for c in display_columns]
return wasabi.table(
data,
rows,
header=header,
divider=True,
aligns=aligns,