mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 12:50:20 +03:00
Add surrounding tokens functionality
This commit is contained in:
parent
ed788c5def
commit
e713aa0938
|
@ -48,7 +48,7 @@ install_requires =
|
||||||
preshed>=3.0.2,<3.1.0
|
preshed>=3.0.2,<3.1.0
|
||||||
thinc>=8.0.12,<8.1.0
|
thinc>=8.0.12,<8.1.0
|
||||||
blis>=0.4.0,<0.8.0
|
blis>=0.4.0,<0.8.0
|
||||||
wasabi>=0.8.1,<1.1.0
|
wasabi>=0.9.0,<1.1.0
|
||||||
srsly>=2.4.1,<3.0.0
|
srsly>=2.4.1,<3.0.0
|
||||||
catalogue>=2.0.6,<2.1.0
|
catalogue>=2.0.6,<2.1.0
|
||||||
typer>=0.3.0,<0.5.0
|
typer>=0.3.0,<0.5.0
|
||||||
|
|
|
@ -227,6 +227,26 @@ def test_visualization_render_native_attribute_int(en_vocab):
|
||||||
assert AttributeFormat("head.i").render(doc[2]) == "3"
|
assert AttributeFormat("head.i").render(doc[2]) == "3"
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_render_native_attribute_int_with_right_padding(en_vocab):
|
||||||
|
doc = Doc(
|
||||||
|
en_vocab,
|
||||||
|
words=[
|
||||||
|
"I",
|
||||||
|
"saw",
|
||||||
|
"a",
|
||||||
|
"horse",
|
||||||
|
"yesterday",
|
||||||
|
"that",
|
||||||
|
"was",
|
||||||
|
"injured",
|
||||||
|
".",
|
||||||
|
],
|
||||||
|
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
||||||
|
deps=["dep"] * 9,
|
||||||
|
)
|
||||||
|
assert AttributeFormat("head.i").render(doc[2], right_pad_to_length=3) == "3 "
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_native_attribute_str(en_vocab):
|
def test_visualization_render_native_attribute_str(en_vocab):
|
||||||
doc = Doc(
|
doc = Doc(
|
||||||
en_vocab,
|
en_vocab,
|
||||||
|
@ -278,6 +298,64 @@ def test_visualization_render_colors(en_vocab):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_render_whole_row_colors(en_vocab):
|
||||||
|
doc = Doc(
|
||||||
|
en_vocab,
|
||||||
|
words=[
|
||||||
|
"I",
|
||||||
|
"saw",
|
||||||
|
"a",
|
||||||
|
"horse",
|
||||||
|
"yesterday",
|
||||||
|
"that",
|
||||||
|
"was",
|
||||||
|
"injured",
|
||||||
|
".",
|
||||||
|
],
|
||||||
|
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
||||||
|
deps=["dep"] * 9,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
AttributeFormat(
|
||||||
|
"dep_",
|
||||||
|
).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
|
||||||
|
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
|
||||||
|
if supports_ansi
|
||||||
|
else "dep"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_render_whole_row_colors_with_value_dependent_colors(en_vocab):
|
||||||
|
doc = Doc(
|
||||||
|
en_vocab,
|
||||||
|
words=[
|
||||||
|
"I",
|
||||||
|
"saw",
|
||||||
|
"a",
|
||||||
|
"horse",
|
||||||
|
"yesterday",
|
||||||
|
"that",
|
||||||
|
"was",
|
||||||
|
"injured",
|
||||||
|
".",
|
||||||
|
],
|
||||||
|
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
||||||
|
deps=["dep"] * 9,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert (
|
||||||
|
AttributeFormat(
|
||||||
|
"dep_",
|
||||||
|
value_dependent_fg_colors={"dep": 2},
|
||||||
|
value_dependent_bg_colors={"dep": 11},
|
||||||
|
).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
|
||||||
|
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
|
||||||
|
if supports_ansi
|
||||||
|
else "dep"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_colors_only_fg(en_vocab):
|
def test_visualization_render_colors_only_fg(en_vocab):
|
||||||
doc = Doc(
|
doc = Doc(
|
||||||
en_vocab,
|
en_vocab,
|
||||||
|
@ -470,7 +548,9 @@ def test_visualization_minimal_render_table_one_sentence(
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip()
|
Visualizer()
|
||||||
|
.render_table(fully_featured_doc_one_sentence, formats, spacing=3)
|
||||||
|
.strip()
|
||||||
== """
|
== """
|
||||||
╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
|
╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
|
||||||
║ ╚> case 's 's PART POS Poss=yes
|
║ ╚> case 's 's PART POS Poss=yes
|
||||||
|
@ -499,7 +579,7 @@ def test_visualization_minimal_render_table_empty_text_no_headers(
|
||||||
AttributeFormat("morph"),
|
AttributeFormat("morph"),
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
assert Visualizer().render_table(Doc(en_vocab), formats).strip() == ""
|
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_table_empty_text_headers(
|
def test_visualization_minimal_render_table_empty_text_headers(
|
||||||
|
@ -515,7 +595,7 @@ def test_visualization_minimal_render_table_empty_text_headers(
|
||||||
AttributeFormat("morph"),
|
AttributeFormat("morph"),
|
||||||
AttributeFormat("ent_type_", name="ent"),
|
AttributeFormat("ent_type_", name="ent"),
|
||||||
]
|
]
|
||||||
assert Visualizer().render_table(Doc(en_vocab), formats).strip() == ""
|
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_table_permitted_values(
|
def test_visualization_minimal_render_table_permitted_values(
|
||||||
|
@ -532,7 +612,9 @@ def test_visualization_minimal_render_table_permitted_values(
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip()
|
Visualizer()
|
||||||
|
.render_table(fully_featured_doc_one_sentence, formats, spacing=3)
|
||||||
|
.strip()
|
||||||
== """
|
== """
|
||||||
╔>╔═ poss Sarah PROPN NNP NounType=prop|Number=sing PERSON
|
╔>╔═ poss Sarah PROPN NNP NounType=prop|Number=sing PERSON
|
||||||
║ ╚> case 's PART POS Poss=yes
|
║ ╚> case 's PART POS Poss=yes
|
||||||
|
@ -595,7 +677,9 @@ def test_visualization_minimal_render_table_two_sentences(
|
||||||
]
|
]
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_table(fully_featured_doc_two_sentences, formats).strip()
|
Visualizer()
|
||||||
|
.render_table(fully_featured_doc_two_sentences, formats, spacing=3)
|
||||||
|
.strip()
|
||||||
== """
|
== """
|
||||||
╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
|
╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
|
||||||
║ ╚> case 's 's PART POS Poss=yes
|
║ ╚> case 's 's PART POS Poss=yes
|
||||||
|
@ -638,10 +722,41 @@ def test_visualization_rich_render_table_one_sentence(
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_table(fully_featured_doc_one_sentence, formats)
|
Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3)
|
||||||
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
|
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
|
||||||
if supports_ansi
|
if supports_ansi
|
||||||
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n"
|
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_rich_render_table_one_sentence_trigger_value_shorter_than_maximum(
|
||||||
|
fully_featured_doc_one_sentence,
|
||||||
|
):
|
||||||
|
formats = [
|
||||||
|
AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
|
||||||
|
AttributeFormat("dep_", name="dep", fg_color=2),
|
||||||
|
AttributeFormat("i", name="index", aligns="r"),
|
||||||
|
AttributeFormat(
|
||||||
|
"text",
|
||||||
|
name="text",
|
||||||
|
fg_color=196,
|
||||||
|
value_dependent_fg_colors={"'s": 50},
|
||||||
|
value_dependent_bg_colors={"'s": 12},
|
||||||
|
),
|
||||||
|
AttributeFormat("lemma_", name="lemma"),
|
||||||
|
AttributeFormat("pos_", name="pos", fg_color=100),
|
||||||
|
AttributeFormat("tag_", name="tag", fg_color=100),
|
||||||
|
AttributeFormat("morph", name="morph", fg_color=100, max_width=15),
|
||||||
|
AttributeFormat(
|
||||||
|
"ent_type_",
|
||||||
|
name="ent",
|
||||||
|
),
|
||||||
|
]
|
||||||
|
assert (
|
||||||
|
Visualizer().render_table(fully_featured_doc_one_sentence, formats, spacing=3)
|
||||||
|
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
|
||||||
|
if supports_ansi
|
||||||
|
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -666,10 +781,10 @@ def test_visualization_rich_render_table_two_sentences(
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_table(fully_featured_doc_two_sentences, formats)
|
Visualizer().render_table(fully_featured_doc_two_sentences, formats, spacing=3)
|
||||||
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
|
== "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m 
\x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
|
||||||
if supports_ansi
|
if supports_ansi
|
||||||
else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
|
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- ----- --- --------------- ---\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she PRON PRP Case=Nom|Gender \n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love VERB VBD Tense=Past|Verb \n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it PRON PRP Case=Acc|Gender \n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . PUNCT . PunctType=peri \n\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -735,15 +850,18 @@ def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
|
||||||
]
|
]
|
||||||
|
|
||||||
search_attributes = [AttributeFormat("ent_type_")]
|
search_attributes = [AttributeFormat("ent_type_")]
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attributes=search_attributes,
|
||||||
display_columns=display_columns,
|
display_columns=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=0,
|
||||||
|
surrounding_tokens_fg_color=None,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
)
|
)
|
||||||
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
|
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -768,12 +886,48 @@ def test_visualization_minimal_render_instances_two_sentences_value_non_grouping
|
||||||
search_attributes=search_attributes,
|
search_attributes=search_attributes,
|
||||||
display_columns=display_columns,
|
display_columns=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=0,
|
||||||
|
surrounding_tokens_fg_color=None,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
)
|
)
|
||||||
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_instances_two_sentences_missing_value_non_grouping(
|
def test_visualization_minimal_render_instances_two_sentences_value_surrounding_sentences_non_grouping(
|
||||||
|
fully_featured_doc_two_sentences,
|
||||||
|
):
|
||||||
|
display_columns = [
|
||||||
|
AttributeFormat("dep_"),
|
||||||
|
AttributeFormat("text"),
|
||||||
|
AttributeFormat("lemma_"),
|
||||||
|
AttributeFormat("pos_"),
|
||||||
|
AttributeFormat("tag_"),
|
||||||
|
AttributeFormat("morph"),
|
||||||
|
AttributeFormat("ent_type_"),
|
||||||
|
]
|
||||||
|
|
||||||
|
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSON"])]
|
||||||
|
|
||||||
|
assert (
|
||||||
|
Visualizer().render_instances(
|
||||||
|
fully_featured_doc_two_sentences,
|
||||||
|
search_attributes=search_attributes,
|
||||||
|
display_columns=display_columns,
|
||||||
|
group=False,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=2,
|
||||||
|
surrounding_tokens_fg_color=11,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
|
)
|
||||||
|
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;11mcase\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;11mnsubj\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n"
|
||||||
|
if supports_ansi
|
||||||
|
else "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_render_instances_two_sentences_missing_value_non_grouping(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
display_columns = [
|
display_columns = [
|
||||||
|
@ -794,12 +948,46 @@ def test_visualization_minimal_render_instances_two_sentences_missing_value_non_
|
||||||
search_attributes=search_attributes,
|
search_attributes=search_attributes,
|
||||||
display_columns=display_columns,
|
display_columns=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=0,
|
||||||
|
surrounding_tokens_fg_color=None,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
)
|
)
|
||||||
== "\ndep text \n--- ---- \n"
|
== "\ndep text \n--- ---- \n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_instances_two_sentences_type_grouping(
|
def test_visualization_render_instances_two_sentences_missing_value_surrounding_sentences_non_grouping(
|
||||||
|
fully_featured_doc_two_sentences,
|
||||||
|
):
|
||||||
|
display_columns = [
|
||||||
|
AttributeFormat("dep_", name="dep"),
|
||||||
|
AttributeFormat("text", name="text"),
|
||||||
|
AttributeFormat("lemma_"),
|
||||||
|
AttributeFormat("pos_"),
|
||||||
|
AttributeFormat("tag_"),
|
||||||
|
AttributeFormat("morph"),
|
||||||
|
AttributeFormat("ent_type_"),
|
||||||
|
]
|
||||||
|
|
||||||
|
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])]
|
||||||
|
|
||||||
|
assert (
|
||||||
|
Visualizer().render_instances(
|
||||||
|
fully_featured_doc_two_sentences,
|
||||||
|
search_attributes=search_attributes,
|
||||||
|
display_columns=display_columns,
|
||||||
|
group=False,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=0,
|
||||||
|
surrounding_tokens_fg_color=None,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
|
)
|
||||||
|
== "\ndep text \n--- ---- \n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_render_instances_two_sentences_type_grouping(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
display_columns = [
|
display_columns = [
|
||||||
|
@ -820,12 +1008,16 @@ def test_visualization_minimal_render_instances_two_sentences_type_grouping(
|
||||||
search_attributes=search_attributes,
|
search_attributes=search_attributes,
|
||||||
display_columns=display_columns,
|
display_columns=display_columns,
|
||||||
group=True,
|
group=True,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=0,
|
||||||
|
surrounding_tokens_fg_color=None,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
)
|
)
|
||||||
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_instances_two_sentences_type_grouping_colors(
|
def test_visualization_render_instances_two_sentences_type_grouping_colors(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
display_columns = [
|
display_columns = [
|
||||||
|
@ -846,8 +1038,44 @@ def test_visualization_minimal_render_instances_two_sentences_type_grouping_colo
|
||||||
search_attributes=search_attributes,
|
search_attributes=search_attributes,
|
||||||
display_columns=display_columns,
|
display_columns=display_columns,
|
||||||
group=True,
|
group=True,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=0,
|
||||||
|
surrounding_tokens_fg_color=None,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
)
|
)
|
||||||
== "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
== "\n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
if supports_ansi
|
if supports_ansi
|
||||||
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_visualization_render_instances_two_sentences_type_grouping_colors_with_surrounding_sentences(
|
||||||
|
fully_featured_doc_two_sentences,
|
||||||
|
):
|
||||||
|
display_columns = [
|
||||||
|
AttributeFormat("dep_", fg_color=20),
|
||||||
|
AttributeFormat("text", bg_color=30),
|
||||||
|
AttributeFormat("lemma_"),
|
||||||
|
AttributeFormat("pos_"),
|
||||||
|
AttributeFormat("tag_"),
|
||||||
|
AttributeFormat("morph"),
|
||||||
|
AttributeFormat("ent_type_"),
|
||||||
|
]
|
||||||
|
|
||||||
|
search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]
|
||||||
|
|
||||||
|
assert (
|
||||||
|
Visualizer().render_instances(
|
||||||
|
fully_featured_doc_two_sentences,
|
||||||
|
search_attributes=search_attributes,
|
||||||
|
display_columns=display_columns,
|
||||||
|
group=True,
|
||||||
|
spacing=3,
|
||||||
|
surrounding_tokens_height=3,
|
||||||
|
surrounding_tokens_fg_color=11,
|
||||||
|
surrounding_tokens_bg_color=None,
|
||||||
|
)
|
||||||
|
== "\n\x1b[38;5;20m\x1b[38;5;11mcompound\x1b[0m\x1b[0m \x1b[48;5;30m\x1b[38;5;11mSilicon\x1b[0m\x1b[0m \x1b[38;5;11msilicon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mValley\x1b[0m \x1b[0m \x1b[38;5;11mvalley\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mShe\x1b[0m \x1b[0m \x1b[38;5;11mshe\x1b[0m \x1b[38;5;11mPRON\x1b[0m \x1b[38;5;11mPRP\x1b[0m \x1b[38;5;11mCase=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mloved\x1b[0m \x1b[0m \x1b[38;5;11mlove\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=Past|VerbForm=Fin\x1b[0m \n\n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mto\x1b[0m \x1b[0m \x1b[38;5;11mto\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m 
\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mLondon\x1b[0m \x1b[0m \x1b[38;5;11mlondon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;20m\x1b[38;5;11mcase\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m's\x1b[0m \x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n"
|
||||||
|
if supports_ansi
|
||||||
|
else "\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \nnsubj She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \nROOT loved love VERB VBD Tense=Past|VerbForm=Fin \n\nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \nprep to to ADP IN \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \n"
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,9 +1,7 @@
|
||||||
from os import linesep, truncate
|
from os import linesep
|
||||||
from typing import Union
|
from typing import Union
|
||||||
import wasabi
|
import wasabi
|
||||||
from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS
|
|
||||||
from spacy.tokens import Span, Token, Doc
|
from spacy.tokens import Span, Token, Doc
|
||||||
from spacy.util import working_dir
|
|
||||||
|
|
||||||
|
|
||||||
SPACE = 0
|
SPACE = 0
|
||||||
|
@ -42,6 +40,10 @@ ROOT_LEFT_CHARS = {
|
||||||
|
|
||||||
|
|
||||||
class AttributeFormat:
|
class AttributeFormat:
|
||||||
|
"""
|
||||||
|
Instructions for rendering information about a token property, e.g. lemma_, ent_type_.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
attribute: str,
|
attribute: str,
|
||||||
|
@ -55,6 +57,20 @@ class AttributeFormat:
|
||||||
value_dependent_fg_colors: dict[str, Union[str, int]] = None,
|
value_dependent_fg_colors: dict[str, Union[str, int]] = None,
|
||||||
value_dependent_bg_colors: dict[str, Union[str, int]] = None,
|
value_dependent_bg_colors: dict[str, Union[str, int]] = None,
|
||||||
):
|
):
|
||||||
|
"""
|
||||||
|
attribute: the token attribute, e.g. lemma_, ._.holmes.lemma
|
||||||
|
name: the name to display e.g. in column headers
|
||||||
|
aligns: where appropriate the column alignment 'l' (left,
|
||||||
|
default), 'r' (right) or 'c' (center).
|
||||||
|
max_width: a maximum width to which values of the attribute should be truncated.
|
||||||
|
fg_color: the foreground color that should be used to display instances of the attribute
|
||||||
|
bg_color: the background color that should be used to display instances of the attribute
|
||||||
|
permitted_values: a tuple of values of the attribute that should be displayed. If
|
||||||
|
permitted_values is not None and a value of the attribute is not
|
||||||
|
in permitted_values, the empty string is rendered instead of the value.
|
||||||
|
value_dependent_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
|
||||||
|
value_dependent_bg_colors: a dictionary from values to background colors that should be used to display those values.
|
||||||
|
"""
|
||||||
self.attribute = attribute
|
self.attribute = attribute
|
||||||
self.name = name
|
self.name = name
|
||||||
self.aligns = aligns
|
self.aligns = aligns
|
||||||
|
@ -70,9 +86,19 @@ class AttributeFormat:
|
||||||
self,
|
self,
|
||||||
token: Token,
|
token: Token,
|
||||||
*,
|
*,
|
||||||
|
right_pad_to_length: int = None,
|
||||||
ignore_colors: bool = False,
|
ignore_colors: bool = False,
|
||||||
render_all_colors_within_values: bool = False,
|
render_all_colors_within_values: bool = False,
|
||||||
|
whole_row_fg_color: Union[int, str] = None,
|
||||||
|
whole_row_bg_color: Union[int, str] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
"""
|
||||||
|
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
|
||||||
|
render_all_colors_within_values: when rendering a table, self.fg_color and self.bg_color are rendered in Wasabi.
|
||||||
|
This argument is set to True when rendering a text to signal that colors should be rendered here.
|
||||||
|
whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dependent_fg_colors.
|
||||||
|
whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dependent_bg_colors.
|
||||||
|
"""
|
||||||
obj = token
|
obj = token
|
||||||
parts = self.attribute.split(".")
|
parts = self.attribute.split(".")
|
||||||
for part in parts[:-1]:
|
for part in parts[:-1]:
|
||||||
|
@ -86,18 +112,26 @@ class AttributeFormat:
|
||||||
value = value[: self.max_width]
|
value = value[: self.max_width]
|
||||||
fg_color = None
|
fg_color = None
|
||||||
bg_color = None
|
bg_color = None
|
||||||
|
if right_pad_to_length is not None:
|
||||||
|
right_padding = " " * (right_pad_to_length - len(value))
|
||||||
|
else:
|
||||||
|
right_padding = ""
|
||||||
if not ignore_colors and len(value) > 0:
|
if not ignore_colors and len(value) > 0:
|
||||||
if self.value_dependent_fg_colors is not None:
|
if whole_row_fg_color is not None:
|
||||||
|
fg_color = whole_row_fg_color
|
||||||
|
elif self.value_dependent_fg_colors is not None:
|
||||||
fg_color = self.value_dependent_fg_colors.get(value, None)
|
fg_color = self.value_dependent_fg_colors.get(value, None)
|
||||||
if fg_color is None and render_all_colors_within_values:
|
if fg_color is None and render_all_colors_within_values:
|
||||||
fg_color = self.fg_color
|
fg_color = self.fg_color
|
||||||
if self.value_dependent_bg_colors is not None:
|
if self.value_dependent_bg_colors is not None:
|
||||||
bg_color = self.value_dependent_bg_colors.get(value, None)
|
bg_color = self.value_dependent_bg_colors.get(value, None)
|
||||||
if bg_color is None and render_all_colors_within_values:
|
if whole_row_bg_color is not None:
|
||||||
|
bg_color = whole_row_bg_color
|
||||||
|
elif bg_color is None and render_all_colors_within_values:
|
||||||
bg_color = self.bg_color
|
bg_color = self.bg_color
|
||||||
if fg_color is not None or bg_color is not None:
|
if fg_color is not None or bg_color is not None:
|
||||||
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
||||||
return value
|
return value + right_padding
|
||||||
|
|
||||||
|
|
||||||
class Visualizer:
|
class Visualizer:
|
||||||
|
@ -111,7 +145,7 @@ class Visualizer:
|
||||||
root_right: True if the tree should be rendered with the root on the right-hand side,
|
root_right: True if the tree should be rendered with the root on the right-hand side,
|
||||||
False if the tree should be rendered with the root on the left-hand side.
|
False if the tree should be rendered with the root on the left-hand side.
|
||||||
|
|
||||||
Adapted from https://github.com/KoichiYasuoka/deplacy
|
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check sent is really a sentence
|
# Check sent is really a sentence
|
||||||
|
@ -328,8 +362,17 @@ class Visualizer:
|
||||||
]
|
]
|
||||||
|
|
||||||
def render_table(
|
def render_table(
|
||||||
self, doc: Doc, columns: list[AttributeFormat], spacing: int = 3
|
self, doc: Doc, columns: list[AttributeFormat], spacing: int
|
||||||
) -> str:
|
) -> str:
|
||||||
|
"""Renders a document as a table.
|
||||||
|
TODO: specify a specific portion of the document to display.
|
||||||
|
|
||||||
|
columns: the attribute formats of the columns to display.
|
||||||
|
tree_right and tree_left are magic values for the
|
||||||
|
attributes that render dependency trees where the
|
||||||
|
roots are on the right or left respectively.
|
||||||
|
spacing: the number of spaces between each column in the table.
|
||||||
|
"""
|
||||||
return_string = ""
|
return_string = ""
|
||||||
for sent in doc.sents:
|
for sent in doc.sents:
|
||||||
if "tree_right" in (c.attribute for c in columns):
|
if "tree_right" in (c.attribute for c in columns):
|
||||||
|
@ -346,7 +389,8 @@ class Visualizer:
|
||||||
else:
|
else:
|
||||||
if len(sent) > 0:
|
if len(sent) > 0:
|
||||||
width = max(
|
width = max(
|
||||||
len(column.render(token, ignore_colors=True)) for token in sent
|
len(column.render(token, ignore_colors=True))
|
||||||
|
for token in sent
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
width = 0
|
width = 0
|
||||||
|
@ -360,7 +404,7 @@ class Visualizer:
|
||||||
if column.attribute == "tree_right"
|
if column.attribute == "tree_right"
|
||||||
else tree_left[token_index]
|
else tree_left[token_index]
|
||||||
if column.attribute == "tree_left"
|
if column.attribute == "tree_left"
|
||||||
else column.render(token)
|
else column.render(token, right_pad_to_length=widths[column_index])
|
||||||
for column_index, column in enumerate(columns)
|
for column_index, column in enumerate(columns)
|
||||||
]
|
]
|
||||||
for token_index, token in enumerate(sent)
|
for token_index, token in enumerate(sent)
|
||||||
|
@ -388,6 +432,10 @@ class Visualizer:
|
||||||
return return_string
|
return return_string
|
||||||
|
|
||||||
def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str:
|
def render_text(self, doc: Doc, attributes: list[AttributeFormat]) -> str:
|
||||||
|
"""Renders a text interspersed with attribute labels.
|
||||||
|
TODO: specify a specific portion of the document to display.
|
||||||
|
|
||||||
|
"""
|
||||||
return_string = ""
|
return_string = ""
|
||||||
text_attributes = [a for a in attributes if a.attribute == "text"]
|
text_attributes = [a for a in attributes if a.attribute == "text"]
|
||||||
text_attribute = (
|
text_attribute = (
|
||||||
|
@ -417,8 +465,28 @@ class Visualizer:
|
||||||
search_attributes: list[AttributeFormat],
|
search_attributes: list[AttributeFormat],
|
||||||
display_columns: list[AttributeFormat],
|
display_columns: list[AttributeFormat],
|
||||||
group: bool,
|
group: bool,
|
||||||
spacing: int = 3,
|
spacing: int,
|
||||||
|
surrounding_tokens_height: int,
|
||||||
|
surrounding_tokens_fg_color: Union[str, int],
|
||||||
|
surrounding_tokens_bg_color: Union[str, int],
|
||||||
) -> str:
|
) -> str:
|
||||||
|
"""Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'.
|
||||||
|
TODO: specify a specific portion of the document to display.
|
||||||
|
|
||||||
|
search_attributes: the attribute(s) or attribute value(s) that cause a row to be displayed for a token.
|
||||||
|
display_columns: the attributes that should be displayed in each row.
|
||||||
|
group: True if the rows should be ordered by the search attribute values,
|
||||||
|
False if they should retain their in-document order.
|
||||||
|
spacing: the number of spaces between each column.
|
||||||
|
surrounding_tokens_height: a number of rows that should be displayed with information about tokens
|
||||||
|
before and after matched tokens. Consecutive matching tokens, e.g.
|
||||||
|
tokens belonging to the same named entity, are rendered together as a single group.
|
||||||
|
surrounding_tokens_fg_color: a foreground color to use for surrounding token rows.
|
||||||
|
surrounding_tokens_bg_color: a background color to use for surrounding token rows.
|
||||||
|
Note that if surrounding_tokens_bg_color is None, any background color defined for the attribute
|
||||||
|
will be used instead, which is unlikely to be the desired result.
|
||||||
|
"""
|
||||||
|
|
||||||
def filter(token: Token) -> bool:
|
def filter(token: Token) -> bool:
|
||||||
for attribute in search_attributes:
|
for attribute in search_attributes:
|
||||||
value = attribute.render(token, ignore_colors=True)
|
value = attribute.render(token, ignore_colors=True)
|
||||||
|
@ -426,20 +494,22 @@ class Visualizer:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
tokens = [token for token in doc if filter(token)]
|
matched_tokens = [token for token in doc if filter(token)]
|
||||||
if group:
|
tokens_to_display_indices = [
|
||||||
tokens.sort(
|
index
|
||||||
key=(
|
for token in matched_tokens
|
||||||
lambda token: [attribute.render(token, ignore_colors=True)
|
for index in range(
|
||||||
for attribute in search_attributes]
|
token.i - surrounding_tokens_height,
|
||||||
)
|
token.i + surrounding_tokens_height + 1,
|
||||||
)
|
)
|
||||||
|
if index >= 0 and index < len(doc)
|
||||||
|
]
|
||||||
widths = []
|
widths = []
|
||||||
for column in display_columns:
|
for column in display_columns:
|
||||||
if len(tokens) > 0:
|
if len(tokens_to_display_indices) > 0:
|
||||||
width = max(
|
width = max(
|
||||||
len(column.render(token, ignore_colors=True)) for token in tokens
|
len(column.render(doc[i], ignore_colors=True))
|
||||||
|
for i in tokens_to_display_indices
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
width = 0
|
width = 0
|
||||||
|
@ -447,22 +517,77 @@ class Visualizer:
|
||||||
width = min(width, column.max_width)
|
width = min(width, column.max_width)
|
||||||
width = max(width, len(column.name))
|
width = max(width, len(column.name))
|
||||||
widths.append(width)
|
widths.append(width)
|
||||||
data = [
|
if group:
|
||||||
[
|
matched_tokens.sort(
|
||||||
column.render(token)
|
key=(
|
||||||
for column_index, column in enumerate(display_columns)
|
lambda token: [
|
||||||
]
|
attribute.render(token, ignore_colors=True)
|
||||||
for token in tokens
|
for attribute in search_attributes
|
||||||
]
|
]
|
||||||
if len([1 for c in display_columns if len(c.name) > 0]) > 0:
|
)
|
||||||
header = [c.name for c in display_columns]
|
)
|
||||||
|
|
||||||
|
rows = []
|
||||||
|
token_index_to_display = -1
|
||||||
|
for matched_token_index, matched_token in enumerate(matched_tokens):
|
||||||
|
if surrounding_tokens_height > 0:
|
||||||
|
surrounding_start_index = max(
|
||||||
|
0, matched_token.i - surrounding_tokens_height
|
||||||
|
)
|
||||||
|
if token_index_to_display + 1 == matched_token.i:
|
||||||
|
surrounding_start_index = token_index_to_display + 1
|
||||||
|
surrounding_end_index = min(
|
||||||
|
len(doc), matched_token.i + surrounding_tokens_height + 1
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
matched_token_index + 1 < len(matched_tokens)
|
||||||
|
and matched_token.i + 1 == matched_tokens[matched_token_index + 1].i
|
||||||
|
):
|
||||||
|
surrounding_end_index = matched_token.i + 1
|
||||||
|
|
||||||
else:
|
else:
|
||||||
header = None
|
surrounding_start_index = matched_token.i
|
||||||
aligns = [c.aligns for c in display_columns]
|
surrounding_end_index = surrounding_start_index + 1
|
||||||
fg_colors = [c.fg_color for c in display_columns]
|
for token_index_to_display in range(
|
||||||
bg_colors = [c.bg_color for c in display_columns]
|
surrounding_start_index, surrounding_end_index
|
||||||
|
):
|
||||||
|
if token_index_to_display == matched_token.i:
|
||||||
|
rows.append(
|
||||||
|
[
|
||||||
|
column.render(
|
||||||
|
matched_token,
|
||||||
|
right_pad_to_length=widths[column_index],
|
||||||
|
)
|
||||||
|
for column_index, column in enumerate(display_columns)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
rows.append(
|
||||||
|
[
|
||||||
|
column.render(
|
||||||
|
doc[token_index_to_display],
|
||||||
|
whole_row_fg_color=surrounding_tokens_fg_color,
|
||||||
|
whole_row_bg_color=surrounding_tokens_bg_color,
|
||||||
|
right_pad_to_length=widths[column_index],
|
||||||
|
)
|
||||||
|
for column_index, column in enumerate(display_columns)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
if (
|
||||||
|
matched_token_index + 1 < len(matched_tokens)
|
||||||
|
and token_index_to_display + 1
|
||||||
|
!= matched_tokens[matched_token_index + 1].i
|
||||||
|
):
|
||||||
|
rows.append([])
|
||||||
|
if len([1 for c in display_columns if len(c.name) > 0]) > 0:
|
||||||
|
header = [c.name for c in display_columns]
|
||||||
|
else:
|
||||||
|
header = None
|
||||||
|
aligns = [c.aligns for c in display_columns]
|
||||||
|
fg_colors = [c.fg_color for c in display_columns]
|
||||||
|
bg_colors = [c.bg_color for c in display_columns]
|
||||||
return wasabi.table(
|
return wasabi.table(
|
||||||
data,
|
rows,
|
||||||
header=header,
|
header=header,
|
||||||
divider=True,
|
divider=True,
|
||||||
aligns=aligns,
|
aligns=aligns,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user