Mirror of https://github.com/explosion/spaCy.git (synced 2025-08-03 03:40:24 +03:00)

Commit 9243341f74 ("First working version"), parent 0ea623990e
@@ -1,6 +1,6 @@
 import pytest
 from wasabi.util import supports_ansi
-from spacy.visualization import AttributeFormat, Visualizer
+from spacy.visualization import AttributeFormat, render_dep_tree, render_table
 from spacy.tokens import Span, Doc, Token


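The headline change in this first working version: the Visualizer class is dissolved into module-level functions, so the tests import render_dep_tree and render_table directly. A minimal sketch of the call-site change, assuming a parsed doc from a trained pipeline (the pipeline name is illustrative, and spacy.visualization exists only on this branch):

    import spacy
    from spacy.visualization import AttributeFormat, render_dep_tree, render_table

    nlp = spacy.load("en_core_web_sm")  # assumption: any pipeline with a parser
    doc = nlp("My sister flew to Silicon Valley.")

    # Before: Visualizer.render_dep_tree(...) and Visualizer().render(...)
    tree_lines = render_dep_tree(doc[0 : len(doc)], True)  # root on the right
    print("\n".join(tree_lines))
    formats = [AttributeFormat("tree_left"), AttributeFormat("dep_"), AttributeFormat("text")]
    print(render_table(doc, formats, spacing=3))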
@@ -45,7 +45,7 @@ def test_viz_dep_tree_basic(en_vocab):
         heads=[2, 2, 3, None, 6, 6, 3, 3, 3],
         deps=["dep"] * 9,
     )
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], True)
     assert dep_tree == [
         "<╗ ",
         "<╣ ",
@@ -57,7 +57,7 @@ def test_viz_dep_tree_basic(en_vocab):
         "<══╣",
         "<══╝",
     ]
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], False)
     assert dep_tree == [
         " ╔>",
         " ╠>",
@@ -92,7 +92,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
         heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6],
         deps=["dep"] * 12,
     )
-    dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], True)
+    dep_tree = render_dep_tree(doc[3 : len(doc)], True)
     assert dep_tree == [
         "<╗ ",
         "<╣ ",
@@ -104,7 +104,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):
         "<══╣",
         "<══╝",
     ]
-    dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], False)
+    dep_tree = render_dep_tree(doc[3 : len(doc)], False)
     assert dep_tree == [
         " ╔>",
         " ╠>",
@@ -120,7 +120,7 @@ def test_viz_dep_tree_non_initial_sent(en_vocab):

 def test_viz_dep_tree_non_projective(horse_doc):
     """Test dependency tree display with a non-projective dependency."""
-    dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], True)
+    dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], True)
     assert dep_tree == [
         "<╗ ",
         "═╩═══╗",
@@ -132,7 +132,7 @@ def test_viz_dep_tree_non_projective(horse_doc):
         "═╝<╝ ║",
         "<════╝",
     ]
-    dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], False)
+    dep_tree = render_dep_tree(horse_doc[0 : len(horse_doc)], False)
     assert dep_tree == [
         " ╔>",
         "╔═══╩═",
@@ -163,7 +163,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
         heads=[5, 5, 0, 5, 5, None, 4, 5],
         deps=["dep"] * 8,
     )
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], True)
     assert dep_tree == [
         "═╗<╗",
         " ║<╣",
@@ -174,7 +174,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
         "<╝ ║",
         "<══╝",
     ]
-    dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
+    dep_tree = render_dep_tree(doc[0 : len(doc)], False)
    assert dep_tree == [
         "╔>╔═",
         "╠>║ ",
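All of these fixtures pin down the same contract: render_dep_tree(sent, root_right) returns one string per token, index-aligned with the sentence span it was given, so a caller can zip the tree column against the tokens. A sketch under the same assumptions as above:

    sent = next(doc.sents)
    for tree_line, token in zip(render_dep_tree(sent, True), sent):
        print(tree_line, token.text)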
@@ -190,7 +190,7 @@ def test_viz_dep_tree_highly_nonprojective(pl_vocab):
 def test_viz_dep_tree_input_not_span(horse_doc):
     """Test dependency tree display behaviour when the input is not a Span."""
     with pytest.raises(ValueError):
-        Visualizer.render_dep_tree(horse_doc[1:3], True)
+        render_dep_tree(horse_doc[1:3], True)


 def test_viz_render_native_attributes(horse_doc):
@@ -199,7 +199,10 @@ def test_viz_render_native_attributes(horse_doc):
     assert AttributeFormat("dep_").render(horse_doc[2]) == "dep"
     with pytest.raises(AttributeError):
         AttributeFormat("depp").render(horse_doc[2])
+    with pytest.raises(AttributeError):
+        AttributeFormat("tree_left").render(horse_doc[2])
+    with pytest.raises(AttributeError):
+        AttributeFormat("tree_right").render(horse_doc[2])


 def test_viz_render_colors(horse_doc):
     assert (
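The new assertions reserve tree_left and tree_right as table-only pseudo-attributes: AttributeFormat.render() resolves real token attributes such as dep_ or lemma_, and now fails on the tree columns exactly as it does on a misspelled name. A sketch, same assumptions:

    token = doc[2]
    assert AttributeFormat("dep_").render(token) == token.dep_
    for bad in ("depp", "tree_left", "tree_right"):
        try:
            AttributeFormat(bad).render(token)
        except AttributeError:
            print(bad, "cannot be rendered for a single token")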
@@ -265,7 +268,7 @@ def test_viz_minimal_render_table_one_sentence(
         AttributeFormat("ent_type_"),
     ]
     assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3).strip()
+        render_table(fully_featured_doc_one_sentence, formats, spacing=3).strip()
         == """
 ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
 ║ ╚> case 's 's PART POS Poss=yes
@@ -295,7 +298,7 @@ def test_viz_minimal_render_table_empty_text(
         AttributeFormat("morph"),
         AttributeFormat("ent_type_"),
     ]
-    assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
+    assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""

     # headers
     formats = [
@@ -308,7 +311,7 @@ def test_viz_minimal_render_table_empty_text(
         AttributeFormat("morph"),
         AttributeFormat("ent_type_", name="ent"),
     ]
-    assert Visualizer().render(Doc(en_vocab), formats, spacing=3).strip() == ""
+    assert render_table(Doc(en_vocab), formats, spacing=3).strip() == ""


 def test_viz_minimal_render_table_spacing(
@@ -325,7 +328,7 @@ def test_viz_minimal_render_table_spacing(
         AttributeFormat("ent_type_"),
     ]
     assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=1).strip()
+        render_table(fully_featured_doc_one_sentence, formats, spacing=1).strip()
         == """
 ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
 ║ ╚> case 's 's PART POS Poss=yes
@@ -356,8 +359,7 @@ def test_viz_minimal_render_table_two_sentences(
     ]

     assert (
-        Visualizer()
-        .render(fully_featured_doc_two_sentences, formats, spacing=3)
+        render_table(fully_featured_doc_two_sentences, formats, spacing=3)
         .strip()
         == """
 ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
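A render_table call is fully described by its column list. A sketch of the minimal-format table exercised above, with the columns read off the expected output (tree, dep, text, lemma, pos, tag, morph, ent):

    formats = [
        AttributeFormat("tree_left"),
        AttributeFormat("dep_"),
        AttributeFormat("text"),
        AttributeFormat("lemma_"),
        AttributeFormat("pos_"),
        AttributeFormat("tag_"),
        AttributeFormat("morph"),
        AttributeFormat("ent_type_"),
    ]
    print(render_table(doc, formats, spacing=3))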
@@ -401,7 +403,7 @@ def test_viz_rich_render_table_one_sentence(
         ),
     ]
     assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
+        render_table(fully_featured_doc_one_sentence, formats, spacing=3)
         == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
         if SUPPORTS_ANSI
         else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
@@ -429,7 +431,7 @@ def test_viz_rich_render_table_one_sentence(
         ),
     ]
     assert (
-        Visualizer().render(fully_featured_doc_one_sentence, formats, spacing=3)
+        render_table(fully_featured_doc_one_sentence, formats, spacing=3)
         == "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index \x1b[38;5;196mtext \x1b[0m lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- \x1b[38;5;196m-------\x1b[0m ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 \x1b[38;5;196mSarah \x1b[0m sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 \x1b[38;5;196m\x1b[38;5;50;48;5;12m's\x1b[0m \x1b[0m 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 \x1b[38;5;196msister \x1b[0m sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 \x1b[38;5;196mflew \x1b[0m fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 \x1b[38;5;196mto \x1b[0m to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 \x1b[38;5;196mSilicon\x1b[0m silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 \x1b[38;5;196mValley \x1b[0m valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 \x1b[38;5;196mvia \x1b[0m via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 \x1b[38;5;196mLondon \x1b[0m london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 \x1b[38;5;196m. \x1b[0m . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
         if SUPPORTS_ANSI
         else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag \x1b[38;5;100mmorph \x1b[0m ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- \x1b[38;5;100m-------------------------\x1b[0m ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS \x1b[38;5;100mPoss=yes \x1b[0m \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN \x1b[38;5;100mNumber=sing \x1b[0m \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD \x1b[38;5;100mTense=past|VerbForm=fin \x1b[0m \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \x1b[38;5;100m \x1b[0m \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP \x1b[38;5;100mNounType=prop|Number=sing\x1b[0m GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . \x1b[38;5;100mPunctType=peri \x1b[0m \n\n"
@@ -456,9 +458,9 @@ def test_viz_rich_render_table_two_sentences(
             value_dep_bg_colors={"PERSON": 12},
         ),
     ]
-    print(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
+    print(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
     print(
-        repr(Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3))
+        repr(render_table(fully_featured_doc_two_sentences, formats, spacing=3))
     )
     target = (
         "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
@@ -466,17 +468,17 @@ def test_viz_rich_render_table_two_sentences(
         else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- --------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|Verb \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|N GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|N GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
     )
     assert (
-        Visualizer().render(fully_featured_doc_two_sentences, formats, spacing=3)
+        render_table(fully_featured_doc_two_sentences, formats, spacing=3)
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=300
         )
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, start_i=3, length=9
         )
         == target
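The start_i/length assertions above encode the windowing rule: the pair selects a token range, which render_table then widens to whole sentences, so any window touching the same sentences yields the same table (start_i=3 with length 300 or length 9 both reproduce the full two-sentence target). A sketch, reusing the formats list from earlier:

    full = render_table(doc, formats, spacing=3)
    # Tokens 3..11 still fall inside this doc's only sentence, so the
    # sentence-snapped window renders identically to the whole document.
    assert render_table(doc, formats, spacing=3, start_i=3, length=9) == full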
@@ -504,13 +506,13 @@ def test_viz_rich_render_table_start(
         ),
     ]
     print(
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
         )
     )
     print(
         repr(
-            Visualizer().render(
+            render_table(
                 fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
             )
         )
@@ -521,13 +523,13 @@ def test_viz_rich_render_table_start(
         else "\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- --------------- ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|Verb \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, start_i=11
         )
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -538,7 +540,7 @@ def test_viz_rich_render_table_start(
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -549,7 +551,7 @@ def test_viz_rich_render_table_start(
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -559,7 +561,7 @@ def test_viz_rich_render_table_start(
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -571,7 +573,7 @@ def test_viz_rich_render_table_start(
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -581,7 +583,7 @@ def test_viz_rich_render_table_start(
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -591,7 +593,7 @@ def test_viz_rich_render_table_start(
         == ""
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -601,7 +603,7 @@ def test_viz_rich_render_table_start(
         == ""
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -611,7 +613,7 @@ def test_viz_rich_render_table_start(
         == ""
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -650,25 +652,25 @@ def test_viz_rich_render_table_end(
     )

     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, start_i=2
         )
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, start_i=2, length=3
         )
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences, formats, spacing=3, length=3
         )
         == target
     )
     assert (
-        Visualizer().render(
+        render_table(
             fully_featured_doc_two_sentences,
             formats,
             spacing=3,
@@ -38,6 +38,7 @@ from .underscore import Underscore, get_ext_args
 from ._retokenize import Retokenizer
 from ._serialize import ALL_ATTRS as DOCBIN_ALL_ATTRS
 from ..util import get_words_and_spaces
+from ..visualization import render_document

 DEF PADDING = 5

@@ -1751,6 +1752,45 @@ cdef class Doc:
             attrs.extend(intify_attr(x) for x in DOCBIN_ALL_ATTRS)
         return tuple(attrs)

+    def inspect(
+        self,
+        search_attr_name=None,
+        search_attr_value=None,
+        *,
+        start_i=0,
+        length=None
+    ):
+        """Prints a tabular representation of the document or part of the document.
+        If part of the document is specified using any of the four optional
+        parameters, the sentences surrounding that part of the document are rendered;
+        if none of the four optional parameters are specified, the whole document is
+        rendered.
+
+        search_attr_name:  the name of an attribute to search for in order to
+                           determine where to start rendering, e.g. "lemma_",
+                           or *None* if no search is to be carried out. If either
+                           of *search_attr_name* and *search_attr_value* is *None*,
+                           the behaviour is as if both were *None*.
+        search_attr_value: the value of an attribute to search for in order to
+                           determine where to start rendering, e.g. "be",
+                           or *None* if no search is to be carried out. If either
+                           of *search_attr_name* and *search_attr_value* is *None*,
+                           the behaviour is as if both were *None*.
+        start_i:           the token index at which to start searching, or at
+                           whose sentence to start rendering. Default: 0.
+        length:            the number of tokens after *start_i* at whose sentence
+                           to stop rendering. If *None*, the rest of the
+                           document is rendered.
+        """
+        print(
+            render_document(
+                self,
+                search_attr_name,
+                search_attr_value,
+                start_i=start_i,
+                length=length
+            )
+        )
+
 cdef int token_by_start(const TokenC* tokens, int length, int start_char) except -2:
     cdef int i = token_by_char(tokens, length, start_char)
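Doc.inspect() is a thin convenience wrapper: it forwards its arguments unchanged to render_document and prints the result. A usage sketch of the four optional parameters (branch-only API, assuming a parsed doc):

    doc.inspect()                      # the whole document as a table
    doc.inspect("lemma_", "fly")       # start at the sentence of the first token whose lemma is "fly"
    doc.inspect(start_i=11)            # start at token 11's sentence
    doc.inspect(start_i=3, length=9)   # a sentence-snapped window of the document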
@@ -4,7 +4,6 @@ from re import search
 from typing import Dict, List, Optional, Union, cast
 import wasabi
 from wasabi.util import supports_ansi
-from spacy.tokens import Span, Token, Doc

 SUPPORTS_ANSI = supports_ansi()

@@ -57,7 +56,6 @@ class AttributeFormat:
         max_width: Optional[int] = None,
         fg_color: Optional[Union[str, int]] = None,
         bg_color: Optional[Union[str, int]] = None,
-        permitted_vals: Optional[tuple] = None,
         value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None,
         value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
     ):
@@ -78,13 +76,17 @@ class AttributeFormat:
         self.max_width = max_width
         self.fg_color = fg_color
         self.bg_color = bg_color
-        self.value_dep_fg_colors = value_dep_fg_colors
-        self.value_dep_bg_colors = value_dep_bg_colors
+        self.value_dep_fg_colors = (
+            value_dep_fg_colors if value_dep_fg_colors is not None else {}
+        )
+        self.value_dep_bg_colors = (
+            value_dep_bg_colors if value_dep_bg_colors is not None else {}
+        )
         self.printer = wasabi.Printer(no_print=True)

     def render(
         self,
-        token: Token,
+        token,
         *,
         right_pad_to_len: Optional[int] = None,
         ignore_colors: bool = False,
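Two related hygiene changes here: the optional color maps are normalized to {} at construction so later code can test len(...) rather than None (see the final hunk below), and the token: Token annotation goes away together with the spacy.tokens import above, presumably to avoid a circular import between spacy.tokens and spacy.visualization. The default-to-empty pattern in isolation:

    from typing import Dict, Optional, Union

    class Example:  # hypothetical stand-in for AttributeFormat's color fields
        def __init__(self, value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None):
            # None and {} collapse into one case; callers never see None.
            self.value_dep_fg_colors = (
                value_dep_fg_colors if value_dep_fg_colors is not None else {}
            )

    assert Example().value_dep_fg_colors == {}
    assert Example({"PERSON": 50}).value_dep_fg_colors["PERSON"] == 50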
@@ -93,7 +95,7 @@ class AttributeFormat:
         right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
         ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
         """
-        value = get_token_value(token, self.attribute)
+        value = _get_token_value(token, self.attribute)
         if self.max_width is not None:
             value = value[: self.max_width]
         fg_color = None
@ -103,315 +105,363 @@ class AttributeFormat:
|
||||||
else:
|
else:
|
||||||
right_padding = ""
|
right_padding = ""
|
||||||
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
|
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
|
||||||
if self.value_dep_fg_colors is not None:
|
if len(self.value_dep_fg_colors) > 0:
|
||||||
fg_color = self.value_dep_fg_colors.get(value, None)
|
fg_color = self.value_dep_fg_colors.get(value, None)
|
||||||
if self.value_dep_bg_colors is not None:
|
if len(self.value_dep_bg_colors) > 0:
|
||||||
bg_color = self.value_dep_bg_colors.get(value, None)
|
bg_color = self.value_dep_bg_colors.get(value, None)
|
||||||
if fg_color is not None or bg_color is not None:
|
if fg_color is not None or bg_color is not None:
|
||||||
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
||||||
return value + right_padding
|
return value + right_padding
|
||||||
|
|
||||||
|
|
||||||
class Visualizer:
|
def render_dep_tree(sent, root_right: bool) -> List[str]:
|
||||||
@staticmethod
|
"""
|
||||||
def render_dep_tree(sent: Span, root_right: bool) -> List[str]:
|
Returns an ASCII rendering of the document with a dependency tree for each sentence. The
|
||||||
"""
|
dependency tree output for a given token has the same index within the output list of
|
||||||
Returns an ASCII rendering of the document with a dependency tree for each sentence. The
|
strings as that token within the input document.
|
||||||
dependency tree output for a given token has the same index within the output list of
|
|
||||||
strings as that token within the input document.
|
|
||||||
|
|
||||||
root_right: True if the tree should be rendered with the root on the right-hand side,
|
root_right: True if the tree should be rendered with the root on the right-hand side,
|
||||||
False if the tree should be rendered with the root on the left-hand side.
|
False if the tree should be rendered with the root on the left-hand side.
|
||||||
|
|
||||||
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
|
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check sent is really a sentence
|
# Check sent is really a sentence
|
||||||
if sent.start != sent[0].sent.start or sent.end != sent[0].sent.end:
|
if sent.start != sent[0].sent.start or sent.end != sent[0].sent.end:
|
||||||
raise ValueError(f"Span is not a sentence: '{sent}'")
|
raise ValueError(f"Span is not a sentence: '{sent}'")
|
||||||
heads: List[Optional[int]] = []
|
heads: List[Optional[int]] = []
|
||||||
for token in sent:
|
for token in sent:
|
||||||
if token.dep_.lower() == "root" or token.head.i == token.i:
|
if token.dep_.lower() == "root" or token.head.i == token.i:
|
||||||
heads.append(None)
|
heads.append(None)
|
||||||
else:
|
else:
|
||||||
heads.append(token.head.i - sent.start)
|
heads.append(token.head.i - sent.start)
|
||||||
# Check there are no head references outside the sentence
|
# Check there are no head references outside the sentence
|
||||||
heads_outside_sent = [
|
heads_outside_sent = [
|
||||||
1 for h in heads if h is not None and (h < 0 or h > sent.end - sent.start)
|
1 for h in heads if h is not None and (h < 0 or h > sent.end - sent.start)
|
||||||
]
|
]
|
||||||
if len(heads_outside_sent) > 0:
|
if len(heads_outside_sent) > 0:
|
||||||
raise ValueError(f"Head reference outside sentence in sentence '{sent}'")
|
raise ValueError(f"Head reference outside sentence in sentence '{sent}'")
|
||||||
children_lists: List[List[int]] = [[] for _ in range(sent.end - sent.start)]
|
children_lists: List[List[int]] = [[] for _ in range(sent.end - sent.start)]
|
||||||
for child, head in enumerate(heads):
|
for child, head in enumerate(heads):
|
||||||
if head is not None:
|
if head is not None:
|
||||||
children_lists[head].append(child)
|
children_lists[head].append(child)
|
||||||
all_ind_ord_by_col: List[int] = []
|
all_ind_ord_by_col: List[int] = []
|
||||||
# start with the root column
|
# start with the root column
|
||||||
inds_in_this_col = [i for i, h in enumerate(heads) if h is None]
|
inds_in_this_col = [i for i, h in enumerate(heads) if h is None]
|
||||||
while len(inds_in_this_col) > 0:
|
while len(inds_in_this_col) > 0:
|
||||||
all_ind_ord_by_col = inds_in_this_col + all_ind_ord_by_col
|
all_ind_ord_by_col = inds_in_this_col + all_ind_ord_by_col
|
||||||
inds_in_next_col = []
|
inds_in_next_col = []
|
||||||
# The calculation order of the horizontal lengths of the children
|
# The calculation order of the horizontal lengths of the children
|
||||||
# on either given side of a head must ensure that children
|
# on either given side of a head must ensure that children
|
||||||
# closer to the head are processed first.
|
# closer to the head are processed first.
|
||||||
for ind_in_this_col in inds_in_this_col:
|
for ind_in_this_col in inds_in_this_col:
|
||||||
following_child_inds = [
|
following_child_inds = [
|
||||||
i for i in children_lists[ind_in_this_col] if i > ind_in_this_col
|
i for i in children_lists[ind_in_this_col] if i > ind_in_this_col
|
||||||
]
|
]
|
||||||
inds_in_next_col.extend(following_child_inds)
|
inds_in_next_col.extend(following_child_inds)
|
||||||
preceding_child_inds = [
|
preceding_child_inds = [
|
||||||
i for i in children_lists[ind_in_this_col] if i < ind_in_this_col
|
i for i in children_lists[ind_in_this_col] if i < ind_in_this_col
|
||||||
]
|
]
|
||||||
preceding_child_inds.reverse()
|
preceding_child_inds.reverse()
|
||||||
inds_in_next_col.extend(preceding_child_inds)
|
inds_in_next_col.extend(preceding_child_inds)
|
||||||
inds_in_this_col = inds_in_next_col
|
inds_in_this_col = inds_in_next_col
|
||||||
horiz_line_lens: List[int] = []
|
horiz_line_lens: List[int] = []
|
||||||
for i in range(sent.end - sent.start):
|
for i in range(sent.end - sent.start):
|
||||||
if heads[i] is None:
|
if heads[i] is None:
|
||||||
horiz_line_lens.append(-1)
|
horiz_line_lens.append(-1)
|
||||||
elif len(children_lists[i]) == 0 and abs(cast(int, heads[i]) - i) == 1:
|
elif len(children_lists[i]) == 0 and abs(cast(int, heads[i]) - i) == 1:
|
||||||
# governed by direct neighbour and has no children itself
|
# governed by direct neighbour and has no children itself
|
||||||
horiz_line_lens.append(1)
|
horiz_line_lens.append(1)
|
||||||
else:
|
else:
|
||||||
horiz_line_lens.append(0)
|
horiz_line_lens.append(0)
|
||||||
while 0 in horiz_line_lens:
|
while 0 in horiz_line_lens:
|
||||||
for working_token_ind in (
|
|
||||||
i for i in all_ind_ord_by_col if horiz_line_lens[i] == 0
|
|
||||||
):
|
|
||||||
# render relation between this token and its head
|
|
||||||
first_ind_in_rel = min(
|
|
||||||
working_token_ind,
|
|
||||||
cast(int, heads[working_token_ind]),
|
|
||||||
)
|
|
||||||
second_ind_in_rel = max(
|
|
||||||
working_token_ind,
|
|
||||||
cast(int, heads[working_token_ind]),
|
|
||||||
)
|
|
||||||
# If this token has children, they will already have been rendered.
|
|
||||||
# The line needs to be one character longer than the longest of the
|
|
||||||
# children's lines.
|
|
||||||
if len(children_lists[working_token_ind]) > 0:
|
|
||||||
horiz_line_lens[working_token_ind] = (
|
|
||||||
max(
|
|
||||||
[
|
|
||||||
horiz_line_lens[i]
|
|
||||||
for i in children_lists[working_token_ind]
|
|
||||||
]
|
|
||||||
)
|
|
||||||
+ 1
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
horiz_line_lens[working_token_ind] = 1
|
|
||||||
for inbetween_ind in (
|
|
||||||
i
|
|
||||||
for i in range(first_ind_in_rel + 1, second_ind_in_rel)
|
|
||||||
if horiz_line_lens[i] != 0
|
|
||||||
):
|
|
||||||
alt_ind: int
|
|
||||||
if (
|
|
||||||
inbetween_ind
|
|
||||||
in children_lists[cast(int, heads[working_token_ind])]
|
|
||||||
and inbetween_ind not in children_lists[working_token_ind]
|
|
||||||
):
|
|
||||||
alt_ind = horiz_line_lens[inbetween_ind]
|
|
||||||
else:
|
|
||||||
alt_ind = horiz_line_lens[inbetween_ind] + 1
|
|
||||||
if alt_ind > horiz_line_lens[working_token_ind]:
|
|
||||||
horiz_line_lens[working_token_ind] = alt_ind
|
|
||||||
max_horiz_line_len = max(horiz_line_lens)
|
|
||||||
char_matrix = [
|
|
||||||
[SPACE] * max_horiz_line_len * 2 for _ in range(sent.start, sent.end)
|
|
||||||
]
|
|
||||||
for working_token_ind in range(sent.end - sent.start):
|
|
||||||
head_token_ind = heads[working_token_ind]
|
|
||||||
if head_token_ind is None:
|
|
||||||
continue
|
|
||||||
first_ind_in_rel = min(working_token_ind, head_token_ind)
|
|
||||||
second_ind_in_rel = max(working_token_ind, head_token_ind)
|
|
||||||
char_horiz_line_len = 2 * horiz_line_lens[working_token_ind]
|
|
||||||
|
|
||||||
# Draw the corners of the relation
|
|
||||||
char_matrix[first_ind_in_rel][char_horiz_line_len - 1] |= (
|
|
||||||
HALF_HORIZONTAL_LINE + LOWER_HALF_VERTICAL_LINE
|
|
||||||
)
|
|
||||||
char_matrix[second_ind_in_rel][char_horiz_line_len - 1] |= (
|
|
||||||
HALF_HORIZONTAL_LINE + UPPER_HALF_VERTICAL_LINE
|
|
||||||
)
|
|
||||||
|
|
||||||
# Draw the horizontal line for the governing token
|
|
||||||
for working_horiz_pos in range(char_horiz_line_len - 1):
|
|
||||||
if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE:
|
|
||||||
char_matrix[head_token_ind][
|
|
||||||
working_horiz_pos
|
|
||||||
] |= FULL_HORIZONTAL_LINE
|
|
||||||
|
|
||||||
# Draw the vertical line for the relation
|
|
||||||
for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel):
|
|
||||||
if (
|
|
||||||
char_matrix[working_vert_pos][char_horiz_line_len - 1]
|
|
||||||
!= FULL_HORIZONTAL_LINE
|
|
||||||
):
|
|
||||||
char_matrix[working_vert_pos][
|
|
||||||
char_horiz_line_len - 1
|
|
||||||
] |= FULL_VERTICAL_LINE
|
|
||||||
for working_token_ind in (
|
for working_token_ind in (
|
||||||
i for i in range(sent.end - sent.start) if heads[i] is not None
|
        i for i in all_ind_ord_by_col if horiz_line_lens[i] == 0
    ):
        # render relation between this token and its head
        first_ind_in_rel = min(
            working_token_ind,
            cast(int, heads[working_token_ind]),
        )
        second_ind_in_rel = max(
            working_token_ind,
            cast(int, heads[working_token_ind]),
        )
        # If this token has children, they will already have been rendered.
        # The line needs to be one character longer than the longest of the
        # children's lines.
        if len(children_lists[working_token_ind]) > 0:
            horiz_line_lens[working_token_ind] = (
                max([horiz_line_lens[i] for i in children_lists[working_token_ind]])
                + 1
            )
        else:
            horiz_line_lens[working_token_ind] = 1
        for inbetween_ind in (
            i
            for i in range(first_ind_in_rel + 1, second_ind_in_rel)
            if horiz_line_lens[i] != 0
        ):
            alt_ind: int
            if (
                inbetween_ind in children_lists[cast(int, heads[working_token_ind])]
                and inbetween_ind not in children_lists[working_token_ind]
            ):
                alt_ind = horiz_line_lens[inbetween_ind]
            else:
                alt_ind = horiz_line_lens[inbetween_ind] + 1
            if alt_ind > horiz_line_lens[working_token_ind]:
                horiz_line_lens[working_token_ind] = alt_ind
    max_horiz_line_len = max(horiz_line_lens)
    char_matrix = [
        [SPACE] * max_horiz_line_len * 2 for _ in range(sent.start, sent.end)
    ]
    for working_token_ind in range(sent.end - sent.start):
        head_token_ind = heads[working_token_ind]
        if head_token_ind is None:
            continue
        first_ind_in_rel = min(working_token_ind, head_token_ind)
        second_ind_in_rel = max(working_token_ind, head_token_ind)
        char_horiz_line_len = 2 * horiz_line_lens[working_token_ind]

        # Draw the corners of the relation
        char_matrix[first_ind_in_rel][char_horiz_line_len - 1] |= (
            HALF_HORIZONTAL_LINE + LOWER_HALF_VERTICAL_LINE
        )
        char_matrix[second_ind_in_rel][char_horiz_line_len - 1] |= (
            HALF_HORIZONTAL_LINE + UPPER_HALF_VERTICAL_LINE
        )

        # Draw the horizontal line for the governing token
        for working_horiz_pos in range(char_horiz_line_len - 1):
            if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE:
                char_matrix[head_token_ind][working_horiz_pos] |= FULL_HORIZONTAL_LINE

        # Draw the vertical line for the relation
        for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel):
            if (
                char_matrix[working_vert_pos][char_horiz_line_len - 1]
                != FULL_HORIZONTAL_LINE
            ):
                char_matrix[working_vert_pos][
                    char_horiz_line_len - 1
                ] |= FULL_VERTICAL_LINE
    for working_token_ind in (
        i for i in range(sent.end - sent.start) if heads[i] is not None
    ):
        for working_horiz_pos in range(
            2 * horiz_line_lens[working_token_ind] - 2, -1, -1
        ):
            if (
                (
                    char_matrix[working_token_ind][working_horiz_pos]
                    == FULL_VERTICAL_LINE
                )
                and working_horiz_pos > 1
                and char_matrix[working_token_ind][working_horiz_pos - 2] == SPACE
            ):
                # Cross over the existing vertical line, which is owing to a
                # non-projective tree
                continue
            if char_matrix[working_token_ind][working_horiz_pos] != SPACE:
                # Draw the arrowhead to the right of what is already there
                char_matrix[working_token_ind][working_horiz_pos + 1] = ARROWHEAD
                break
            if working_horiz_pos == 0:
                # Draw the arrowhead at the boundary of the diagram
                char_matrix[working_token_ind][working_horiz_pos] = ARROWHEAD
            else:
                # Fill in the horizontal line for the governed token
                char_matrix[working_token_ind][
                    working_horiz_pos
                ] |= FULL_HORIZONTAL_LINE
    if root_right:
        return [
            "".join(
                ROOT_RIGHT_CHARS[char_matrix[vert_pos][horiz_pos]]
                for horiz_pos in range(max_horiz_line_len * 2)
            )
            for vert_pos in range(sent.end - sent.start)
        ]
    else:
        return [
            "".join(
                ROOT_LEFT_CHARS[char_matrix[vert_pos][horiz_pos]]
                for horiz_pos in range(max_horiz_line_len * 2)
            )[::-1]
            for vert_pos in range(sent.end - sent.start)
        ]
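
A minimal usage sketch for render_dep_tree, assuming an installed en_core_web_sm pipeline (any pipeline with a parser works). The function returns one string per token in the span, so the tree lines can be zipped with the tokens:

    import spacy
    from spacy.visualization import render_dep_tree

    nlp = spacy.load("en_core_web_sm")  # assumed pipeline with a parser
    doc = nlp("The dog chased the cat.")
    sent = next(doc.sents)
    # True puts the roots at the right-hand edge of the diagram
    for token, tree_line in zip(sent, render_dep_tree(sent, True)):
        print(tree_line, token.text)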

def render_table(
    doc,
    cols: List[AttributeFormat],
    spacing: int = 3,
    search_attr_name: Optional[str] = None,
    search_attr_value: Optional[str] = None,
    start_i: int = 0,
    length: Optional[int] = None,
) -> str:
    """Renders a document as a table, allowing the caller to specify various
    display options.

    doc: the document.
    cols: the attribute formats of the columns to display.
        tree_right and tree_left are magic values for the
        attributes that render dependency trees where the
        roots are on the left or right respectively.
    spacing: the number of spaces between each column in the table.
    search_attr_name: the name of an attribute to search for in order to
        determine where to start rendering, e.g. "lemma_",
        or *None* if no search is to be carried out. If either
        of *search_attr_name* and *search_attr_value* is *None*,
        the behaviour is as if both were *None*.
    search_attr_value: the value of an attribute to search for in order to
        determine where to start rendering, e.g. "be",
        or *None* if no search is to be carried out. If either
        of *search_attr_name* and *search_attr_value* is *None*,
        the behaviour is as if both were *None*.
    start_i: the token index at which to start searching, or at
        whose sentence to start rendering. Default: 0.
    length: the number of tokens after *start_i* at whose sentence
        to stop rendering. If *None*, the rest of the
        document is rendered.
    """
    return_str = ""
    if (
        search_attr_name is not None
        and search_attr_name not in ("tree_right", "tree_left")
        and search_attr_value is not None
    ):
        adj_start_i = _get_adjusted_start_i(
            doc, start_i, cols, search_attr_name, search_attr_value
        )
    else:
        adj_start_i = start_i
    if adj_start_i >= len(doc):
        return return_str
    end_i = len(doc) - 1
    if length is not None:
        end_i = min(end_i, adj_start_i + length)
    elif start_i > 0 or (
        search_attr_name is not None and search_attr_value is not None
    ):
        end_i = adj_start_i
    adj_start_i = doc[adj_start_i].sent.start
    end_i = doc[end_i].sent.end
    for sent in doc[adj_start_i:end_i].sents:
        if "tree_right" in (c.attribute for c in cols):
            tree_right = render_dep_tree(sent, True)
        if "tree_left" in (c.attribute for c in cols):
            tree_left = render_dep_tree(sent, False)
        widths = []
        for col in cols:
            # get the values without any color codes
            if col.attribute == "tree_left":
                width = len(tree_left[0])  # type: ignore
            elif col.attribute == "tree_right":
                width = len(tree_right[0])  # type: ignore
            else:
                if len(sent) > 0:
                    width = max(
                        len(col.render(token, ignore_colors=True)) for token in sent
                    )
                else:
                    width = 0
                if col.max_width is not None:
                    width = min(width, col.max_width)
            width = max(width, len(col.name))
            widths.append(width)
        data: List[List[str]] = []
        for token_index, token in enumerate(sent):
            inner_data: List[str] = []
            for col_index, col in enumerate(cols):
                if col.attribute == "tree_right":
                    inner_data.append(tree_right[token_index])
                elif col.attribute == "tree_left":
                    inner_data.append(tree_left[token_index])
                else:
                    inner_data.append(
                        col.render(token, right_pad_to_len=widths[col_index])
                    )
            data.append(inner_data)
        header: Optional[List[str]]
        if len([1 for c in cols if len(c.name) > 0]) > 0:
            header = [c.name for c in cols]
        else:
            header = None
        aligns = [c.aligns for c in cols]
        fg_colors = [c.fg_color for c in cols]
        bg_colors = [c.bg_color for c in cols]
        return_str += (
            wasabi.table(
                data,
                header=header,
                divider=True,
                aligns=aligns,
                widths=widths,
                fg_colors=fg_colors,
                bg_colors=bg_colors,
                spacing=spacing,
            )
            + "\n"
        )
    return return_str
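
A usage sketch for render_table; the pipeline name and column set below are illustrative assumptions. When a search attribute and value are supplied and *length* is *None*, rendering is limited to the sentence containing the first match:

    import spacy
    from spacy.visualization import AttributeFormat, render_table

    nlp = spacy.load("en_core_web_sm")  # assumed pipeline
    doc = nlp("I saw it. The dog the cat chased barked.")
    cols = [
        AttributeFormat("tree_left", name="tree", aligns="r"),
        AttributeFormat("text", name="text"),
        AttributeFormat("dep_", name="dep_"),
    ]
    # Renders only the sentence containing the first token whose lemma is "bark"
    print(render_table(doc, cols, search_attr_name="lemma_", search_attr_value="bark"))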

def render_document(
    doc,
    search_attr_name: Optional[str] = None,
    search_attr_value: Optional[str] = None,
    *,
    start_i: int = 0,
    length: Optional[int] = None,
) -> str:
    """Renders a document as a table using standard display options.

    doc: the document.
    search_attr_name: the name of an attribute to search for in order to
        determine where to start rendering, e.g. "lemma_",
        or *None* if no search is to be carried out. If either
        of *search_attr_name* and *search_attr_value* is *None*,
        the behaviour is as if both were *None*.
    search_attr_value: the value of an attribute to search for in order to
        determine where to start rendering, e.g. "be",
        or *None* if no search is to be carried out. If either
        of *search_attr_name* and *search_attr_value* is *None*,
        the behaviour is as if both were *None*.
    start_i: the token index at which to start searching, or at
        whose sentence to start rendering. Default: 0.
    length: the number of tokens after *start_i* at whose sentence
        to stop rendering. If *None*, the rest of the
        document is rendered.
    """
    cols = [
        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=4),
        AttributeFormat("dep_", name="dep_"),
        AttributeFormat("ent_type_", name="ent_type_"),
        AttributeFormat("i", name="index", aligns="r"),
        AttributeFormat("text", name="text", max_width=20),
        AttributeFormat("lemma_", name="lemma_", max_width=20),
        AttributeFormat("pos_", name="pos_"),
        AttributeFormat("tag_", name="tag_"),
        AttributeFormat("morph", name="morph_", max_width=60),
    ]
    if search_attr_name is not None and search_attr_value is not None:
        for col in cols:
            if col.attribute == search_attr_name or col.name == search_attr_name:
                col.value_dep_fg_colors[search_attr_value] = 1
    return render_table(
        doc=doc,
        cols=cols,
        spacing=3,
        search_attr_name=search_attr_name,
        search_attr_value=search_attr_value,
        start_i=start_i,
        length=length,
    )
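
render_document is the convenience wrapper over render_table; a minimal sketch, again assuming an installed en_core_web_sm pipeline:

    import spacy
    from spacy.visualization import render_document

    nlp = spacy.load("en_core_web_sm")  # assumed pipeline
    doc = nlp("A first sentence. The cat chased the dog.")
    # Starts at the sentence of the first token whose lemma_ is "chase";
    # matching column values are highlighted via value_dep_fg_colors
    print(render_document(doc, "lemma_", "chase"))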

def _get_token_value(token, attribute: str) -> str:
    """
    Get value *token.x.y.z*.

@ -422,11 +472,11 @@ def get_token_value(token: Token, attribute: str) -> str:
    parts = attribute.split(".")
    for part in parts[:-1]:
        obj = getattr(obj, part)
    return str(getattr(obj, parts[-1])).strip()
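
A sketch of the dotted-path lookup (the pipeline is an assumption; the helper is module-level in this file, so the import works):

    import spacy
    from spacy.visualization import _get_token_value  # private helper defined above

    nlp = spacy.load("en_core_web_sm")  # assumed pipeline
    doc = nlp("Dogs bark.")
    # "head.pos_" walks token.head, then reads pos_; the result is
    # stringified and stripped
    assert _get_token_value(doc[0], "head.pos_") == str(doc[0].head.pos_).strip()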

def _get_adjusted_start_i(
    doc,
    start_i: int,
    cols: List[AttributeFormat],
    search_attr_name: str,

@ -447,7 +497,7 @@ def get_adjusted_start_i(
    for col in cols:
        if col.name == search_attr_name or col.attribute == search_attr_name:
            for token in doc[start_i:]:
                if _get_token_value(token, col.attribute) == search_attr_value:
                    return token.i
    else:
        return len(doc)
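
A sketch of the search helper (pipeline and example values assumed): it returns the index of the first token at or after *start_i* whose searched attribute matches, or len(doc) when nothing matches in any searchable column:

    import spacy
    from spacy.visualization import AttributeFormat, _get_adjusted_start_i

    nlp = spacy.load("en_core_web_sm")  # assumed pipeline
    doc = nlp("One sentence. The dog barked.")
    cols = [AttributeFormat("lemma_", name="lemma_")]
    i = _get_adjusted_start_i(doc, 0, cols, "lemma_", "bark")
    # i is the index of "barked" (assuming the lemmatizer yields "bark");
    # an unmatched search returns len(doc)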