mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-03 03:40:24 +03:00
Improvements based on PR feedback
This commit is contained in:
parent
c8fd577ba4
commit
3557c613b1
|
@ -292,6 +292,11 @@ def pl_tokenizer():
|
||||||
return get_lang_class("pl")().tokenizer
|
return get_lang_class("pl")().tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="session")
|
||||||
|
def pl_vocab():
|
||||||
|
return get_lang_class("pl")().vocab
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope="session")
|
@pytest.fixture(scope="session")
|
||||||
def pt_tokenizer():
|
def pt_tokenizer():
|
||||||
return get_lang_class("pt")().tokenizer
|
return get_lang_class("pt")().tokenizer
|
||||||
|
|
|
@ -7,7 +7,27 @@ from spacy.tokens import Span, Doc, Token
|
||||||
SUPPORTS_ANSI = supports_ansi()
|
SUPPORTS_ANSI = supports_ansi()
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_dependency_tree_basic(en_vocab):
|
@pytest.fixture
|
||||||
|
def horse_doc(en_vocab):
|
||||||
|
return Doc(
|
||||||
|
en_vocab,
|
||||||
|
words=[
|
||||||
|
"I",
|
||||||
|
"saw",
|
||||||
|
"a",
|
||||||
|
"horse",
|
||||||
|
"yesterday",
|
||||||
|
"that",
|
||||||
|
"was",
|
||||||
|
"injured",
|
||||||
|
".",
|
||||||
|
],
|
||||||
|
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
||||||
|
deps=["dep"] * 9,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_viz_dep_tree_basic(en_vocab):
|
||||||
"""Test basic dependency tree display."""
|
"""Test basic dependency tree display."""
|
||||||
doc = Doc(
|
doc = Doc(
|
||||||
en_vocab,
|
en_vocab,
|
||||||
|
@ -25,7 +45,7 @@ def test_visualization_dependency_tree_basic(en_vocab):
|
||||||
heads=[2, 2, 3, None, 6, 6, 3, 3, 3],
|
heads=[2, 2, 3, None, 6, 6, 3, 3, 3],
|
||||||
deps=["dep"] * 9,
|
deps=["dep"] * 9,
|
||||||
)
|
)
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], True)
|
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
"<╗ ",
|
"<╗ ",
|
||||||
"<╣ ",
|
"<╣ ",
|
||||||
|
@ -37,7 +57,7 @@ def test_visualization_dependency_tree_basic(en_vocab):
|
||||||
"<══╣",
|
"<══╣",
|
||||||
"<══╝",
|
"<══╝",
|
||||||
]
|
]
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], False)
|
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
" ╔>",
|
" ╔>",
|
||||||
" ╠>",
|
" ╠>",
|
||||||
|
@ -51,7 +71,7 @@ def test_visualization_dependency_tree_basic(en_vocab):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_dependency_tree_non_initial_sentence(en_vocab):
|
def test_viz_dep_tree_non_initial_sent(en_vocab):
|
||||||
"""Test basic dependency tree display."""
|
"""Test basic dependency tree display."""
|
||||||
doc = Doc(
|
doc = Doc(
|
||||||
en_vocab,
|
en_vocab,
|
||||||
|
@ -72,7 +92,7 @@ def test_visualization_dependency_tree_non_initial_sentence(en_vocab):
|
||||||
heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6],
|
heads=[0, None, 0, 5, 5, 6, None, 9, 9, 6, 6, 6],
|
||||||
deps=["dep"] * 12,
|
deps=["dep"] * 12,
|
||||||
)
|
)
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[3 : len(doc)], True)
|
dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], True)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
"<╗ ",
|
"<╗ ",
|
||||||
"<╣ ",
|
"<╣ ",
|
||||||
|
@ -84,7 +104,7 @@ def test_visualization_dependency_tree_non_initial_sentence(en_vocab):
|
||||||
"<══╣",
|
"<══╣",
|
||||||
"<══╝",
|
"<══╝",
|
||||||
]
|
]
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[3 : len(doc)], False)
|
dep_tree = Visualizer.render_dep_tree(doc[3 : len(doc)], False)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
" ╔>",
|
" ╔>",
|
||||||
" ╠>",
|
" ╠>",
|
||||||
|
@ -98,25 +118,9 @@ def test_visualization_dependency_tree_non_initial_sentence(en_vocab):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_dependency_tree_non_projective(en_vocab):
|
def test_viz_dep_tree_non_projective(horse_doc):
|
||||||
"""Test dependency tree display with a non-projective dependency."""
|
"""Test dependency tree display with a non-projective dependency."""
|
||||||
doc = Doc(
|
dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], True)
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], True)
|
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
"<╗ ",
|
"<╗ ",
|
||||||
"═╩═══╗",
|
"═╩═══╗",
|
||||||
|
@ -128,7 +132,7 @@ def test_visualization_dependency_tree_non_projective(en_vocab):
|
||||||
"═╝<╝ ║",
|
"═╝<╝ ║",
|
||||||
"<════╝",
|
"<════╝",
|
||||||
]
|
]
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], False)
|
dep_tree = Visualizer.render_dep_tree(horse_doc[0 : len(horse_doc)], False)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
" ╔>",
|
" ╔>",
|
||||||
"╔═══╩═",
|
"╔═══╩═",
|
||||||
|
@ -142,33 +146,10 @@ def test_visualization_dependency_tree_non_projective(en_vocab):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_dependency_tree_input_not_span(en_vocab):
|
def test_viz_dep_tree_highly_nonprojective(pl_vocab):
|
||||||
"""Test dependency tree display behaviour when the input is not a Span."""
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
with pytest.raises(AssertionError):
|
|
||||||
Visualizer.render_dependency_tree(doc[1:3], True)
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_dependency_tree_highly_nonprojective(en_vocab):
|
|
||||||
"""Test a highly non-projective tree (colloquial Polish)."""
|
"""Test a highly non-projective tree (colloquial Polish)."""
|
||||||
doc = Doc(
|
doc = Doc(
|
||||||
en_vocab,
|
pl_vocab,
|
||||||
words=[
|
words=[
|
||||||
"Owczarki",
|
"Owczarki",
|
||||||
"przecież",
|
"przecież",
|
||||||
|
@ -182,7 +163,7 @@ def test_visualization_dependency_tree_highly_nonprojective(en_vocab):
|
||||||
heads=[5, 5, 0, 5, 5, None, 4, 5],
|
heads=[5, 5, 0, 5, 5, None, 4, 5],
|
||||||
deps=["dep"] * 8,
|
deps=["dep"] * 8,
|
||||||
)
|
)
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], True)
|
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], True)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
"═╗<╗",
|
"═╗<╗",
|
||||||
" ║<╣",
|
" ║<╣",
|
||||||
|
@ -193,7 +174,7 @@ def test_visualization_dependency_tree_highly_nonprojective(en_vocab):
|
||||||
"<╝ ║",
|
"<╝ ║",
|
||||||
"<══╝",
|
"<══╝",
|
||||||
]
|
]
|
||||||
dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], False)
|
dep_tree = Visualizer.render_dep_tree(doc[0 : len(doc)], False)
|
||||||
assert dep_tree == [
|
assert dep_tree == [
|
||||||
"╔>╔═",
|
"╔>╔═",
|
||||||
"╠>║ ",
|
"╠>║ ",
|
||||||
|
@ -206,334 +187,99 @@ def test_visualization_dependency_tree_highly_nonprojective(en_vocab):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_native_attribute_int(en_vocab):
|
def test_viz_dep_tree_input_not_span(horse_doc):
|
||||||
doc = Doc(
|
"""Test dependency tree display behaviour when the input is not a Span."""
|
||||||
en_vocab,
|
with pytest.raises(ValueError):
|
||||||
words=[
|
Visualizer.render_dep_tree(horse_doc[1:3], True)
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
assert AttributeFormat("head.i").render(doc[2]) == "3"
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_native_attribute_int_with_right_padding(en_vocab):
|
def test_viz_render_native_attributes(horse_doc):
|
||||||
doc = Doc(
|
assert AttributeFormat("head.i").render(horse_doc[2]) == "3"
|
||||||
en_vocab,
|
assert AttributeFormat("head.i").render(horse_doc[2], right_pad_to_len=3) == "3 "
|
||||||
words=[
|
assert AttributeFormat("dep_").render(horse_doc[2]) == "dep"
|
||||||
"I",
|
with pytest.raises(AttributeError):
|
||||||
"saw",
|
AttributeFormat("depp").render(horse_doc[2])
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
assert AttributeFormat("head.i").render(doc[2], right_pad_to_length=3) == "3 "
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_native_attribute_str(en_vocab):
|
def test_viz_render_colors(horse_doc):
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert AttributeFormat("dep_").render(doc[2]) == "dep"
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_colors(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"dep_",
|
"dep_",
|
||||||
value_dependent_fg_colors={"dep": 2},
|
value_dep_fg_colors={"dep": 2},
|
||||||
value_dependent_bg_colors={"dep": 11},
|
value_dep_bg_colors={"dep": 11},
|
||||||
).render(doc[2])
|
).render(horse_doc[2])
|
||||||
== "\x1b[38;5;2;48;5;11mdep\x1b[0m"
|
== "\x1b[38;5;2;48;5;11mdep\x1b[0m"
|
||||||
if SUPPORTS_ANSI
|
if SUPPORTS_ANSI
|
||||||
else "dep"
|
else "dep"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# whole row
|
||||||
def test_visualization_render_whole_row_colors(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"dep_",
|
"dep_",
|
||||||
).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
|
).render(horse_doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
|
||||||
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
|
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
|
||||||
if SUPPORTS_ANSI
|
if SUPPORTS_ANSI
|
||||||
else "dep"
|
else "dep"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# whole row with value dependent colors
|
||||||
def test_visualization_render_whole_row_colors_with_value_dependent_colors(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"dep_",
|
"dep_",
|
||||||
value_dependent_fg_colors={"dep": 2},
|
value_dep_fg_colors={"dep": 2},
|
||||||
value_dependent_bg_colors={"dep": 11},
|
value_dep_bg_colors={"dep": 11},
|
||||||
).render(doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
|
).render(horse_doc[2], whole_row_fg_color=8, whole_row_bg_color=9)
|
||||||
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
|
== "\x1b[38;5;8;48;5;9mdep\x1b[0m"
|
||||||
if SUPPORTS_ANSI
|
if SUPPORTS_ANSI
|
||||||
else "dep"
|
else "dep"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# foreground only
|
||||||
def test_visualization_render_colors_only_fg(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"dep_",
|
"dep_",
|
||||||
value_dependent_fg_colors={"dep": 2},
|
value_dep_fg_colors={"dep": 2},
|
||||||
).render(doc[2])
|
).render(horse_doc[2])
|
||||||
== "\x1b[38;5;2mdep\x1b[0m"
|
== "\x1b[38;5;2mdep\x1b[0m"
|
||||||
if SUPPORTS_ANSI
|
if SUPPORTS_ANSI
|
||||||
else "dep"
|
else "dep"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# background only
|
||||||
def test_visualization_render_entity_colors_only_bg(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"dep_",
|
"dep_",
|
||||||
value_dependent_bg_colors={"dep": 11},
|
value_dep_bg_colors={"dep": 11},
|
||||||
).render(doc[2])
|
).render(horse_doc[2])
|
||||||
== "\x1b[48;5;11mdep\x1b[0m"
|
== "\x1b[48;5;11mdep\x1b[0m"
|
||||||
if SUPPORTS_ANSI
|
if SUPPORTS_ANSI
|
||||||
else "dep"
|
else "dep"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_native_attribute_missing(en_vocab):
|
def test_viz_render_custom_attributes(horse_doc):
|
||||||
doc = Doc(
|
Token.set_extension("test", default="tested1", force=True)
|
||||||
en_vocab,
|
assert AttributeFormat("_.test").render(horse_doc[2]) == "tested1"
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
with pytest.raises(AttributeError):
|
|
||||||
AttributeFormat("depp").render(doc[2])
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_custom_attribute_str(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
Token.set_extension("test", default="tested", force=True)
|
|
||||||
assert AttributeFormat("_.test").render(doc[2]) == "tested"
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_nested_custom_attribute_str(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
|
|
||||||
class Test:
|
class Test:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.inner_test = "tested"
|
self.inner_test = "tested2"
|
||||||
|
|
||||||
Token.set_extension("test", default=Test(), force=True)
|
Token.set_extension("test", default=Test(), force=True)
|
||||||
assert AttributeFormat("_.test.inner_test").render(doc[2]) == "tested"
|
assert AttributeFormat("_.test.inner_test").render(horse_doc[2]) == "tested2"
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_custom_attribute_missing(en_vocab):
|
|
||||||
doc = Doc(
|
|
||||||
en_vocab,
|
|
||||||
words=[
|
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
with pytest.raises(AttributeError):
|
with pytest.raises(AttributeError):
|
||||||
AttributeFormat("._depp").render(doc[2])
|
AttributeFormat("._depp").render(horse_doc[2])
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_permitted_values(en_vocab):
|
def test_viz_render_permitted_values(horse_doc):
|
||||||
doc = Doc(
|
attribute_format = AttributeFormat("head.i", permitted_vals=(3, 7))
|
||||||
en_vocab,
|
vals = ["", "", "3", "", "", "7", "7", "3", ""]
|
||||||
words=[
|
assert [attribute_format.render(token) for token in horse_doc] == vals
|
||||||
"I",
|
|
||||||
"saw",
|
|
||||||
"a",
|
|
||||||
"horse",
|
|
||||||
"yesterday",
|
|
||||||
"that",
|
|
||||||
"was",
|
|
||||||
"injured",
|
|
||||||
".",
|
|
||||||
],
|
|
||||||
heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
|
|
||||||
deps=["dep"] * 9,
|
|
||||||
)
|
|
||||||
attribute_format = AttributeFormat("head.i", permitted_values=(3, 7))
|
|
||||||
assert [attribute_format.render(token) for token in doc] == [
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"3",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"7",
|
|
||||||
"7",
|
|
||||||
"3",
|
|
||||||
"",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_table_one_sentence(
|
def test_viz_minimal_render_table_one_sentence(
|
||||||
fully_featured_doc_one_sentence,
|
fully_featured_doc_one_sentence,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
|
@ -565,9 +311,10 @@ def test_visualization_minimal_render_table_one_sentence(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_table_empty_text_no_headers(
|
def test_viz_minimal_render_table_empty_text(
|
||||||
en_vocab,
|
en_vocab,
|
||||||
):
|
):
|
||||||
|
# no headers
|
||||||
formats = [
|
formats = [
|
||||||
AttributeFormat("tree_left"),
|
AttributeFormat("tree_left"),
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
|
@ -580,10 +327,7 @@ def test_visualization_minimal_render_table_empty_text_no_headers(
|
||||||
]
|
]
|
||||||
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
|
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
|
||||||
|
|
||||||
|
# headers
|
||||||
def test_visualization_minimal_render_table_empty_text_headers(
|
|
||||||
en_vocab,
|
|
||||||
):
|
|
||||||
formats = [
|
formats = [
|
||||||
AttributeFormat("tree_left", name="tree"),
|
AttributeFormat("tree_left", name="tree"),
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
|
@ -597,14 +341,14 @@ def test_visualization_minimal_render_table_empty_text_headers(
|
||||||
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
|
assert Visualizer().render_table(Doc(en_vocab), formats, spacing=3).strip() == ""
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_table_permitted_values(
|
def test_viz_minimal_render_table_permitted_values(
|
||||||
fully_featured_doc_one_sentence,
|
fully_featured_doc_one_sentence,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
AttributeFormat("tree_left"),
|
AttributeFormat("tree_left"),
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
AttributeFormat("text"),
|
AttributeFormat("text"),
|
||||||
AttributeFormat("lemma_", permitted_values=("fly", "to")),
|
AttributeFormat("lemma_", permitted_vals=("fly", "to")),
|
||||||
AttributeFormat("pos_"),
|
AttributeFormat("pos_"),
|
||||||
AttributeFormat("tag_"),
|
AttributeFormat("tag_"),
|
||||||
AttributeFormat("morph"),
|
AttributeFormat("morph"),
|
||||||
|
@ -629,7 +373,7 @@ def test_visualization_minimal_render_table_permitted_values(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_spacing(
|
def test_viz_minimal_render_table_spacing(
|
||||||
fully_featured_doc_one_sentence,
|
fully_featured_doc_one_sentence,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
|
@ -661,7 +405,7 @@ def test_visualization_spacing(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_table_two_sentences(
|
def test_viz_minimal_render_table_two_sentences(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
|
@ -700,7 +444,7 @@ def test_visualization_minimal_render_table_two_sentences(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_rich_render_table_one_sentence(
|
def test_viz_rich_render_table_one_sentence(
|
||||||
fully_featured_doc_one_sentence,
|
fully_featured_doc_one_sentence,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
|
@ -716,8 +460,8 @@ def test_visualization_rich_render_table_one_sentence(
|
||||||
"ent_type_",
|
"ent_type_",
|
||||||
name="ent",
|
name="ent",
|
||||||
fg_color=196,
|
fg_color=196,
|
||||||
value_dependent_fg_colors={"PERSON": 50},
|
value_dep_fg_colors={"PERSON": 50},
|
||||||
value_dependent_bg_colors={"PERSON": 12},
|
value_dep_bg_colors={"PERSON": 12},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
|
@ -727,10 +471,7 @@ def test_visualization_rich_render_table_one_sentence(
|
||||||
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
|
else "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma pos tag morph ent \n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- ----- --- --------------- ------\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah PROPN NNP NounType=prop|N PERSON\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's PART POS Poss=yes \n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister NOUN NN Number=sing \n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly VERB VBD Tense=past|Verb \n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to ADP IN \n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via ADP IN \n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london PROPN NNP NounType=prop|N GPE \n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . PUNCT . PunctType=peri \n\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# trigger value for value_dep shorter than maximum length in column
|
||||||
def test_visualization_rich_render_table_one_sentence_trigger_value_shorter_than_maximum(
|
|
||||||
fully_featured_doc_one_sentence,
|
|
||||||
):
|
|
||||||
formats = [
|
formats = [
|
||||||
AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
|
AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
|
||||||
AttributeFormat("dep_", name="dep", fg_color=2),
|
AttributeFormat("dep_", name="dep", fg_color=2),
|
||||||
|
@ -739,8 +480,8 @@ def test_visualization_rich_render_table_one_sentence_trigger_value_shorter_than
|
||||||
"text",
|
"text",
|
||||||
name="text",
|
name="text",
|
||||||
fg_color=196,
|
fg_color=196,
|
||||||
value_dependent_fg_colors={"'s": 50},
|
value_dep_fg_colors={"'s": 50},
|
||||||
value_dependent_bg_colors={"'s": 12},
|
value_dep_bg_colors={"'s": 12},
|
||||||
),
|
),
|
||||||
AttributeFormat("lemma_", name="lemma"),
|
AttributeFormat("lemma_", name="lemma"),
|
||||||
AttributeFormat("pos_", name="pos", fg_color=100),
|
AttributeFormat("pos_", name="pos", fg_color=100),
|
||||||
|
@ -759,7 +500,7 @@ def test_visualization_rich_render_table_one_sentence_trigger_value_shorter_than
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_rich_render_table_two_sentences(
|
def test_viz_rich_render_table_two_sentences(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
|
@ -775,8 +516,8 @@ def test_visualization_rich_render_table_two_sentences(
|
||||||
"ent_type_",
|
"ent_type_",
|
||||||
name="ent",
|
name="ent",
|
||||||
fg_color=196,
|
fg_color=196,
|
||||||
value_dependent_fg_colors={"PERSON": 50},
|
value_dep_fg_colors={"PERSON": 50},
|
||||||
value_dependent_bg_colors={"PERSON": 12},
|
value_dep_bg_colors={"PERSON": 12},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
|
@ -787,25 +528,25 @@ def test_visualization_rich_render_table_two_sentences(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_text_with_text_format(
|
def test_viz_text_with_text_format(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"ent_type_",
|
"ent_type_",
|
||||||
fg_color=50,
|
fg_color=50,
|
||||||
value_dependent_fg_colors={"PERSON": 50},
|
value_dep_fg_colors={"PERSON": 50},
|
||||||
value_dependent_bg_colors={"PERSON": 12},
|
value_dep_bg_colors={"PERSON": 12},
|
||||||
),
|
),
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"text",
|
"text",
|
||||||
fg_color=50,
|
fg_color=50,
|
||||||
bg_color=53,
|
bg_color=53,
|
||||||
value_dependent_fg_colors={"PERSON": 50},
|
value_dep_fg_colors={"PERSON": 50},
|
||||||
value_dependent_bg_colors={"PERSON": 12},
|
value_dep_bg_colors={"PERSON": 12},
|
||||||
),
|
),
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"lemma_", fg_color=50, bg_color=53, permitted_values=("fly", "valley")
|
"lemma_", fg_color=50, bg_color=53, permitted_vals=("fly", "valley")
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
|
@ -816,16 +557,16 @@ def test_visualization_text_with_text_format(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_text_without_text_format(
|
def test_viz_render_text_without_text_format(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
formats = [
|
formats = [
|
||||||
AttributeFormat(
|
AttributeFormat(
|
||||||
"ent_type_",
|
"ent_type_",
|
||||||
value_dependent_fg_colors={"PERSON": 50},
|
value_dep_fg_colors={"PERSON": 50},
|
||||||
value_dependent_bg_colors={"PERSON": 12},
|
value_dep_bg_colors={"PERSON": 12},
|
||||||
),
|
),
|
||||||
AttributeFormat("lemma_", permitted_values=("fly", "valley")),
|
AttributeFormat("lemma_", permitted_vals=("fly", "valley")),
|
||||||
]
|
]
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
|
Visualizer().render_text(fully_featured_doc_two_sentences, formats)
|
||||||
|
@ -835,9 +576,10 @@ def test_visualization_render_text_without_text_format(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
|
def test_viz_render_instances_two_sentences(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
):
|
):
|
||||||
|
# search on entity type
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
AttributeFormat("text"),
|
AttributeFormat("text"),
|
||||||
|
@ -852,8 +594,8 @@ def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=0,
|
surrounding_tokens_height=0,
|
||||||
|
@ -863,10 +605,7 @@ def test_visualization_minimal_render_instances_two_sentences_type_non_grouping(
|
||||||
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
|
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# search on entity type with permitted values
|
||||||
def test_visualization_minimal_render_instances_two_sentences_value_non_grouping(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
AttributeFormat("text"),
|
AttributeFormat("text"),
|
||||||
|
@ -877,13 +616,13 @@ def test_visualization_minimal_render_instances_two_sentences_value_non_grouping
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
|
|
||||||
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSON"])]
|
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSON"])]
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=0,
|
surrounding_tokens_height=0,
|
||||||
|
@ -893,10 +632,7 @@ def test_visualization_minimal_render_instances_two_sentences_value_non_grouping
|
||||||
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
== "\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# include surrounding tokens
|
||||||
def test_visualization_minimal_render_instances_two_sentences_value_surrounding_sentences_non_grouping(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
AttributeFormat("text"),
|
AttributeFormat("text"),
|
||||||
|
@ -907,13 +643,13 @@ def test_visualization_minimal_render_instances_two_sentences_value_surrounding_
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
|
|
||||||
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSON"])]
|
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSON"])]
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=2,
|
surrounding_tokens_height=2,
|
||||||
|
@ -926,9 +662,7 @@ def test_visualization_minimal_render_instances_two_sentences_value_surrounding_
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_instances_two_sentences_missing_value_non_grouping(
|
# missing permitted value
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_", name="dep"),
|
AttributeFormat("dep_", name="dep"),
|
||||||
AttributeFormat("text", name="text"),
|
AttributeFormat("text", name="text"),
|
||||||
|
@ -939,13 +673,13 @@ def test_visualization_render_instances_two_sentences_missing_value_non_grouping
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
|
|
||||||
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])]
|
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSONN"])]
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=0,
|
surrounding_tokens_height=0,
|
||||||
|
@ -955,10 +689,7 @@ def test_visualization_render_instances_two_sentences_missing_value_non_grouping
|
||||||
== "\ndep text \n--- ---- \n"
|
== "\ndep text \n--- ---- \n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# missing permitted value, include surrounding tokens
|
||||||
def test_visualization_render_instances_two_sentences_missing_value_surrounding_sentences_non_grouping(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_", name="dep"),
|
AttributeFormat("dep_", name="dep"),
|
||||||
AttributeFormat("text", name="text"),
|
AttributeFormat("text", name="text"),
|
||||||
|
@ -969,13 +700,13 @@ def test_visualization_render_instances_two_sentences_missing_value_surrounding_
|
||||||
AttributeFormat("ent_type_"),
|
AttributeFormat("ent_type_"),
|
||||||
]
|
]
|
||||||
|
|
||||||
search_attributes = [AttributeFormat("ent_type_", permitted_values=["PERSONN"])]
|
search_attributes = [AttributeFormat("ent_type_", permitted_vals=["PERSONN"])]
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=False,
|
group=False,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=0,
|
surrounding_tokens_height=0,
|
||||||
|
@ -985,10 +716,7 @@ def test_visualization_render_instances_two_sentences_missing_value_surrounding_
|
||||||
== "\ndep text \n--- ---- \n"
|
== "\ndep text \n--- ---- \n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# with grouping
|
||||||
def test_visualization_render_instances_two_sentences_type_grouping(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_"),
|
AttributeFormat("dep_"),
|
||||||
AttributeFormat("text"),
|
AttributeFormat("text"),
|
||||||
|
@ -1004,8 +732,8 @@ def test_visualization_render_instances_two_sentences_type_grouping(
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=True,
|
group=True,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=0,
|
surrounding_tokens_height=0,
|
||||||
|
@ -1015,10 +743,7 @@ def test_visualization_render_instances_two_sentences_type_grouping(
|
||||||
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
== "\npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# with grouping and colors
|
||||||
def test_visualization_render_instances_two_sentences_type_grouping_colors(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
display_columns = [
|
||||||
AttributeFormat("dep_", fg_color=20),
|
AttributeFormat("dep_", fg_color=20),
|
||||||
AttributeFormat("text", bg_color=30),
|
AttributeFormat("text", bg_color=30),
|
||||||
|
@ -1034,8 +759,8 @@ def test_visualization_render_instances_two_sentences_type_grouping_colors(
|
||||||
assert (
|
assert (
|
||||||
Visualizer().render_instances(
|
Visualizer().render_instances(
|
||||||
fully_featured_doc_two_sentences,
|
fully_featured_doc_two_sentences,
|
||||||
search_attributes=search_attributes,
|
search_attrs=search_attributes,
|
||||||
display_columns=display_columns,
|
display_cols=display_columns,
|
||||||
group=True,
|
group=True,
|
||||||
spacing=3,
|
spacing=3,
|
||||||
surrounding_tokens_height=0,
|
surrounding_tokens_height=0,
|
||||||
|
@ -1046,35 +771,3 @@ def test_visualization_render_instances_two_sentences_type_grouping_colors(
|
||||||
if SUPPORTS_ANSI
|
if SUPPORTS_ANSI
|
||||||
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
else "npobj London london PROPN NNP NounType=prop|Number=sing GPE \n\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_visualization_render_instances_two_sentences_type_grouping_colors_with_surrounding_sentences(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
):
|
|
||||||
display_columns = [
|
|
||||||
AttributeFormat("dep_", fg_color=20),
|
|
||||||
AttributeFormat("text", bg_color=30),
|
|
||||||
AttributeFormat("lemma_"),
|
|
||||||
AttributeFormat("pos_"),
|
|
||||||
AttributeFormat("tag_"),
|
|
||||||
AttributeFormat("morph"),
|
|
||||||
AttributeFormat("ent_type_"),
|
|
||||||
]
|
|
||||||
|
|
||||||
search_attributes = [AttributeFormat("ent_type_"), AttributeFormat("lemma_")]
|
|
||||||
|
|
||||||
assert (
|
|
||||||
Visualizer().render_instances(
|
|
||||||
fully_featured_doc_two_sentences,
|
|
||||||
search_attributes=search_attributes,
|
|
||||||
display_columns=display_columns,
|
|
||||||
group=True,
|
|
||||||
spacing=3,
|
|
||||||
surrounding_tokens_height=3,
|
|
||||||
surrounding_tokens_fg_color=11,
|
|
||||||
surrounding_tokens_bg_color=None,
|
|
||||||
)
|
|
||||||
== "\n\x1b[38;5;20m\x1b[38;5;11mcompound\x1b[0m\x1b[0m \x1b[48;5;30m\x1b[38;5;11mSilicon\x1b[0m\x1b[0m \x1b[38;5;11msilicon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mValley\x1b[0m \x1b[0m \x1b[38;5;11mvalley\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mLondon \x1b[0m london PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mShe\x1b[0m \x1b[0m \x1b[38;5;11mshe\x1b[0m \x1b[38;5;11mPRON\x1b[0m \x1b[38;5;11mPRP\x1b[0m \x1b[38;5;11mCase=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mloved\x1b[0m \x1b[0m \x1b[38;5;11mlove\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=Past|VerbForm=Fin\x1b[0m \n\n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mto\x1b[0m \x1b[0m \x1b[38;5;11mto\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m 
\n\x1b[38;5;20mcompound\x1b[0m \x1b[48;5;30mSilicon\x1b[0m silicon PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20mpobj \x1b[0m \x1b[48;5;30mValley \x1b[0m valley PROPN NNP NounType=prop|Number=sing GPE \n\x1b[38;5;20m\x1b[38;5;11mprep\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mvia\x1b[0m \x1b[0m \x1b[38;5;11mvia\x1b[0m \x1b[38;5;11mADP\x1b[0m \x1b[38;5;11mIN\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpobj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mLondon\x1b[0m \x1b[0m \x1b[38;5;11mlondon\x1b[0m \x1b[38;5;11mPROPN\x1b[0m \x1b[38;5;11mNNP\x1b[0m \x1b[38;5;11mNounType=prop|Number=sing\x1b[0m \x1b[38;5;11mGPE\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mpunct\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m.\x1b[0m \x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPUNCT\x1b[0m \x1b[38;5;11m.\x1b[0m \x1b[38;5;11mPunctType=peri\x1b[0m \n\n\x1b[38;5;20mposs \x1b[0m \x1b[48;5;30mSarah \x1b[0m sarah PROPN NNP NounType=prop|Number=sing PERSON\n\x1b[38;5;20m\x1b[38;5;11mcase\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11m's\x1b[0m \x1b[0m \x1b[38;5;11m's\x1b[0m \x1b[38;5;11mPART\x1b[0m \x1b[38;5;11mPOS\x1b[0m \x1b[38;5;11mPoss=yes\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mnsubj\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11msister\x1b[0m \x1b[0m \x1b[38;5;11msister\x1b[0m \x1b[38;5;11mNOUN\x1b[0m \x1b[38;5;11mNN\x1b[0m \x1b[38;5;11mNumber=sing\x1b[0m \n\x1b[38;5;20m\x1b[38;5;11mROOT\x1b[0m \x1b[0m \x1b[48;5;30m\x1b[38;5;11mflew\x1b[0m \x1b[0m \x1b[38;5;11mfly\x1b[0m \x1b[38;5;11mVERB\x1b[0m \x1b[38;5;11mVBD\x1b[0m \x1b[38;5;11mTense=past|VerbForm=fin\x1b[0m \n"
|
|
||||||
if SUPPORTS_ANSI
|
|
||||||
else "\ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \nnsubj She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \nROOT loved love VERB VBD Tense=Past|VerbForm=Fin \n\nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \nprep to to ADP IN \ncompound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \npobj Valley valley PROPN NNP NounType=prop|Number=sing GPE \nprep via via ADP IN \npobj London london PROPN NNP NounType=prop|Number=sing GPE \npunct . . PUNCT . PunctType=peri \n\nposs Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\ncase 's 's PART POS Poss=yes \nnsubj sister sister NOUN NN Number=sing \nROOT flew fly VERB VBD Tense=past|VerbForm=fin \n"
|
|
||||||
)
|
|
||||||
|
|
|
@ -52,25 +52,25 @@ class AttributeFormat:
|
||||||
name: str = "",
|
name: str = "",
|
||||||
aligns: str = "l",
|
aligns: str = "l",
|
||||||
max_width: Optional[int] = None,
|
max_width: Optional[int] = None,
|
||||||
fg_color: Union[str, int, None] = None,
|
fg_color: Optional[Union[str, int]] = None,
|
||||||
bg_color: Union[str, int, None] = None,
|
bg_color: Optional[Union[str, int]] = None,
|
||||||
permitted_values: Optional[tuple] = None,
|
permitted_vals: Optional[tuple] = None,
|
||||||
value_dependent_fg_colors: Optional[Dict[str, Union[str, int]]] = None,
|
value_dep_fg_colors: Optional[Dict[str, Union[str, int]]] = None,
|
||||||
value_dependent_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
|
value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
attribute: the token attribute, e.g. lemma_, ._.holmes.lemma
|
attribute: the token attribute, e.g. lemma_, ._.holmes.lemma
|
||||||
name: the name to display e.g. in column headers
|
name: the name to display e.g. in column headers
|
||||||
aligns: where appropriate the column alignment 'l' (left,
|
aligns: where appropriate the column alignment 'l' (left,
|
||||||
default), 'r' (right) or 'c' (center).
|
default), 'r' (right) or 'c' (center).
|
||||||
max_width: a maximum width to which values of the attribute should be truncated.
|
max_width: a maximum width to which values of the attribute should be truncated.
|
||||||
fg_color: the foreground color that should be used to display instances of the attribute
|
fg_color: the foreground color that should be used to display instances of the attribute
|
||||||
bg_color: the background color that should be used to display instances of the attribute
|
bg_color: the background color that should be used to display instances of the attribute
|
||||||
permitted_values: a tuple of values of the attribute that should be displayed. If
|
permitted_vals: a tuple of values of the attribute that should be displayed. If
|
||||||
permitted_values is not None and a value of the attribute is not
|
permitted_vals is not None and a value of the attribute is not
|
||||||
in permitted_values, the empty string is rendered instead of the value.
|
in permitted_vals, the empty string is rendered instead of the value.
|
||||||
value_dependent_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
|
value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
|
||||||
value_dependent_bg_colors: a dictionary from values to background colors that should be used to display those values.
|
value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values.
|
||||||
"""
|
"""
|
||||||
self.attribute = attribute
|
self.attribute = attribute
|
||||||
self.name = name
|
self.name = name
|
||||||
|
@ -78,57 +78,58 @@ class AttributeFormat:
|
||||||
self.max_width = max_width
|
self.max_width = max_width
|
||||||
self.fg_color = fg_color
|
self.fg_color = fg_color
|
||||||
self.bg_color = bg_color
|
self.bg_color = bg_color
|
||||||
self.permitted_values = permitted_values
|
self.permitted_vals = permitted_vals
|
||||||
self.value_dependent_fg_colors = value_dependent_fg_colors
|
self.value_dep_fg_colors = value_dep_fg_colors
|
||||||
self.value_dependent_bg_colors = value_dependent_bg_colors
|
self.value_dep_bg_colors = value_dep_bg_colors
|
||||||
self.printer = wasabi.Printer(no_print=True)
|
self.printer = wasabi.Printer(no_print=True)
|
||||||
|
|
||||||
def render(
|
def render(
|
||||||
self,
|
self,
|
||||||
token: Token,
|
token: Token,
|
||||||
*,
|
*,
|
||||||
right_pad_to_length: Optional[int] = None,
|
right_pad_to_len: Optional[int] = None,
|
||||||
ignore_colors: bool = False,
|
ignore_colors: bool = False,
|
||||||
render_all_colors_within_values: bool = False,
|
render_all_colors_in_vals: bool = False,
|
||||||
whole_row_fg_color: Union[int, str, None] = None,
|
whole_row_fg_color: Union[int, str, None] = None,
|
||||||
whole_row_bg_color: Union[int, str, None] = None,
|
whole_row_bg_color: Union[int, str, None] = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
|
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
|
||||||
render_all_colors_within_values: when rendering a table, self.fg_color and self.bg_color are rendered in Wasabi.
|
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
|
||||||
This argument is set to True when rendering a text to signal that colors should be rendered here.
|
render_all_colors_in_vals: when rendering a table, self.fg_color and self.bg_color are rendered in Wasabi.
|
||||||
whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dependent_fg_colors.
|
This argument is set to True when rendering a text to signal that colors should be rendered here.
|
||||||
whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dependent_bg_colors.
|
whole_row_fg_color: a foreground color used for the whole row. This takes precedence over value_dep_fg_colors.
|
||||||
|
whole_row_bg_color: a background color used for the whole row. This takes precedence over value_dep_bg_colors.
|
||||||
"""
|
"""
|
||||||
obj = token
|
obj = token
|
||||||
parts = self.attribute.split(".")
|
parts = self.attribute.split(".")
|
||||||
for part in parts[:-1]:
|
for part in parts[:-1]:
|
||||||
obj = getattr(obj, part)
|
obj = getattr(obj, part)
|
||||||
value = str(getattr(obj, parts[-1]))
|
value = str(getattr(obj, parts[-1]))
|
||||||
if self.permitted_values is not None and value not in (
|
if self.permitted_vals is not None and value not in (
|
||||||
str(v) for v in self.permitted_values
|
str(v) for v in self.permitted_vals
|
||||||
):
|
):
|
||||||
return ""
|
return ""
|
||||||
if self.max_width is not None:
|
if self.max_width is not None:
|
||||||
value = value[: self.max_width]
|
value = value[: self.max_width]
|
||||||
fg_color = None
|
fg_color = None
|
||||||
bg_color = None
|
bg_color = None
|
||||||
if right_pad_to_length is not None:
|
if right_pad_to_len is not None:
|
||||||
right_padding = " " * (right_pad_to_length - len(value))
|
right_padding = " " * (right_pad_to_len - len(value))
|
||||||
else:
|
else:
|
||||||
right_padding = ""
|
right_padding = ""
|
||||||
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
|
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
|
||||||
if whole_row_fg_color is not None:
|
if whole_row_fg_color is not None:
|
||||||
fg_color = whole_row_fg_color
|
fg_color = whole_row_fg_color
|
||||||
elif self.value_dependent_fg_colors is not None:
|
elif self.value_dep_fg_colors is not None:
|
||||||
fg_color = self.value_dependent_fg_colors.get(value, None)
|
fg_color = self.value_dep_fg_colors.get(value, None)
|
||||||
if fg_color is None and render_all_colors_within_values:
|
if fg_color is None and render_all_colors_in_vals:
|
||||||
fg_color = self.fg_color
|
fg_color = self.fg_color
|
||||||
if self.value_dependent_bg_colors is not None:
|
if self.value_dep_bg_colors is not None:
|
||||||
bg_color = self.value_dependent_bg_colors.get(value, None)
|
bg_color = self.value_dep_bg_colors.get(value, None)
|
||||||
if whole_row_bg_color is not None:
|
if whole_row_bg_color is not None:
|
||||||
bg_color = whole_row_bg_color
|
bg_color = whole_row_bg_color
|
||||||
elif bg_color is None and render_all_colors_within_values:
|
elif bg_color is None and render_all_colors_in_vals:
|
||||||
bg_color = self.bg_color
|
bg_color = self.bg_color
|
||||||
if fg_color is not None or bg_color is not None:
|
if fg_color is not None or bg_color is not None:
|
||||||
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
||||||
|
@ -137,7 +138,7 @@ class AttributeFormat:
|
||||||
|
|
||||||
class Visualizer:
|
class Visualizer:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def render_dependency_tree(sent: Span, root_right: bool) -> List[str]:
|
def render_dep_tree(sent: Span, root_right: bool) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Returns an ASCII rendering of the document with a dependency tree for each sentence. The
|
Returns an ASCII rendering of the document with a dependency tree for each sentence. The
|
||||||
dependency tree output for a given token has the same index within the output list of
|
dependency tree output for a given token has the same index within the output list of
|
||||||
|
@ -150,276 +151,234 @@ class Visualizer:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check sent is really a sentence
|
# Check sent is really a sentence
|
||||||
assert sent.start == sent[0].sent.start
|
if sent.start != sent[0].sent.start or sent.end != sent[0].sent.end:
|
||||||
assert sent.end == sent[0].sent.end
|
raise ValueError(f"Span is not a sentence: '{sent}'")
|
||||||
heads: List[Optional[int]] = [
|
heads: List[Optional[int]] = []
|
||||||
None
|
for token in sent:
|
||||||
if token.dep_.lower() == "root" or token.head.i == token.i
|
if token.dep_.lower() == "root" or token.head.i == token.i:
|
||||||
else token.head.i - sent.start
|
heads.append(None)
|
||||||
for token in sent
|
else:
|
||||||
]
|
heads.append(token.head.i - sent.start)
|
||||||
# Check there are no head references outside the sentence
|
# Check there are no head references outside the sentence
|
||||||
assert (
|
heads_outside_sent = [
|
||||||
len(
|
1 for h in heads if h is not None and (h < 0 or h > sent.end - sent.start)
|
||||||
[
|
]
|
||||||
head
|
if len(heads_outside_sent) > 0:
|
||||||
for head in heads
|
raise ValueError(f"Head reference outside sentence in sentence '{sent}'")
|
||||||
if head is not None and (head < 0 or head > sent.end - sent.start)
|
|
||||||
]
|
|
||||||
)
|
|
||||||
== 0
|
|
||||||
)
|
|
||||||
children_lists: List[List[int]] = [[] for _ in range(sent.end - sent.start)]
|
children_lists: List[List[int]] = [[] for _ in range(sent.end - sent.start)]
|
||||||
for child, head in enumerate(heads):
|
for child, head in enumerate(heads):
|
||||||
if head is not None:
|
if head is not None:
|
||||||
children_lists[head].append(child)
|
children_lists[head].append(child)
|
||||||
all_indices_ordered_by_column: List[int] = []
|
all_ind_ord_by_col: List[int] = []
|
||||||
# start with the root column
|
# start with the root column
|
||||||
indices_in_current_column = [i for i, h in enumerate(heads) if h is None]
|
inds_in_this_col = [i for i, h in enumerate(heads) if h is None]
|
||||||
while len(indices_in_current_column) > 0:
|
while len(inds_in_this_col) > 0:
|
||||||
assert (
|
all_ind_ord_by_col = inds_in_this_col + all_ind_ord_by_col
|
||||||
len(
|
inds_in_next_col = []
|
||||||
[
|
|
||||||
i
|
|
||||||
for i in indices_in_current_column
|
|
||||||
if i in all_indices_ordered_by_column
|
|
||||||
]
|
|
||||||
)
|
|
||||||
== 0
|
|
||||||
)
|
|
||||||
all_indices_ordered_by_column = (
|
|
||||||
indices_in_current_column + all_indices_ordered_by_column
|
|
||||||
)
|
|
||||||
indices_in_next_column = []
|
|
||||||
# The calculation order of the horizontal lengths of the children
|
# The calculation order of the horizontal lengths of the children
|
||||||
# on either given side of a head must ensure that children
|
# on either given side of a head must ensure that children
|
||||||
# closer to the head are processed first.
|
# closer to the head are processed first.
|
||||||
for index_in_current_column in indices_in_current_column:
|
for ind_in_this_col in inds_in_this_col:
|
||||||
following_children_indices = [
|
following_child_inds = [
|
||||||
i
|
i for i in children_lists[ind_in_this_col] if i > ind_in_this_col
|
||||||
for i in children_lists[index_in_current_column]
|
|
||||||
if i > index_in_current_column
|
|
||||||
]
|
]
|
||||||
indices_in_next_column.extend(following_children_indices)
|
inds_in_next_col.extend(following_child_inds)
|
||||||
preceding_children_indices = [
|
preceding_child_inds = [
|
||||||
i
|
i for i in children_lists[ind_in_this_col] if i < ind_in_this_col
|
||||||
for i in children_lists[index_in_current_column]
|
|
||||||
if i < index_in_current_column
|
|
||||||
]
|
]
|
||||||
preceding_children_indices.reverse()
|
preceding_child_inds.reverse()
|
||||||
indices_in_next_column.extend(preceding_children_indices)
|
inds_in_next_col.extend(preceding_child_inds)
|
||||||
indices_in_current_column = indices_in_next_column
|
inds_in_this_col = inds_in_next_col
|
||||||
horizontal_line_lengths = [
|
horiz_line_lens: List[int] = []
|
||||||
-1 if heads[i] is None else 1
|
for i in range(sent.end - sent.start):
|
||||||
# length == 1: governed by direct neighbour and has no children itself
|
if heads[i] is None:
|
||||||
if len(children_lists[i]) == 0 and abs(cast(int, heads[i]) - i) == 1 else 0
|
horiz_line_lens.append(-1)
|
||||||
for i in range(sent.end - sent.start)
|
elif len(children_lists[i]) == 0 and abs(cast(int, heads[i]) - i) == 1:
|
||||||
]
|
# governed by direct neighbour and has no children itself
|
||||||
while 0 in horizontal_line_lengths:
|
horiz_line_lens.append(1)
|
||||||
for working_token_index in (
|
else:
|
||||||
i
|
horiz_line_lens.append(0)
|
||||||
for i in all_indices_ordered_by_column
|
while 0 in horiz_line_lens:
|
||||||
if horizontal_line_lengths[i] == 0
|
for working_token_ind in (
|
||||||
|
i for i in all_ind_ord_by_col if horiz_line_lens[i] == 0
|
||||||
):
|
):
|
||||||
# render relation between this token and its head
|
# render relation between this token and its head
|
||||||
first_index_in_relation = min(
|
first_ind_in_rel = min(
|
||||||
working_token_index,
|
working_token_ind,
|
||||||
cast(int, heads[working_token_index]),
|
cast(int, heads[working_token_ind]),
|
||||||
)
|
)
|
||||||
second_index_in_relation = max(
|
second_ind_in_rel = max(
|
||||||
working_token_index,
|
working_token_ind,
|
||||||
cast(int, heads[working_token_index]),
|
cast(int, heads[working_token_ind]),
|
||||||
)
|
)
|
||||||
# If this token has children, they will already have been rendered.
|
# If this token has children, they will already have been rendered.
|
||||||
# The line needs to be one character longer than the longest of the
|
# The line needs to be one character longer than the longest of the
|
||||||
# children's lines.
|
# children's lines.
|
||||||
if len(children_lists[working_token_index]) > 0:
|
if len(children_lists[working_token_ind]) > 0:
|
||||||
horizontal_line_lengths[working_token_index] = (
|
horiz_line_lens[working_token_ind] = (
|
||||||
max(
|
max(
|
||||||
[
|
[
|
||||||
horizontal_line_lengths[i]
|
horiz_line_lens[i]
|
||||||
for i in children_lists[working_token_index]
|
for i in children_lists[working_token_ind]
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
+ 1
|
+ 1
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
horizontal_line_lengths[working_token_index] = 1
|
horiz_line_lens[working_token_ind] = 1
|
||||||
for inbetween_index in (
|
for inbetween_ind in (
|
||||||
i
|
i
|
||||||
for i in range(
|
for i in range(first_ind_in_rel + 1, second_ind_in_rel)
|
||||||
first_index_in_relation + 1, second_index_in_relation
|
if horiz_line_lens[i] != 0
|
||||||
)
|
|
||||||
if horizontal_line_lengths[i] != 0
|
|
||||||
):
|
):
|
||||||
horizontal_line_lengths[working_token_index] = max(
|
alt_ind: int
|
||||||
horizontal_line_lengths[working_token_index],
|
if (
|
||||||
horizontal_line_lengths[inbetween_index]
|
inbetween_ind
|
||||||
if inbetween_index
|
in children_lists[cast(int, heads[working_token_ind])]
|
||||||
in children_lists[cast(int, heads[working_token_index])]
|
and inbetween_ind not in children_lists[working_token_ind]
|
||||||
and inbetween_index not in children_lists[working_token_index]
|
):
|
||||||
else horizontal_line_lengths[inbetween_index] + 1,
|
alt_ind = horiz_line_lens[inbetween_ind]
|
||||||
)
|
else:
|
||||||
max_horizontal_line_length = max(horizontal_line_lengths)
|
alt_ind = horiz_line_lens[inbetween_ind] + 1
|
||||||
|
if alt_ind > horiz_line_lens[working_token_ind]:
|
||||||
|
horiz_line_lens[working_token_ind] = alt_ind
|
||||||
|
max_horiz_line_len = max(horiz_line_lens)
|
||||||
char_matrix = [
|
char_matrix = [
|
||||||
[SPACE] * max_horizontal_line_length * 2
|
[SPACE] * max_horiz_line_len * 2 for _ in range(sent.start, sent.end)
|
||||||
for _ in range(sent.start, sent.end)
|
|
||||||
]
|
]
|
||||||
for working_token_index in range(sent.end - sent.start):
|
for working_token_ind in range(sent.end - sent.start):
|
||||||
head_token_index = heads[working_token_index]
|
head_token_ind = heads[working_token_ind]
|
||||||
if head_token_index is None:
|
if head_token_ind is None:
|
||||||
continue
|
continue
|
||||||
first_index_in_relation = min(working_token_index, head_token_index)
|
first_ind_in_rel = min(working_token_ind, head_token_ind)
|
||||||
second_index_in_relation = max(working_token_index, head_token_index)
|
second_ind_in_rel = max(working_token_ind, head_token_ind)
|
||||||
char_horizontal_line_length = (
|
char_horiz_line_len = 2 * horiz_line_lens[working_token_ind]
|
||||||
2 * horizontal_line_lengths[working_token_index]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Draw the corners of the relation
|
# Draw the corners of the relation
|
||||||
char_matrix[first_index_in_relation][char_horizontal_line_length - 1] |= (
|
char_matrix[first_ind_in_rel][char_horiz_line_len - 1] |= (
|
||||||
HALF_HORIZONTAL_LINE + LOWER_HALF_VERTICAL_LINE
|
HALF_HORIZONTAL_LINE + LOWER_HALF_VERTICAL_LINE
|
||||||
)
|
)
|
||||||
char_matrix[second_index_in_relation][char_horizontal_line_length - 1] |= (
|
char_matrix[second_ind_in_rel][char_horiz_line_len - 1] |= (
|
||||||
HALF_HORIZONTAL_LINE + UPPER_HALF_VERTICAL_LINE
|
HALF_HORIZONTAL_LINE + UPPER_HALF_VERTICAL_LINE
|
||||||
)
|
)
|
||||||
|
|
||||||
# Draw the horizontal line for the governing token
|
# Draw the horizontal line for the governing token
|
||||||
for working_horizontal_position in range(char_horizontal_line_length - 1):
|
for working_horiz_pos in range(char_horiz_line_len - 1):
|
||||||
if (
|
if char_matrix[head_token_ind][working_horiz_pos] != FULL_VERTICAL_LINE:
|
||||||
char_matrix[head_token_index][working_horizontal_position]
|
char_matrix[head_token_ind][
|
||||||
!= FULL_VERTICAL_LINE
|
working_horiz_pos
|
||||||
):
|
|
||||||
char_matrix[head_token_index][
|
|
||||||
working_horizontal_position
|
|
||||||
] |= FULL_HORIZONTAL_LINE
|
] |= FULL_HORIZONTAL_LINE
|
||||||
|
|
||||||
# Draw the vertical line for the relation
|
# Draw the vertical line for the relation
|
||||||
for working_vertical_position in range(
|
for working_vert_pos in range(first_ind_in_rel + 1, second_ind_in_rel):
|
||||||
first_index_in_relation + 1, second_index_in_relation
|
|
||||||
):
|
|
||||||
if (
|
if (
|
||||||
char_matrix[working_vertical_position][
|
char_matrix[working_vert_pos][char_horiz_line_len - 1]
|
||||||
char_horizontal_line_length - 1
|
|
||||||
]
|
|
||||||
!= FULL_HORIZONTAL_LINE
|
!= FULL_HORIZONTAL_LINE
|
||||||
):
|
):
|
||||||
char_matrix[working_vertical_position][
|
char_matrix[working_vert_pos][
|
||||||
char_horizontal_line_length - 1
|
char_horiz_line_len - 1
|
||||||
] |= FULL_VERTICAL_LINE
|
] |= FULL_VERTICAL_LINE
|
||||||
for working_token_index in (
|
for working_token_ind in (
|
||||||
i for i in range(sent.end - sent.start) if heads[i] is not None
|
i for i in range(sent.end - sent.start) if heads[i] is not None
|
||||||
):
|
):
|
||||||
for working_horizontal_position in range(
|
for working_horiz_pos in range(
|
||||||
2 * horizontal_line_lengths[working_token_index] - 2, -1, -1
|
2 * horiz_line_lens[working_token_ind] - 2, -1, -1
|
||||||
):
|
):
|
||||||
if (
|
if (
|
||||||
(
|
(
|
||||||
char_matrix[working_token_index][working_horizontal_position]
|
char_matrix[working_token_ind][working_horiz_pos]
|
||||||
== FULL_VERTICAL_LINE
|
== FULL_VERTICAL_LINE
|
||||||
)
|
)
|
||||||
and working_horizontal_position > 1
|
and working_horiz_pos > 1
|
||||||
and char_matrix[working_token_index][
|
and char_matrix[working_token_ind][working_horiz_pos - 2] == SPACE
|
||||||
working_horizontal_position - 2
|
|
||||||
]
|
|
||||||
== SPACE
|
|
||||||
):
|
):
|
||||||
# Cross over the existing vertical line, which is owing to a non-projective tree
|
# Cross over the existing vertical line, which is owing to a non-projective tree
|
||||||
continue
|
continue
|
||||||
if (
|
if char_matrix[working_token_ind][working_horiz_pos] != SPACE:
|
||||||
char_matrix[working_token_index][working_horizontal_position]
|
|
||||||
!= SPACE
|
|
||||||
):
|
|
||||||
# Draw the arrowhead to the right of what is already there
|
# Draw the arrowhead to the right of what is already there
|
||||||
char_matrix[working_token_index][
|
char_matrix[working_token_ind][working_horiz_pos + 1] = ARROWHEAD
|
||||||
working_horizontal_position + 1
|
|
||||||
] = ARROWHEAD
|
|
||||||
break
|
break
|
||||||
if working_horizontal_position == 0:
|
if working_horiz_pos == 0:
|
||||||
# Draw the arrowhead at the boundary of the diagram
|
# Draw the arrowhead at the boundary of the diagram
|
||||||
char_matrix[working_token_index][
|
char_matrix[working_token_ind][working_horiz_pos] = ARROWHEAD
|
||||||
working_horizontal_position
|
|
||||||
] = ARROWHEAD
|
|
||||||
else:
|
else:
|
||||||
# Fill in the horizontal line for the governed token
|
# Fill in the horizontal line for the governed token
|
||||||
char_matrix[working_token_index][
|
char_matrix[working_token_ind][
|
||||||
working_horizontal_position
|
working_horiz_pos
|
||||||
] |= FULL_HORIZONTAL_LINE
|
] |= FULL_HORIZONTAL_LINE
|
||||||
if root_right:
|
if root_right:
|
||||||
return [
|
return [
|
||||||
"".join(
|
"".join(
|
||||||
ROOT_RIGHT_CHARS[
|
ROOT_RIGHT_CHARS[char_matrix[vert_pos][horiz_pos]]
|
||||||
char_matrix[vertical_position][horizontal_position]
|
for horiz_pos in range((max_horiz_line_len * 2))
|
||||||
]
|
|
||||||
for horizontal_position in range((max_horizontal_line_length * 2))
|
|
||||||
)
|
)
|
||||||
for vertical_position in range(sent.end - sent.start)
|
for vert_pos in range(sent.end - sent.start)
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
return [
|
return [
|
||||||
"".join(
|
"".join(
|
||||||
ROOT_LEFT_CHARS[char_matrix[vertical_position][horizontal_position]]
|
ROOT_LEFT_CHARS[char_matrix[vert_pos][horiz_pos]]
|
||||||
for horizontal_position in range((max_horizontal_line_length * 2))
|
for horiz_pos in range((max_horiz_line_len * 2))
|
||||||
)[::-1]
|
)[::-1]
|
||||||
for vertical_position in range(sent.end - sent.start)
|
for vert_pos in range(sent.end - sent.start)
|
||||||
]
|
]
|
||||||
|
|
||||||
def render_table(
|
def render_table(self, doc: Doc, cols: List[AttributeFormat], spacing: int) -> str:
|
||||||
self, doc: Doc, columns: List[AttributeFormat], spacing: int
|
|
||||||
) -> str:
|
|
||||||
"""Renders a document as a table.
|
"""Renders a document as a table.
|
||||||
TODO: specify a specific portion of the document to display.
|
TODO: specify a specific portion of the document to display.
|
||||||
|
|
||||||
columns: the attribute formats of the columns to display.
|
cols: the attribute formats of the columns to display.
|
||||||
tree_right and tree_left are magic values for the
|
tree_right and tree_left are magic values for the
|
||||||
attributes that render dependency trees where the
|
attributes that render dependency trees where the
|
||||||
roots are on the left or right respectively.
|
roots are on the left or right respectively.
|
||||||
spacing: the number of spaces between each column in the table.
|
spacing: the number of spaces between each column in the table.
|
||||||
"""
|
"""
|
||||||
return_string = ""
|
return_str = ""
|
||||||
for sent in doc.sents:
|
for sent in doc.sents:
|
||||||
if "tree_right" in (c.attribute for c in columns):
|
if "tree_right" in (c.attribute for c in cols):
|
||||||
tree_right = self.render_dependency_tree(sent, True)
|
tree_right = self.render_dep_tree(sent, True)
|
||||||
if "tree_left" in (c.attribute for c in columns):
|
if "tree_left" in (c.attribute for c in cols):
|
||||||
tree_left = self.render_dependency_tree(sent, False)
|
tree_left = self.render_dep_tree(sent, False)
|
||||||
widths = []
|
widths = []
|
||||||
for column in columns:
|
for col in cols:
|
||||||
# get the values without any color codes
|
# get the values without any color codes
|
||||||
if column.attribute == "tree_left":
|
if col.attribute == "tree_left":
|
||||||
width = len(tree_left[0]) # type: ignore
|
width = len(tree_left[0]) # type: ignore
|
||||||
elif column.attribute == "tree_right":
|
elif col.attribute == "tree_right":
|
||||||
width = len(tree_right[0]) # type: ignore
|
width = len(tree_right[0]) # type: ignore
|
||||||
else:
|
else:
|
||||||
if len(sent) > 0:
|
if len(sent) > 0:
|
||||||
width = max(
|
width = max(
|
||||||
len(column.render(token, ignore_colors=True))
|
len(col.render(token, ignore_colors=True)) for token in sent
|
||||||
for token in sent
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
width = 0
|
width = 0
|
||||||
if column.max_width is not None:
|
if col.max_width is not None:
|
||||||
width = min(width, column.max_width)
|
width = min(width, col.max_width)
|
||||||
width = max(width, len(column.name))
|
width = max(width, len(col.name))
|
||||||
widths.append(width)
|
widths.append(width)
|
||||||
data = [
|
data: List[List[str]] = []
|
||||||
[
|
for token_index, token in enumerate(sent):
|
||||||
tree_right[token_index] # type: ignore
|
inner_data: List[str] = []
|
||||||
if column.attribute == "tree_right"
|
for col_index, col in enumerate(cols):
|
||||||
else tree_left[token_index] # type: ignore
|
if col.attribute == "tree_right":
|
||||||
if column.attribute == "tree_left"
|
inner_data.append(tree_right[token_index])
|
||||||
else column.render(token, right_pad_to_length=widths[column_index])
|
elif col.attribute == "tree_left":
|
||||||
for column_index, column in enumerate(columns)
|
inner_data.append(tree_left[token_index])
|
||||||
]
|
else:
|
||||||
for token_index, token in enumerate(sent)
|
inner_data.append(
|
||||||
]
|
col.render(token, right_pad_to_len=widths[col_index])
|
||||||
|
)
|
||||||
|
data.append(inner_data)
|
||||||
header: Optional[List[str]]
|
header: Optional[List[str]]
|
||||||
if len([1 for c in columns if len(c.name) > 0]) > 0:
|
if len([1 for c in cols if len(c.name) > 0]) > 0:
|
||||||
header = [c.name for c in columns]
|
header = [c.name for c in cols]
|
||||||
else:
|
else:
|
||||||
header = None
|
header = None
|
||||||
aligns = [c.aligns for c in columns]
|
aligns = [c.aligns for c in cols]
|
||||||
fg_colors = [c.fg_color for c in columns]
|
fg_colors = [c.fg_color for c in cols]
|
||||||
bg_colors = [c.bg_color for c in columns]
|
bg_colors = [c.bg_color for c in cols]
|
||||||
return_string += (
|
return_str += (
|
||||||
wasabi.table(
|
wasabi.table(
|
||||||
data,
|
data,
|
||||||
header=header,
|
header=header,
|
||||||
|
@ -432,41 +391,38 @@ class Visualizer:
|
||||||
)
|
)
|
||||||
+ "\n"
|
+ "\n"
|
||||||
)
|
)
|
||||||
return return_string
|
return return_str
|
||||||
|
|
||||||
def render_text(self, doc: Doc, attributes: List[AttributeFormat]) -> str:
|
def render_text(self, doc: Doc, attrs: List[AttributeFormat]) -> str:
|
||||||
"""Renders a text interspersed with attribute labels.
|
"""Renders a text interspersed with attribute labels.
|
||||||
TODO: specify a specific portion of the document to display.
|
TODO: specify a specific portion of the document to display.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return_string = ""
|
return_str = ""
|
||||||
text_attributes = [a for a in attributes if a.attribute == "text"]
|
text_attrs = [a for a in attrs if a.attribute == "text"]
|
||||||
text_attribute = (
|
text_attr = text_attrs[0] if len(text_attrs) > 0 else AttributeFormat("text")
|
||||||
text_attributes[0] if len(text_attributes) > 0 else AttributeFormat("text")
|
|
||||||
)
|
|
||||||
for token in doc:
|
for token in doc:
|
||||||
this_token_strings = [""]
|
this_token_strs = [""]
|
||||||
for attribute in (a for a in attributes if a.attribute != "text"):
|
for attr in (a for a in attrs if a.attribute != "text"):
|
||||||
attribute_text = attribute.render(
|
attr_text = attr.render(token, render_all_colors_in_vals=True)
|
||||||
token, render_all_colors_within_values=True
|
if attr_text is not None and len(attr_text) > 0:
|
||||||
|
this_token_strs.append(" " + attr_text)
|
||||||
|
if len(this_token_strs) == 1:
|
||||||
|
this_token_strs[0] = token.text
|
||||||
|
else:
|
||||||
|
this_token_strs[0] = text_attr.render(
|
||||||
|
token, render_all_colors_in_vals=True
|
||||||
)
|
)
|
||||||
if attribute_text is not None and len(attribute_text) > 0:
|
this_token_strs.append(token.whitespace_)
|
||||||
this_token_strings.append(" " + attribute_text)
|
return_str += "".join(this_token_strs)
|
||||||
this_token_strings[0] = (
|
return return_str
|
||||||
token.text
|
|
||||||
if len(this_token_strings) == 1
|
|
||||||
else text_attribute.render(token, render_all_colors_within_values=True)
|
|
||||||
)
|
|
||||||
this_token_strings.append(token.whitespace_)
|
|
||||||
return_string += "".join(this_token_strings)
|
|
||||||
return return_string
|
|
||||||
|
|
||||||
def render_instances(
|
def render_instances(
|
||||||
self,
|
self,
|
||||||
doc: Doc,
|
doc: Doc,
|
||||||
*,
|
*,
|
||||||
search_attributes: List[AttributeFormat],
|
search_attrs: List[AttributeFormat],
|
||||||
display_columns: List[AttributeFormat],
|
display_cols: List[AttributeFormat],
|
||||||
group: bool,
|
group: bool,
|
||||||
spacing: int,
|
spacing: int,
|
||||||
surrounding_tokens_height: int,
|
surrounding_tokens_height: int,
|
||||||
|
@ -476,8 +432,8 @@ class Visualizer:
|
||||||
"""Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'.
|
"""Shows all tokens in a document with specific attribute(s), e.g. entity labels, or attribute value(s), e.g. 'GPE'.
|
||||||
TODO: specify a specific portion of the document to display.
|
TODO: specify a specific portion of the document to display.
|
||||||
|
|
||||||
search_attributes: the attribute(s) or attribute value(s) that cause a row to be displayed for a token.
|
search_attrs: the attribute(s) or attribute value(s) that cause a row to be displayed for a token.
|
||||||
display_columns: the attributes that should be displayed in each row.
|
display_cols: the attributes that should be displayed in each row.
|
||||||
group: True if the rows should be ordered by the search attribute values,
|
group: True if the rows should be ordered by the search attribute values,
|
||||||
False if they should retain their in-document order.
|
False if they should retain their in-document order.
|
||||||
spacing: the number of spaces between each column.
|
spacing: the number of spaces between each column.
|
||||||
|
@ -491,105 +447,102 @@ class Visualizer:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def filter(token: Token) -> bool:
|
def filter(token: Token) -> bool:
|
||||||
for attribute in search_attributes:
|
for attr in search_attrs:
|
||||||
value = attribute.render(token, ignore_colors=True)
|
value = attr.render(token, ignore_colors=True)
|
||||||
if len(value) == 0:
|
if len(value) == 0:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
matched_tokens = [token for token in doc if filter(token)]
|
matched_tokens = [token for token in doc if filter(token)]
|
||||||
tokens_to_display_indices = [
|
tokens_to_display_inds: List[int] = []
|
||||||
index
|
for token in matched_tokens:
|
||||||
for token in matched_tokens
|
for ind in range(
|
||||||
for index in range(
|
|
||||||
token.i - surrounding_tokens_height,
|
token.i - surrounding_tokens_height,
|
||||||
token.i + surrounding_tokens_height + 1,
|
token.i + surrounding_tokens_height + 1,
|
||||||
)
|
):
|
||||||
if index >= 0 and index < len(doc)
|
if ind >= 0 and ind < len(doc):
|
||||||
]
|
tokens_to_display_inds.append(ind)
|
||||||
widths = []
|
widths = []
|
||||||
for column in display_columns:
|
for col in display_cols:
|
||||||
if len(tokens_to_display_indices) > 0:
|
if len(tokens_to_display_inds) > 0:
|
||||||
width = max(
|
width = max(
|
||||||
len(column.render(doc[i], ignore_colors=True))
|
len(col.render(doc[i], ignore_colors=True))
|
||||||
for i in tokens_to_display_indices
|
for i in tokens_to_display_inds
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
width = 0
|
width = 0
|
||||||
if column.max_width is not None:
|
if col.max_width is not None:
|
||||||
width = min(width, column.max_width)
|
width = min(width, col.max_width)
|
||||||
width = max(width, len(column.name))
|
width = max(width, len(col.name))
|
||||||
widths.append(width)
|
widths.append(width)
|
||||||
if group:
|
if group:
|
||||||
matched_tokens.sort(
|
matched_tokens.sort(
|
||||||
key=(
|
key=(
|
||||||
lambda token: [
|
lambda token: [
|
||||||
attribute.render(token, ignore_colors=True)
|
attr.render(token, ignore_colors=True) for attr in search_attrs
|
||||||
for attribute in search_attributes
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
token_index_to_display = -1
|
token_ind_to_display = -1
|
||||||
for matched_token_index, matched_token in enumerate(matched_tokens):
|
for matched_token_ind, matched_token in enumerate(matched_tokens):
|
||||||
if surrounding_tokens_height > 0:
|
if surrounding_tokens_height > 0:
|
||||||
surrounding_start_index = max(
|
surrounding_start_ind = max(
|
||||||
0, matched_token.i - surrounding_tokens_height
|
0, matched_token.i - surrounding_tokens_height
|
||||||
)
|
)
|
||||||
if token_index_to_display + 1 == matched_token.i:
|
if token_ind_to_display + 1 == matched_token.i:
|
||||||
surrounding_start_index = token_index_to_display + 1
|
surrounding_start_ind = token_ind_to_display + 1
|
||||||
surrounding_end_index = min(
|
surrounding_end_ind = min(
|
||||||
len(doc), matched_token.i + surrounding_tokens_height + 1
|
len(doc), matched_token.i + surrounding_tokens_height + 1
|
||||||
)
|
)
|
||||||
if (
|
if (
|
||||||
matched_token_index + 1 < len(matched_tokens)
|
matched_token_ind + 1 < len(matched_tokens)
|
||||||
and matched_token.i + 1 == matched_tokens[matched_token_index + 1].i
|
and matched_token.i + 1 == matched_tokens[matched_token_ind + 1].i
|
||||||
):
|
):
|
||||||
surrounding_end_index = matched_token.i + 1
|
surrounding_end_ind = matched_token.i + 1
|
||||||
|
|
||||||
else:
|
else:
|
||||||
surrounding_start_index = matched_token.i
|
surrounding_start_ind = matched_token.i
|
||||||
surrounding_end_index = surrounding_start_index + 1
|
surrounding_end_ind = surrounding_start_ind + 1
|
||||||
for token_index_to_display in range(
|
for token_ind_to_display in range(
|
||||||
surrounding_start_index, surrounding_end_index
|
surrounding_start_ind, surrounding_end_ind
|
||||||
):
|
):
|
||||||
if token_index_to_display == matched_token.i:
|
if token_ind_to_display == matched_token.i:
|
||||||
rows.append(
|
rows.append(
|
||||||
[
|
[
|
||||||
column.render(
|
col.render(
|
||||||
matched_token,
|
matched_token,
|
||||||
right_pad_to_length=widths[column_index],
|
right_pad_to_len=widths[col_ind],
|
||||||
)
|
)
|
||||||
for column_index, column in enumerate(display_columns)
|
for col_ind, col in enumerate(display_cols)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
rows.append(
|
rows.append(
|
||||||
[
|
[
|
||||||
column.render(
|
col.render(
|
||||||
doc[token_index_to_display],
|
doc[token_ind_to_display],
|
||||||
whole_row_fg_color=surrounding_tokens_fg_color,
|
whole_row_fg_color=surrounding_tokens_fg_color,
|
||||||
whole_row_bg_color=surrounding_tokens_bg_color,
|
whole_row_bg_color=surrounding_tokens_bg_color,
|
||||||
right_pad_to_length=widths[column_index],
|
right_pad_to_len=widths[col_ind],
|
||||||
)
|
)
|
||||||
for column_index, column in enumerate(display_columns)
|
for col_ind, col in enumerate(display_cols)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
if (
|
if (
|
||||||
matched_token_index + 1 < len(matched_tokens)
|
matched_token_ind + 1 < len(matched_tokens)
|
||||||
and token_index_to_display + 1
|
and token_ind_to_display + 1 != matched_tokens[matched_token_ind + 1].i
|
||||||
!= matched_tokens[matched_token_index + 1].i
|
|
||||||
):
|
):
|
||||||
rows.append([])
|
rows.append([])
|
||||||
header: Optional[List[str]]
|
header: Optional[List[str]]
|
||||||
if len([1 for c in display_columns if len(c.name) > 0]) > 0:
|
if len([1 for c in display_cols if len(c.name) > 0]) > 0:
|
||||||
header = [c.name for c in display_columns]
|
header = [c.name for c in display_cols]
|
||||||
else:
|
else:
|
||||||
header = None
|
header = None
|
||||||
aligns = [c.aligns for c in display_columns]
|
aligns = [c.aligns for c in display_cols]
|
||||||
fg_colors = [c.fg_color for c in display_columns]
|
fg_colors = [c.fg_color for c in display_cols]
|
||||||
bg_colors = [c.bg_color for c in display_columns]
|
bg_colors = [c.bg_color for c in display_cols]
|
||||||
return wasabi.table(
|
return wasabi.table(
|
||||||
rows,
|
rows,
|
||||||
header=header,
|
header=header,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user