mirror of https://github.com/explosion/spaCy.git
synced 2025-08-04 12:20:20 +03:00

Added tabular view

This commit is contained in:
parent e04950ef3c
commit 9f7f234b0f
@@ -1,5 +1,7 @@
 import pytest
 from spacy.util import get_lang_class
+from spacy.lang.en import English
+from spacy.tokens import Doc


 def pytest_addoption(parser):
@@ -390,3 +392,239 @@ def zh_tokenizer_pkuseg():
 @pytest.fixture(scope="session")
 def hy_tokenizer():
     return get_lang_class("hy")().tokenizer
+
+
+@pytest.fixture
+def tagged_doc():
+    text = "Sarah's sister flew to Silicon Valley via London."
+    tags = ["NNP", "POS", "NN", "VBD", "IN", "NNP", "NNP", "IN", "NNP", "."]
+    pos = [
+        "PROPN", "PART", "NOUN", "VERB", "ADP",
+        "PROPN", "PROPN", "ADP", "PROPN", "PUNCT",
+    ]
+    morphs = [
+        "NounType=prop|Number=sing",
+        "Poss=yes",
+        "Number=sing",
+        "Tense=past|VerbForm=fin",
+        "",
+        "NounType=prop|Number=sing",
+        "NounType=prop|Number=sing",
+        "",
+        "NounType=prop|Number=sing",
+        "PunctType=peri",
+    ]
+    nlp = English()
+    doc = nlp(text)
+    for i in range(len(tags)):
+        doc[i].tag_ = tags[i]
+        doc[i].pos_ = pos[i]
+        doc[i].set_morph(morphs[i])
+        if i > 0:
+            doc[i].is_sent_start = False
+    return doc
+
+
+@pytest.fixture
+def fully_featured_doc_one_sentence(en_vocab):
+    words = [
+        "Sarah", "'s", "sister", "flew", "to",
+        "Silicon", "Valley", "via", "London", ".",
+    ]
+    lemmas = [
+        "sarah", "'s", "sister", "fly", "to",
+        "silicon", "valley", "via", "london", ".",
+    ]
+    spaces = [False, True, True, True, True, True, True, True, False, False]
+    tags = ["NNP", "POS", "NN", "VBD", "IN", "NNP", "NNP", "IN", "NNP", "."]
+    pos = [
+        "PROPN", "PART", "NOUN", "VERB", "ADP",
+        "PROPN", "PROPN", "ADP", "PROPN", "PUNCT",
+    ]
+    morphs = [
+        "NounType=prop|Number=sing",
+        "Poss=yes",
+        "Number=sing",
+        "Tense=past|VerbForm=fin",
+        "",
+        "NounType=prop|Number=sing",
+        "NounType=prop|Number=sing",
+        "",
+        "NounType=prop|Number=sing",
+        "PunctType=peri",
+    ]
+    heads = [2, 0, 3, 3, 3, 6, 4, 3, 7, 3]
+    deps = [
+        "poss", "case", "nsubj", "ROOT", "prep",
+        "compound", "pobj", "prep", "pobj", "punct",
+    ]
+    ent_types = ["PERSON", "", "", "", "", "GPE", "GPE", "", "GPE", ""]
+    doc = Doc(
+        en_vocab,
+        words=words,
+        lemmas=lemmas,
+        spaces=spaces,
+        heads=heads,
+        deps=deps,
+        morphs=morphs,
+    )
+    for i in range(len(tags)):
+        doc[i].tag_ = tags[i]
+        doc[i].pos_ = pos[i]
+        doc[i].ent_type_ = ent_types[i]
+    return doc
+
+
+@pytest.fixture
+def fully_featured_doc_two_sentences(en_vocab):
+    words = [
+        "Sarah", "'s", "sister", "flew", "to", "Silicon", "Valley",
+        "via", "London", ".", "She", "loved", "it", ".",
+    ]
+    lemmas = [
+        "sarah", "'s", "sister", "fly", "to", "silicon", "valley",
+        "via", "london", ".", "she", "love", "it", ".",
+    ]
+    spaces = [
+        False, True, True, True, True, True, True,
+        True, False, False, True, True, False, False,
+    ]
+    pos = [
+        "PROPN", "PART", "NOUN", "VERB", "ADP", "PROPN", "PROPN",
+        "ADP", "PROPN", "PUNCT", "PRON", "VERB", "PRON", "PUNCT",
+    ]
+    tags = [
+        "NNP", "POS", "NN", "VBD", "IN", "NNP", "NNP",
+        "IN", "NNP", ".", "PRP", "VBD", "PRP", ".",
+    ]
+    morphs = [
+        "NounType=prop|Number=sing",
+        "Poss=yes",
+        "Number=sing",
+        "Tense=past|VerbForm=fin",
+        "",
+        "NounType=prop|Number=sing",
+        "NounType=prop|Number=sing",
+        "",
+        "NounType=prop|Number=sing",
+        "PunctType=peri",
+        "Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs",
+        "Tense=Past|VerbForm=Fin",
+        "Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs",
+        "PunctType=peri",
+    ]
+    heads = [2, 0, 3, 3, 3, 6, 4, 3, 7, 3, 11, 11, 11, 11]
+    deps = [
+        "poss", "case", "nsubj", "ROOT", "prep", "compound", "pobj",
+        "prep", "pobj", "punct", "nsubj", "ROOT", "dobj", "punct",
+    ]
+    ent_types = [
+        "PERSON", "", "", "", "", "GPE", "GPE",
+        "", "GPE", "", "", "", "", "",
+    ]
+    doc = Doc(
+        en_vocab,
+        words=words,
+        lemmas=lemmas,
+        spaces=spaces,
+        heads=heads,
+        deps=deps,
+        morphs=morphs,
+    )
+    for i in range(len(tags)):
+        doc[i].tag_ = tags[i]
+        doc[i].pos_ = pos[i]
+        doc[i].ent_type_ = ent_types[i]
+    return doc
+
+
+@pytest.fixture
+def sented_doc():
+    text = "One sentence. Two sentences. Three sentences."
+    nlp = English()
+    doc = nlp(text)
+    for i in range(len(doc)):
+        if i % 3 == 0:
+            doc[i].is_sent_start = True
+        else:
+            doc[i].is_sent_start = False
+    return doc
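
The fixtures above hand-annotate their Docs instead of running a trained pipeline, which keeps the visualizer's expected output deterministic. As a quick orientation (a sketch, not part of the commit), the same pattern in isolation:

    from spacy.lang.en import English

    nlp = English()  # blank pipeline: tokenizes, but predicts nothing
    doc = nlp("Sarah's sister flew.")
    doc[0].tag_ = "NNP"  # write gold annotations directly onto the tokens
    doc[0].pos_ = "PROPN"
    doc[0].set_morph("NounType=prop|Number=sing")
    assert doc[0].pos_ == "PROPN"  # reads back exactly as a model prediction would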
@@ -43,58 +43,6 @@ test_ner_apple = [
 ]


-@pytest.fixture
-def tagged_doc():
-    text = "Sarah's sister flew to Silicon Valley via London."
-    tags = ["NNP", "POS", "NN", "VBD", "IN", "NNP", "NNP", "IN", "NNP", "."]
-    pos = [
-        "PROPN", "PART", "NOUN", "VERB", "ADP",
-        "PROPN", "PROPN", "ADP", "PROPN", "PUNCT",
-    ]
-    morphs = [
-        "NounType=prop|Number=sing",
-        "Poss=yes",
-        "Number=sing",
-        "Tense=past|VerbForm=fin",
-        "",
-        "NounType=prop|Number=sing",
-        "NounType=prop|Number=sing",
-        "",
-        "NounType=prop|Number=sing",
-        "PunctType=peri",
-    ]
-    nlp = English()
-    doc = nlp(text)
-    for i in range(len(tags)):
-        doc[i].tag_ = tags[i]
-        doc[i].pos_ = pos[i]
-        doc[i].set_morph(morphs[i])
-        if i > 0:
-            doc[i].is_sent_start = False
-    return doc
-
-
-@pytest.fixture
-def sented_doc():
-    text = "One sentence. Two sentences. Three sentences."
-    nlp = English()
-    doc = nlp(text)
-    for i in range(len(doc)):
-        if i % 3 == 0:
-            doc[i].is_sent_start = True
-        else:
-            doc[i].is_sent_start = False
-    return doc
-
-
 def test_tokenization(sented_doc):
     scorer = Scorer()
     gold = {"sent_starts": [t.sent_start for t in sented_doc]}
@@ -1,10 +1,14 @@
 import pytest
 import deplacy
-from spacy.visualization import Visualizer
-from spacy.tokens import Span, Doc
+from wasabi.util import supports_ansi
+from spacy.visualization import AttributeFormat, Visualizer
+from spacy.tokens import Span, Doc, Token


-def test_dependency_tree_basic(en_vocab):
+SUPPORTS_ANSI = supports_ansi()
+
+
+def test_visualization_dependency_tree_basic(en_vocab):
     """Test basic dependency tree display."""
     doc = Doc(
         en_vocab,
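
A note on the module-level constant (an aside, not part of the diff): wasabi's supports_ansi() probes whether the attached terminal accepts ANSI escape sequences, and the tests below branch their expected strings on it. A minimal sketch of the pattern:

    from wasabi.util import supports_ansi

    SUPPORTS_ANSI = supports_ansi()  # evaluate once at import time
    # expected output degrades to plain text when ANSI is unavailable
    expected = "\x1b[38;5;2mdep\x1b[0m" if SUPPORTS_ANSI else "dep"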
@@ -48,7 +52,7 @@ def test_dependency_tree_basic(en_vocab):
 ]


-def test_dependency_tree_non_initial_sentence(en_vocab):
+def test_visualization_dependency_tree_non_initial_sentence(en_vocab):
     """Test basic dependency tree display."""
     doc = Doc(
         en_vocab,
@@ -95,8 +99,8 @@ def test_dependency_tree_non_initial_sentence(en_vocab):
 ]


-def test_dependency_tree_non_projective(en_vocab):
-    """Test dependency tree display with a non-prejective dependency."""
+def test_visualization_dependency_tree_non_projective(en_vocab):
+    """Test dependency tree display with a non-projective dependency."""
     doc = Doc(
         en_vocab,
         words=[
@@ -114,8 +118,6 @@ def test_dependency_tree_non_projective(en_vocab):
         deps=["dep"] * 9,
     )
     dep_tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], True)
-    for line in dep_tree:
-        print(line)
     assert dep_tree == [
         "<╗ ",
         "═╩═══╗",
@@ -141,7 +143,7 @@ def test_dependency_tree_non_projective(en_vocab):
 ]


-def test_dependency_tree_input_not_span(en_vocab):
+def test_visualization_dependency_tree_input_not_span(en_vocab):
     """Test dependency tree display behaviour when the input is not a Span."""
     doc = Doc(
         en_vocab,
@@ -163,7 +165,8 @@ def test_dependency_tree_input_not_span(en_vocab):
     with pytest.raises(AssertionError):
         Visualizer.render_dependency_tree(doc[1:3], True)

-def test_dependency_tree_highly_nonprojective(en_vocab):
+
+def test_visualization_dependency_tree_highly_nonprojective(en_vocab):
     """Test a highly non-projective tree (colloquial Polish)."""
     doc = Doc(
         en_vocab,
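
(Aside, not part of the diff:) render_dependency_tree returns one string per token, each a fixed-width slice of the tree drawn with box-drawing characters, which is why the tests above compare the whole tree as a list. A sketch of reading it back out:

    tree = Visualizer.render_dependency_tree(doc[0 : len(doc)], True)  # True: root on the right
    for token, line in zip(doc, tree):
        print(f"{line} {token.text}")  # tree column printed next to the token column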
@@ -204,3 +207,337 @@ def test_dependency_tree_highly_nonprojective(en_vocab):
 ]
+
+
+def test_visualization_get_entity_native_attribute_int(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    assert Visualizer().get_entity(doc[2], "head.i") == "3"
+
+
+def test_visualization_get_entity_native_attribute_str(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    assert Visualizer().get_entity(doc[2], "dep_") == "dep"
+
+
+def test_visualization_get_entity_colors(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    assert Visualizer().get_entity(
+        doc[2],
+        "dep_",
+        value_dependent_fg_colors={"dep": 2},
+        value_dependent_bg_colors={"dep": 11},
+    ) == ("\x1b[38;5;2;48;5;11mdep\x1b[0m" if SUPPORTS_ANSI else "dep")
+
+
+def test_visualization_get_entity_colors_only_fg(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    assert Visualizer().get_entity(
+        doc[2], "dep_", value_dependent_fg_colors={"dep": 2}
+    ) == ("\x1b[38;5;2mdep\x1b[0m" if SUPPORTS_ANSI else "dep")
+
+
+def test_visualization_get_entity_colors_only_bg(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    assert Visualizer().get_entity(
+        doc[2], "dep_", value_dependent_bg_colors={"dep": 11}
+    ) == ("\x1b[48;5;11mdep\x1b[0m" if SUPPORTS_ANSI else "dep")
+
+
+def test_visualization_get_entity_native_attribute_missing(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    with pytest.raises(AttributeError):
+        Visualizer().get_entity(doc[2], "depp")
+
+
+def test_visualization_get_entity_custom_attribute_str(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    Token.set_extension("test", default="tested", force=True)
+    assert Visualizer().get_entity(doc[2], "_.test") == "tested"
+
+
+def test_visualization_get_entity_nested_custom_attribute_str(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+
+    class Test:
+        def __init__(self):
+            self.inner_test = "tested"
+
+    Token.set_extension("test", default=Test(), force=True)
+    assert Visualizer().get_entity(doc[2], "_.test.inner_test") == "tested"
+
+
+def test_visualization_get_entity_custom_attribute_missing(en_vocab):
+    doc = Doc(
+        en_vocab,
+        words=["I", "saw", "a", "horse", "yesterday", "that", "was", "injured", "."],
+        heads=[1, None, 3, 1, 1, 7, 7, 3, 1],
+        deps=["dep"] * 9,
+    )
+    with pytest.raises(AttributeError):
+        Visualizer().get_entity(doc[2], "_.depp")
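
For readers decoding the expected strings (an aside, not part of the diff): "\x1b[38;5;2;48;5;11m" is a single ANSI SGR sequence setting the foreground to xterm-256 color 2 and the background to color 11, and "\x1b[0m" resets all attributes. A tiny sketch:

    FG, BG, RESET = "38;5;{}", "48;5;{}", "\x1b[0m"

    def colorize(text, fg=None, bg=None):
        # compose the 256-color SGR parameters that the expected strings above use
        params = ";".join(p.format(c) for p, c in ((FG, fg), (BG, bg)) if c is not None)
        return f"\x1b[{params}m{text}{RESET}" if params else text

    assert colorize("dep", fg=2, bg=11) == "\x1b[38;5;2;48;5;11mdep\x1b[0m"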
+def test_visualization_minimal_render_table_one_sentence(
+    fully_featured_doc_one_sentence,
+):
+    formats = [
+        AttributeFormat("tree_left"),
+        AttributeFormat("dep_"),
+        AttributeFormat("text"),
+        AttributeFormat("lemma_"),
+        AttributeFormat("pos_"),
+        AttributeFormat("tag_"),
+        AttributeFormat("morph"),
+        AttributeFormat("ent_type_"),
+    ]
+    assert (
+        Visualizer().render_table(fully_featured_doc_one_sentence, formats).strip()
+        == """
+ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
+ ║ ╚> case 's 's PART POS Poss=yes
+╔>╚═══ nsubj sister sister NOUN NN Number=sing
+╠═════ ROOT flew fly VERB VBD Tense=past|VerbForm=fin
+╠>╔═══ prep to to ADP IN
+║ ║ ╔> compound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE
+║ ╚>╚═ pobj Valley valley PROPN NNP NounType=prop|Number=sing GPE
+╠══>╔═ prep via via ADP IN
+║ ╚> pobj London london PROPN NNP NounType=prop|Number=sing GPE
+╚════> punct . . PUNCT . PunctType=peri
+""".strip()
+    )
+
+
+def test_visualization_minimal_render_table_two_sentences(
+    fully_featured_doc_two_sentences,
+):
+    formats = [
+        AttributeFormat("tree_left"),
+        AttributeFormat("dep_"),
+        AttributeFormat("text"),
+        AttributeFormat("lemma_"),
+        AttributeFormat("pos_"),
+        AttributeFormat("tag_"),
+        AttributeFormat("morph"),
+        AttributeFormat("ent_type_"),
+    ]
+    assert (
+        Visualizer().render_table(fully_featured_doc_two_sentences, formats).strip()
+        == """
+ ╔>╔═ poss Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON
+ ║ ╚> case 's 's PART POS Poss=yes
+╔>╚═══ nsubj sister sister NOUN NN Number=sing
+╠═════ ROOT flew fly VERB VBD Tense=past|VerbForm=fin
+╠>╔═══ prep to to ADP IN
+║ ║ ╔> compound Silicon silicon PROPN NNP NounType=prop|Number=sing GPE
+║ ╚>╚═ pobj Valley valley PROPN NNP NounType=prop|Number=sing GPE
+╠══>╔═ prep via via ADP IN
+║ ╚> pobj London london PROPN NNP NounType=prop|Number=sing GPE
+╚════> punct . . PUNCT . PunctType=peri
+
+
+ ╔> nsubj She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs
+ ╠═ ROOT loved love VERB VBD Tense=Past|VerbForm=Fin
+ ╠> dobj it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs
+ ╚> punct . . PUNCT . PunctType=peri
+""".strip()
+    )
+
+
+def test_visualization_rich_render_table_one_sentence(
+    fully_featured_doc_one_sentence,
+):
+    formats = [
+        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
+        AttributeFormat("dep_", name="dep", fg_color=2),
+        AttributeFormat("i", name="index", aligns="r"),
+        AttributeFormat("text", name="text"),
+        AttributeFormat("lemma_", name="lemma"),
+        AttributeFormat("pos_", name="pos", fg_color=100),
+        AttributeFormat("tag_", name="tag", fg_color=100),
+        AttributeFormat("morph", name="morph", fg_color=100, max_width=15),
+        AttributeFormat(
+            "ent_type_",
+            name="ent",
+            fg_color=196,
+            value_dependent_fg_colors={"PERSON": 50},
+            value_dependent_bg_colors={"PERSON": 12},
+        ),
+    ]
+    assert Visualizer().render_table(fully_featured_doc_one_sentence, formats) == (
+        "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
+        if SUPPORTS_ANSI
+        else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n"
+    )
+
+
+def test_visualization_rich_render_table_two_sentences(
+    fully_featured_doc_two_sentences,
+):
+    formats = [
+        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
+        AttributeFormat("dep_", name="dep", fg_color=2),
+        AttributeFormat("i", name="index", aligns="r"),
+        AttributeFormat("text", name="text"),
+        AttributeFormat("lemma_", name="lemma"),
+        AttributeFormat("pos_", name="pos", fg_color=100),
+        AttributeFormat("tag_", name="tag", fg_color=100),
+        AttributeFormat("morph", name="morph", fg_color=100, max_width=15),
+        AttributeFormat(
+            "ent_type_",
+            name="ent",
+            fg_color=196,
+            value_dependent_fg_colors={"PERSON": 50},
+            value_dependent_bg_colors={"PERSON": 12},
+        ),
+    ]
+    assert Visualizer().render_table(fully_featured_doc_two_sentences, formats) == (
+        "\n\x1b[38;5;2m tree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment \x1b[0m\n\x1b[38;5;2m------\x1b[0m \x1b[38;5;2m--------\x1b[0m ----- ------- ------- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m------\x1b[0m\n\x1b[38;5;2m ╔>╔═\x1b[0m \x1b[38;5;2mposs \x1b[0m 0 Sarah sarah \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196m\x1b[38;5;50;48;5;12mPERSON\x1b[0m\x1b[0m\n\x1b[38;5;2m ║ ╚>\x1b[0m \x1b[38;5;2mcase \x1b[0m 1 's 's \x1b[38;5;100mPART \x1b[0m \x1b[38;5;100mPOS\x1b[0m \x1b[38;5;100mPoss=yes \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╔>╚═══\x1b[0m \x1b[38;5;2mnsubj \x1b[0m 2 sister sister \x1b[38;5;100mNOUN \x1b[0m \x1b[38;5;100mNN \x1b[0m \x1b[38;5;100mNumber=sing \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠═════\x1b[0m \x1b[38;5;2mROOT \x1b[0m 3 flew fly \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m╠>╔═══\x1b[0m \x1b[38;5;2mprep \x1b[0m 4 to to \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ║ ╔>\x1b[0m \x1b[38;5;2mcompound\x1b[0m 5 Silicon silicon \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m║ ╚>╚═\x1b[0m \x1b[38;5;2mpobj \x1b[0m 6 Valley valley \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╠══>╔═\x1b[0m \x1b[38;5;2mprep \x1b[0m 7 via via \x1b[38;5;100mADP \x1b[0m \x1b[38;5;100mIN \x1b[0m \x1b[38;5;100m \x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m║ ╚>\x1b[0m \x1b[38;5;2mpobj \x1b[0m 8 London london \x1b[38;5;100mPROPN\x1b[0m \x1b[38;5;100mNNP\x1b[0m \x1b[38;5;100mNounType=prop|N\x1b[0m \x1b[38;5;196mGPE \x1b[0m\n\x1b[38;5;2m╚════>\x1b[0m \x1b[38;5;2mpunct \x1b[0m 9 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n\n\x1b[38;5;2mtree\x1b[0m \x1b[38;5;2mdep \x1b[0m index text lemma \x1b[38;5;100mpos \x1b[0m \x1b[38;5;100mtag\x1b[0m \x1b[38;5;100mmorph \x1b[0m \x1b[38;5;196ment\x1b[0m\n\x1b[38;5;2m----\x1b[0m \x1b[38;5;2m-----\x1b[0m ----- ----- ----- \x1b[38;5;100m-----\x1b[0m \x1b[38;5;100m---\x1b[0m \x1b[38;5;100m---------------\x1b[0m \x1b[38;5;196m---\x1b[0m\n\x1b[38;5;2m ╔>\x1b[0m \x1b[38;5;2mnsubj\x1b[0m 10 She she \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Nom|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠═\x1b[0m \x1b[38;5;2mROOT \x1b[0m 11 loved love \x1b[38;5;100mVERB \x1b[0m \x1b[38;5;100mVBD\x1b[0m \x1b[38;5;100mTense=Past|Verb\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╠>\x1b[0m \x1b[38;5;2mdobj \x1b[0m 12 it it \x1b[38;5;100mPRON \x1b[0m \x1b[38;5;100mPRP\x1b[0m \x1b[38;5;100mCase=Acc|Gender\x1b[0m \x1b[38;5;196m \x1b[0m\n\x1b[38;5;2m ╚>\x1b[0m \x1b[38;5;2mpunct\x1b[0m 13 . . \x1b[38;5;100mPUNCT\x1b[0m \x1b[38;5;100m. \x1b[0m \x1b[38;5;100mPunctType=peri \x1b[0m \x1b[38;5;196m \x1b[0m\n\n"
+        if SUPPORTS_ANSI
+        else "\n tree dep index text lemma pos tag morph ent \n------ -------- ----- ------- ------- ----- --- ------------------------- ------\n ╔>╔═ poss 0 Sarah sarah PROPN NNP NounType=prop|Number=sing PERSON\n ║ ╚> case 1 's 's PART POS Poss=yes \n╔>╚═══ nsubj 2 sister sister NOUN NN Number=sing \n╠═════ ROOT 3 flew fly VERB VBD Tense=past|VerbForm=fin \n╠>╔═══ prep 4 to to ADP IN \n║ ║ ╔> compound 5 Silicon silicon PROPN NNP NounType=prop|Number=sing GPE \n║ ╚>╚═ pobj 6 Valley valley PROPN NNP NounType=prop|Number=sing GPE \n╠══>╔═ prep 7 via via ADP IN \n║ ╚> pobj 8 London london PROPN NNP NounType=prop|Number=sing GPE \n╚════> punct 9 . . PUNCT . PunctType=peri \n\n\ntree dep index text lemma pos tag morph ent\n---- ----- ----- ----- ----- ----- --- ------------------------------------------------------ ---\n ╔> nsubj 10 She she PRON PRP Case=Nom|Gender=Fem|Number=Sing|Person=3|PronType=Prs \n ╠═ ROOT 11 loved love VERB VBD Tense=Past|VerbForm=Fin \n ╠> dobj 12 it it PRON PRP Case=Acc|Gender=Neut|Number=Sing|Person=3|PronType=Prs \n ╚> punct 13 . . PUNCT . PunctType=peri \n\n"
+    )
@@ -1,7 +1,34 @@
+from os import linesep
+from typing import Union
+import wasabi
 from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS
-from spacy.tokens import Span
+from spacy.tokens import Span, Token, Doc
 from spacy.util import working_dir


+class AttributeFormat:
+    def __init__(
+        self,
+        attribute: str,
+        *,
+        name: str = "",
+        aligns: str = "l",
+        max_width: int = None,
+        fg_color: Union[str, int] = None,
+        bg_color: Union[str, int] = None,
+        value_dependent_fg_colors: dict[str, Union[str, int]] = None,
+        value_dependent_bg_colors: dict[str, Union[str, int]] = None,
+    ):
+        self.attribute = attribute
+        self.name = name
+        self.aligns = aligns
+        self.max_width = max_width
+        self.fg_color = fg_color
+        self.bg_color = bg_color
+        self.value_dependent_fg_colors = value_dependent_fg_colors
+        self.value_dependent_bg_colors = value_dependent_bg_colors


 SPACE = 0
 HALF_HORIZONTAL_LINE = 1  # the half is the half further away from the root
 FULL_HORIZONTAL_LINE = 3
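
Each AttributeFormat bundles one output column (an aside, not part of the diff): attribute names a token property ("dep_", "morph", a dotted path like "head.i" or "_.my_ext", or the pseudo-attributes "tree_left"/"tree_right"), name supplies the column header, aligns and max_width control layout, and the color fields drive the ANSI styling exercised in the tests above. For example:

    ent = AttributeFormat(
        "ent_type_",
        name="ent",
        fg_color=196,                              # column-wide foreground color
        value_dependent_fg_colors={"PERSON": 50},  # per-value overrides
        value_dependent_bg_colors={"PERSON": 12},
    )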
@@ -37,12 +64,11 @@ ROOT_LEFT_CHARS = {
 }


-class TableColumn:
-    def __init__(self, entity: str, width: int, overflow_strategy: str = "truncate"):
-        pass
-
-
 class Visualizer:
+    def __init__(self):
+        self.printer = wasabi.Printer(no_print=True)
+
     @staticmethod
     def render_dependency_tree(sent: Span, root_right: bool) -> list[str]:
         """
@@ -65,6 +91,17 @@ class Visualizer:
             else token.head.i - sent.start
             for token in sent
         ]
+        # Check there are no head references outside the sentence
+        assert (
+            len(
+                [
+                    head
+                    for head in heads
+                    if head is not None and (head < 0 or head > sent.end - sent.start)
+                ]
+            )
+            == 0
+        )
         children_lists = [[] for _ in range(sent.end - sent.start)]
        for child, head in enumerate(heads):
            if head is not None:
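
This guard is what the input-validation test above relies on (an aside, not part of the diff): slicing a Doc mid-sentence leaves some tokens with heads outside the slice, so render_dependency_tree fails fast rather than drawing a broken tree:

    with pytest.raises(AssertionError):
        # doc[1:3] cuts across the parse; some heads point outside the span
        Visualizer.render_dependency_tree(doc[1:3], True)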
@@ -257,3 +294,85 @@ class Visualizer:
             )[::-1]
             for vertical_position in range(sent.end - sent.start)
         ]
+
+    def get_entity(
+        self,
+        token: Token,
+        entity_name: str,
+        *,
+        value_dependent_fg_colors: dict[str, Union[str, int]] = None,
+        value_dependent_bg_colors: dict[str, Union[str, int]] = None,
+        truncate_at_width: int = None,
+    ) -> str:
+        # Resolve dotted attribute paths, e.g. "head.i" or "_.my_ext"
+        obj = token
+        parts = entity_name.split(".")
+        for part in parts[:-1]:
+            obj = getattr(obj, part)
+        value = str(getattr(obj, parts[-1]))
+        if truncate_at_width is not None:
+            value = value[:truncate_at_width]
+        fg_color = (
+            value_dependent_fg_colors.get(value, None)
+            if value_dependent_fg_colors is not None
+            else None
+        )
+        bg_color = (
+            value_dependent_bg_colors.get(value, None)
+            if value_dependent_bg_colors is not None
+            else None
+        )
+        if fg_color is not None or bg_color is not None:
+            value = self.printer.text(value, color=fg_color, bg_color=bg_color)
+        return value
+
+    def render_table(
+        self, doc: Doc, columns: list[AttributeFormat], spacing: int = 3
+    ) -> str:
+        return_string = ""
+        for sent in doc.sents:
+            if "tree_right" in (c.attribute for c in columns):
+                tree_right = self.render_dependency_tree(sent, True)
+            if "tree_left" in (c.attribute for c in columns):
+                tree_left = self.render_dependency_tree(sent, False)
+            widths = []
+            for column in columns:
+                # get the values without any color codes
+                if column.attribute == "tree_left":
+                    width = len(tree_left[0])
+                elif column.attribute == "tree_right":
+                    width = len(tree_right[0])
+                else:
+                    width = max(
+                        len(self.get_entity(token, column.attribute))
+                        for token in sent
+                    )
+                if column.max_width is not None:
+                    width = min(width, column.max_width)
+                width = max(width, len(column.name))
+                widths.append(width)
+            data = [
+                [
+                    tree_right[token_index]
+                    if column.attribute == "tree_right"
+                    else tree_left[token_index]
+                    if column.attribute == "tree_left"
+                    else self.get_entity(
+                        token,
+                        column.attribute,
+                        value_dependent_fg_colors=column.value_dependent_fg_colors,
+                        value_dependent_bg_colors=column.value_dependent_bg_colors,
+                        truncate_at_width=widths[column_index],
+                    )
+                    for column_index, column in enumerate(columns)
+                ]
+                for token_index, token in enumerate(sent)
+            ]
+            if any(c.name for c in columns):
+                header = [c.name for c in columns]
+            else:
+                header = None
+            aligns = [c.aligns for c in columns]
+            fg_colors = [c.fg_color for c in columns]
+            bg_colors = [c.bg_color for c in columns]
+            return_string += (
+                wasabi.table(
+                    data,
+                    header=header,
+                    divider=True,
+                    aligns=aligns,
+                    widths=widths,
+                    fg_colors=fg_colors,
+                    bg_colors=bg_colors,
+                )
+                + linesep
+            )
+        return return_string
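
Taken together, a minimal end-to-end sketch (not part of the commit; assumes a Doc with heads and deps set, e.g. the conftest fixtures above):

    from spacy.visualization import AttributeFormat, Visualizer

    formats = [
        AttributeFormat("tree_left", name="tree", aligns="r", fg_color=2),
        AttributeFormat("dep_", name="dep", fg_color=2),
        AttributeFormat("text", name="text"),
        AttributeFormat("morph", name="morph", max_width=15),
    ]
    # one wasabi table per sentence, with the dependency tree as a column
    print(Visualizer().render_table(doc, formats))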