Render sentences rather than documents

This commit is contained in:
Richard Hudson 2021-11-30 16:01:05 +01:00
parent b4265eccf9
commit 9a1d291191

View File

@ -1,5 +1,5 @@
from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS
from spacy.tokens import Doc, Token from spacy.tokens import Span
from spacy.util import working_dir from spacy.util import working_dir
SPACE = 0 SPACE = 0
@ -39,7 +39,7 @@ ROOT_LEFT_CHARS = {
class Visualizer: class Visualizer:
@staticmethod @staticmethod
def render_dependency_trees(doc: Doc, root_right: bool) -> list[str]: def render_dependency_tree(sent: Span, root_right: bool) -> list[str]:
""" """
Returns an ASCII rendering of the document with a dependency tree for each sentence. The Returns an ASCII rendering of the document with a dependency tree for each sentence. The
dependency tree output for a given token has the same index within the output list of dependency tree output for a given token has the same index within the output list of
@ -53,10 +53,10 @@ class Visualizer:
heads = [ heads = [
None None
if token.dep_.lower() == "root" or token.head.i == token.i if token.dep_.lower() == "root" or token.head.i == token.i
else token.head.i else token.head.i - sent.start
for token in doc for token in sent
] ]
children_lists = [[] for _ in range(len(doc))] children_lists = [[] for _ in range(sent.end - sent.start)]
for child, head in enumerate(heads): for child, head in enumerate(heads):
if head is not None: if head is not None:
children_lists[head].append(child) children_lists[head].append(child)
@ -78,7 +78,7 @@ class Visualizer:
-1 if heads[i] is None else 1 -1 if heads[i] is None else 1
# length == 1: governed by direct neighbour and has no children itself # length == 1: governed by direct neighbour and has no children itself
if len(children_lists[i]) == 0 and abs(heads[i] - i) == 1 else 0 if len(children_lists[i]) == 0 and abs(heads[i] - i) == 1 else 0
for i in range(len(doc)) for i in range(sent.end - sent.start)
] ]
while 0 in horizontal_line_lengths: while 0 in horizontal_line_lengths:
for working_token_index in ( for working_token_index in (
@ -126,9 +126,10 @@ class Visualizer:
) )
max_horizontal_line_length = max(horizontal_line_lengths) max_horizontal_line_length = max(horizontal_line_lengths)
char_matrix = [ char_matrix = [
[SPACE] * max_horizontal_line_length * 2 for _ in range(len(doc)) [SPACE] * max_horizontal_line_length * 2
for _ in range(sent.start, sent.end)
] ]
for working_token_index in range(len(doc)): for working_token_index in range(sent.end - sent.start):
head_token_index = heads[working_token_index] head_token_index = heads[working_token_index]
if head_token_index is None: if head_token_index is None:
continue continue
@ -169,7 +170,9 @@ class Visualizer:
char_matrix[working_vertical_position][ char_matrix[working_vertical_position][
char_horizontal_line_length - 1 char_horizontal_line_length - 1
] |= FULL_VERTICAL_LINE ] |= FULL_VERTICAL_LINE
for working_token_index in (i for i in range(len(doc)) if heads[i] is not None): for working_token_index in (
i for i in range(sent.end - sent.start) if heads[i] is not None
):
for working_horizontal_position in range( for working_horizontal_position in range(
2 * horizontal_line_lengths[working_token_index] - 2, -1, -1 2 * horizontal_line_lengths[working_token_index] - 2, -1, -1
): ):
@ -213,7 +216,7 @@ class Visualizer:
] ]
for horizontal_position in range((max_horizontal_line_length * 2)) for horizontal_position in range((max_horizontal_line_length * 2))
) )
for vertical_position in range(len(doc)) for vertical_position in range(sent.end - sent.start)
] ]
else: else:
return [ return [
@ -221,5 +224,5 @@ class Visualizer:
ROOT_LEFT_CHARS[char_matrix[vertical_position][horizontal_position]] ROOT_LEFT_CHARS[char_matrix[vertical_position][horizontal_position]]
for horizontal_position in range((max_horizontal_line_length * 2)) for horizontal_position in range((max_horizontal_line_length * 2))
)[::-1] )[::-1]
for vertical_position in range(len(doc)) for vertical_position in range(sent.end - sent.start)
] ]