mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 12:20:20 +03:00
Render sentences rather than documents
This commit is contained in:
parent
b4265eccf9
commit
9a1d291191
|
@ -1,5 +1,5 @@
|
||||||
from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS
|
from spacy.tests.lang.ko.test_tokenizer import FULL_TAG_TESTS
|
||||||
from spacy.tokens import Doc, Token
|
from spacy.tokens import Span
|
||||||
from spacy.util import working_dir
|
from spacy.util import working_dir
|
||||||
|
|
||||||
SPACE = 0
|
SPACE = 0
|
||||||
|
@ -39,7 +39,7 @@ ROOT_LEFT_CHARS = {
|
||||||
|
|
||||||
class Visualizer:
|
class Visualizer:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def render_dependency_trees(doc: Doc, root_right: bool) -> list[str]:
|
def render_dependency_tree(sent: Span, root_right: bool) -> list[str]:
|
||||||
"""
|
"""
|
||||||
Returns an ASCII rendering of the document with a dependency tree for each sentence. The
|
Returns an ASCII rendering of the sentence with its dependency tree. The
|
||||||
dependency tree output for a given token has the same index within the output list of
|
dependency tree output for a given token has the same index within the output list of
|
||||||
|
@ -53,10 +53,10 @@ class Visualizer:
|
||||||
heads = [
|
heads = [
|
||||||
None
|
None
|
||||||
if token.dep_.lower() == "root" or token.head.i == token.i
|
if token.dep_.lower() == "root" or token.head.i == token.i
|
||||||
else token.head.i
|
else token.head.i - sent.start
|
||||||
for token in doc
|
for token in sent
|
||||||
]
|
]
|
||||||
children_lists = [[] for _ in range(len(doc))]
|
children_lists = [[] for _ in range(sent.end - sent.start)]
|
||||||
for child, head in enumerate(heads):
|
for child, head in enumerate(heads):
|
||||||
if head is not None:
|
if head is not None:
|
||||||
children_lists[head].append(child)
|
children_lists[head].append(child)
|
||||||
|
@ -78,7 +78,7 @@ class Visualizer:
|
||||||
-1 if heads[i] is None else 1
|
-1 if heads[i] is None else 1
|
||||||
# length == 1: governed by direct neighbour and has no children itself
|
# length == 1: governed by direct neighbour and has no children itself
|
||||||
if len(children_lists[i]) == 0 and abs(heads[i] - i) == 1 else 0
|
if len(children_lists[i]) == 0 and abs(heads[i] - i) == 1 else 0
|
||||||
for i in range(len(doc))
|
for i in range(sent.end - sent.start)
|
||||||
]
|
]
|
||||||
while 0 in horizontal_line_lengths:
|
while 0 in horizontal_line_lengths:
|
||||||
for working_token_index in (
|
for working_token_index in (
|
||||||
|
@ -126,9 +126,10 @@ class Visualizer:
|
||||||
)
|
)
|
||||||
max_horizontal_line_length = max(horizontal_line_lengths)
|
max_horizontal_line_length = max(horizontal_line_lengths)
|
||||||
char_matrix = [
|
char_matrix = [
|
||||||
[SPACE] * max_horizontal_line_length * 2 for _ in range(len(doc))
|
[SPACE] * max_horizontal_line_length * 2
|
||||||
|
for _ in range(sent.start, sent.end)
|
||||||
]
|
]
|
||||||
for working_token_index in range(len(doc)):
|
for working_token_index in range(sent.end - sent.start):
|
||||||
head_token_index = heads[working_token_index]
|
head_token_index = heads[working_token_index]
|
||||||
if head_token_index is None:
|
if head_token_index is None:
|
||||||
continue
|
continue
|
||||||
|
@ -169,7 +170,9 @@ class Visualizer:
|
||||||
char_matrix[working_vertical_position][
|
char_matrix[working_vertical_position][
|
||||||
char_horizontal_line_length - 1
|
char_horizontal_line_length - 1
|
||||||
] |= FULL_VERTICAL_LINE
|
] |= FULL_VERTICAL_LINE
|
||||||
for working_token_index in (i for i in range(len(doc)) if heads[i] is not None):
|
for working_token_index in (
|
||||||
|
i for i in range(sent.end - sent.start) if heads[i] is not None
|
||||||
|
):
|
||||||
for working_horizontal_position in range(
|
for working_horizontal_position in range(
|
||||||
2 * horizontal_line_lengths[working_token_index] - 2, -1, -1
|
2 * horizontal_line_lengths[working_token_index] - 2, -1, -1
|
||||||
):
|
):
|
||||||
|
@ -213,7 +216,7 @@ class Visualizer:
|
||||||
]
|
]
|
||||||
for horizontal_position in range((max_horizontal_line_length * 2))
|
for horizontal_position in range((max_horizontal_line_length * 2))
|
||||||
)
|
)
|
||||||
for vertical_position in range(len(doc))
|
for vertical_position in range(sent.end - sent.start)
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
return [
|
return [
|
||||||
|
@ -221,5 +224,5 @@ class Visualizer:
|
||||||
ROOT_LEFT_CHARS[char_matrix[vertical_position][horizontal_position]]
|
ROOT_LEFT_CHARS[char_matrix[vertical_position][horizontal_position]]
|
||||||
for horizontal_position in range((max_horizontal_line_length * 2))
|
for horizontal_position in range((max_horizontal_line_length * 2))
|
||||||
)[::-1]
|
)[::-1]
|
||||||
for vertical_position in range(len(doc))
|
for vertical_position in range(sent.end - sent.start)
|
||||||
]
|
]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user