mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
displaCy Spans Vertical Alignment Fix 2 (#11092)
* add in span render slot fix * fix spacing off by one * rm demo * adjust comments * fix whitespace and overlap issue
This commit is contained in:
parent
dc38a0f079
commit
36cb2029a9
|
@ -130,26 +130,56 @@ class SpanRenderer:
|
||||||
title (str / None): Document title set in Doc.user_data['title'].
|
title (str / None): Document title set in Doc.user_data['title'].
|
||||||
"""
|
"""
|
||||||
per_token_info = []
|
per_token_info = []
|
||||||
|
# we must sort so that we can correctly describe when spans need to "stack"
|
||||||
|
# which is determined by their start token, then span length (longer spans on top),
|
||||||
|
# then break any remaining ties with the span label
|
||||||
|
spans = sorted(
|
||||||
|
spans,
|
||||||
|
key=lambda s: (
|
||||||
|
s["start_token"],
|
||||||
|
-(s["end_token"] - s["start_token"]),
|
||||||
|
s["label"],
|
||||||
|
),
|
||||||
|
)
|
||||||
|
for s in spans:
|
||||||
|
# this is the vertical 'slot' that the span will be rendered in
|
||||||
|
# vertical_position = span_label_offset + (offset_step * (slot - 1))
|
||||||
|
s["render_slot"] = 0
|
||||||
for idx, token in enumerate(tokens):
|
for idx, token in enumerate(tokens):
|
||||||
# Identify if a token belongs to a Span (and which) and if it's a
|
# Identify if a token belongs to a Span (and which) and if it's a
|
||||||
# start token of said Span. We'll use this for the final HTML render
|
# start token of said Span. We'll use this for the final HTML render
|
||||||
token_markup: Dict[str, Any] = {}
|
token_markup: Dict[str, Any] = {}
|
||||||
token_markup["text"] = token
|
token_markup["text"] = token
|
||||||
|
concurrent_spans = 0
|
||||||
entities = []
|
entities = []
|
||||||
for span in spans:
|
for span in spans:
|
||||||
ent = {}
|
ent = {}
|
||||||
if span["start_token"] <= idx < span["end_token"]:
|
if span["start_token"] <= idx < span["end_token"]:
|
||||||
|
concurrent_spans += 1
|
||||||
|
span_start = idx == span["start_token"]
|
||||||
ent["label"] = span["label"]
|
ent["label"] = span["label"]
|
||||||
ent["is_start"] = True if idx == span["start_token"] else False
|
ent["is_start"] = span_start
|
||||||
|
if span_start:
|
||||||
|
# When the span starts, we need to know how many other
|
||||||
|
# spans are on the 'span stack' and will be rendered.
|
||||||
|
# This value becomes the vertical render slot for this entire span
|
||||||
|
span["render_slot"] = concurrent_spans
|
||||||
|
ent["render_slot"] = span["render_slot"]
|
||||||
kb_id = span.get("kb_id", "")
|
kb_id = span.get("kb_id", "")
|
||||||
kb_url = span.get("kb_url", "#")
|
kb_url = span.get("kb_url", "#")
|
||||||
ent["kb_link"] = (
|
ent["kb_link"] = (
|
||||||
TPL_KB_LINK.format(kb_id=kb_id, kb_url=kb_url) if kb_id else ""
|
TPL_KB_LINK.format(kb_id=kb_id, kb_url=kb_url) if kb_id else ""
|
||||||
)
|
)
|
||||||
entities.append(ent)
|
entities.append(ent)
|
||||||
|
else:
|
||||||
|
# We don't specifically need to do this since we loop
|
||||||
|
# over tokens and spans sorted by their start_token,
|
||||||
|
# so we'll never use a span again after the last token it appears in,
|
||||||
|
# but if we were to use these spans again we'd want to make sure
|
||||||
|
# this value was reset correctly.
|
||||||
|
span["render_slot"] = 0
|
||||||
token_markup["entities"] = entities
|
token_markup["entities"] = entities
|
||||||
per_token_info.append(token_markup)
|
per_token_info.append(token_markup)
|
||||||
|
|
||||||
markup = self._render_markup(per_token_info)
|
markup = self._render_markup(per_token_info)
|
||||||
markup = TPL_SPANS.format(content=markup, dir=self.direction)
|
markup = TPL_SPANS.format(content=markup, dir=self.direction)
|
||||||
if title:
|
if title:
|
||||||
|
@ -160,8 +190,12 @@ class SpanRenderer:
|
||||||
"""Render the markup from per-token information"""
|
"""Render the markup from per-token information"""
|
||||||
markup = ""
|
markup = ""
|
||||||
for token in per_token_info:
|
for token in per_token_info:
|
||||||
entities = sorted(token["entities"], key=lambda d: d["label"])
|
entities = sorted(token["entities"], key=lambda d: d["render_slot"])
|
||||||
if entities:
|
# Whitespace tokens disrupt the vertical space (no line height) so that the
|
||||||
|
# span indicators get misaligned. We don't render them as individual
|
||||||
|
# tokens anyway, so we'll just not display a span indicator either.
|
||||||
|
is_whitespace = token["text"].strip() == ""
|
||||||
|
if entities and not is_whitespace:
|
||||||
slices = self._get_span_slices(token["entities"])
|
slices = self._get_span_slices(token["entities"])
|
||||||
starts = self._get_span_starts(token["entities"])
|
starts = self._get_span_starts(token["entities"])
|
||||||
total_height = (
|
total_height = (
|
||||||
|
@ -182,10 +216,18 @@ class SpanRenderer:
|
||||||
def _get_span_slices(self, entities: List[Dict]) -> str:
|
def _get_span_slices(self, entities: List[Dict]) -> str:
|
||||||
"""Get the rendered markup of all Span slices"""
|
"""Get the rendered markup of all Span slices"""
|
||||||
span_slices = []
|
span_slices = []
|
||||||
for entity, step in zip(entities, itertools.count(step=self.offset_step)):
|
for entity in entities:
|
||||||
|
# rather than iterate over multiples of offset_step, we use entity['render_slot']
|
||||||
|
# to determine the vertical position, since that tells where
|
||||||
|
# the span starts vertically so we can extend it horizontally,
|
||||||
|
# past other spans that might have already ended
|
||||||
color = self.colors.get(entity["label"].upper(), self.default_color)
|
color = self.colors.get(entity["label"].upper(), self.default_color)
|
||||||
|
top_offset = self.top_offset + (
|
||||||
|
self.offset_step * (entity["render_slot"] - 1)
|
||||||
|
)
|
||||||
span_slice = self.span_slice_template.format(
|
span_slice = self.span_slice_template.format(
|
||||||
bg=color, top_offset=self.top_offset + step
|
bg=color,
|
||||||
|
top_offset=top_offset,
|
||||||
)
|
)
|
||||||
span_slices.append(span_slice)
|
span_slices.append(span_slice)
|
||||||
return "".join(span_slices)
|
return "".join(span_slices)
|
||||||
|
@ -193,12 +235,15 @@ class SpanRenderer:
|
||||||
def _get_span_starts(self, entities: List[Dict]) -> str:
|
def _get_span_starts(self, entities: List[Dict]) -> str:
|
||||||
"""Get the rendered markup of all Span start tokens"""
|
"""Get the rendered markup of all Span start tokens"""
|
||||||
span_starts = []
|
span_starts = []
|
||||||
for entity, step in zip(entities, itertools.count(step=self.offset_step)):
|
for entity in entities:
|
||||||
color = self.colors.get(entity["label"].upper(), self.default_color)
|
color = self.colors.get(entity["label"].upper(), self.default_color)
|
||||||
|
top_offset = self.top_offset + (
|
||||||
|
self.offset_step * (entity["render_slot"] - 1)
|
||||||
|
)
|
||||||
span_start = (
|
span_start = (
|
||||||
self.span_start_template.format(
|
self.span_start_template.format(
|
||||||
bg=color,
|
bg=color,
|
||||||
top_offset=self.top_offset + step,
|
top_offset=top_offset,
|
||||||
label=entity["label"],
|
label=entity["label"],
|
||||||
kb_link=entity["kb_link"],
|
kb_link=entity["kb_link"],
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user