mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
0566c3a166
These changes add a missing call to `escape_html` in the displaCy span renderer. Previously span-annotated tokens would be inserted into the page markup without being escaped, resulting in potentially incorrect rendering. When I encountered this issue, it resulted in some docs and span underlines being superimposed on top of properly rendered docs and span underlines near the beginning of the visualization (due to an unescaped `<span>` tag).
591 lines
23 KiB
Python
591 lines
23 KiB
Python
import uuid
|
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
|
|
from ..errors import Errors
|
|
from ..util import escape_html, minify_html, registry
|
|
from .templates import (
|
|
TPL_DEP_ARCS,
|
|
TPL_DEP_SVG,
|
|
TPL_DEP_WORDS,
|
|
TPL_DEP_WORDS_LEMMA,
|
|
TPL_ENT,
|
|
TPL_ENT_RTL,
|
|
TPL_ENTS,
|
|
TPL_FIGURE,
|
|
TPL_KB_LINK,
|
|
TPL_PAGE,
|
|
TPL_SPAN,
|
|
TPL_SPAN_RTL,
|
|
TPL_SPAN_SLICE,
|
|
TPL_SPAN_SLICE_RTL,
|
|
TPL_SPAN_START,
|
|
TPL_SPAN_START_RTL,
|
|
TPL_SPANS,
|
|
TPL_TITLE,
|
|
)
|
|
|
|
DEFAULT_LANG = "en"
|
|
DEFAULT_DIR = "ltr"
|
|
DEFAULT_ENTITY_COLOR = "#ddd"
|
|
DEFAULT_LABEL_COLORS = {
|
|
"ORG": "#7aecec",
|
|
"PRODUCT": "#bfeeb7",
|
|
"GPE": "#feca74",
|
|
"LOC": "#ff9561",
|
|
"PERSON": "#aa9cfc",
|
|
"NORP": "#c887fb",
|
|
"FAC": "#9cc9cc",
|
|
"EVENT": "#ffeb80",
|
|
"LAW": "#ff8197",
|
|
"LANGUAGE": "#ff8197",
|
|
"WORK_OF_ART": "#f0d0ff",
|
|
"DATE": "#bfe1d9",
|
|
"TIME": "#bfe1d9",
|
|
"MONEY": "#e4e7d2",
|
|
"QUANTITY": "#e4e7d2",
|
|
"ORDINAL": "#e4e7d2",
|
|
"CARDINAL": "#e4e7d2",
|
|
"PERCENT": "#e4e7d2",
|
|
}
|
|
|
|
|
|
class SpanRenderer:
|
|
"""Render Spans as SVGs."""
|
|
|
|
style = "span"
|
|
|
|
def __init__(self, options: Dict[str, Any] = {}) -> None:
|
|
"""Initialise span renderer
|
|
|
|
options (dict): Visualiser-specific options (colors, spans)
|
|
"""
|
|
# Set up the colors and overall look
|
|
colors = dict(DEFAULT_LABEL_COLORS)
|
|
user_colors = registry.displacy_colors.get_all()
|
|
for user_color in user_colors.values():
|
|
if callable(user_color):
|
|
# Since this comes from the function registry, we want to make
|
|
# sure we support functions that *return* a dict of colors
|
|
user_color = user_color()
|
|
if not isinstance(user_color, dict):
|
|
raise ValueError(Errors.E925.format(obj=type(user_color)))
|
|
colors.update(user_color)
|
|
colors.update(options.get("colors", {}))
|
|
self.default_color = DEFAULT_ENTITY_COLOR
|
|
self.colors = {label.upper(): color for label, color in colors.items()}
|
|
|
|
# Set up how the text and labels will be rendered
|
|
self.direction = DEFAULT_DIR
|
|
self.lang = DEFAULT_LANG
|
|
# These values are in px
|
|
self.top_offset = options.get("top_offset", 40)
|
|
# This is how far under the top offset the span labels appear
|
|
self.span_label_offset = options.get("span_label_offset", 20)
|
|
self.offset_step = options.get("top_offset_step", 17)
|
|
|
|
# Set up which templates will be used
|
|
template = options.get("template")
|
|
if template:
|
|
self.span_template = template["span"]
|
|
self.span_slice_template = template["slice"]
|
|
self.span_start_template = template["start"]
|
|
else:
|
|
if self.direction == "rtl":
|
|
self.span_template = TPL_SPAN_RTL
|
|
self.span_slice_template = TPL_SPAN_SLICE_RTL
|
|
self.span_start_template = TPL_SPAN_START_RTL
|
|
else:
|
|
self.span_template = TPL_SPAN
|
|
self.span_slice_template = TPL_SPAN_SLICE
|
|
self.span_start_template = TPL_SPAN_START
|
|
|
|
def render(
|
|
self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
|
|
) -> str:
|
|
"""Render complete markup.
|
|
|
|
parsed (list): Dependency parses to render.
|
|
page (bool): Render parses wrapped as full HTML page.
|
|
minify (bool): Minify HTML markup.
|
|
RETURNS (str): Rendered SVG or HTML markup.
|
|
"""
|
|
rendered = []
|
|
for i, p in enumerate(parsed):
|
|
if i == 0:
|
|
settings = p.get("settings", {})
|
|
self.direction = settings.get("direction", DEFAULT_DIR)
|
|
self.lang = settings.get("lang", DEFAULT_LANG)
|
|
rendered.append(self.render_spans(p["tokens"], p["spans"], p.get("title")))
|
|
|
|
if page:
|
|
docs = "".join([TPL_FIGURE.format(content=doc) for doc in rendered])
|
|
markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
|
|
else:
|
|
markup = "".join(rendered)
|
|
if minify:
|
|
return minify_html(markup)
|
|
return markup
|
|
|
|
def render_spans(
|
|
self,
|
|
tokens: List[str],
|
|
spans: List[Dict[str, Any]],
|
|
title: Optional[str],
|
|
) -> str:
|
|
"""Render span types in text.
|
|
|
|
Spans are rendered per-token, this means that for each token, we check if it's part
|
|
of a span slice (a member of a span type) or a span start (the starting token of a
|
|
given span type).
|
|
|
|
tokens (list): Individual tokens in the text
|
|
spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
|
|
title (str / None): Document title set in Doc.user_data['title'].
|
|
"""
|
|
per_token_info = []
|
|
# we must sort so that we can correctly describe when spans need to "stack"
|
|
# which is determined by their start token, then span length (longer spans on top),
|
|
# then break any remaining ties with the span label
|
|
spans = sorted(
|
|
spans,
|
|
key=lambda s: (
|
|
s["start_token"],
|
|
-(s["end_token"] - s["start_token"]),
|
|
s["label"],
|
|
),
|
|
)
|
|
for s in spans:
|
|
# this is the vertical 'slot' that the span will be rendered in
|
|
# vertical_position = span_label_offset + (offset_step * (slot - 1))
|
|
s["render_slot"] = 0
|
|
for idx, token in enumerate(tokens):
|
|
# Identify if a token belongs to a Span (and which) and if it's a
|
|
# start token of said Span. We'll use this for the final HTML render
|
|
token_markup: Dict[str, Any] = {}
|
|
token_markup["text"] = token
|
|
concurrent_spans = 0
|
|
entities = []
|
|
for span in spans:
|
|
ent = {}
|
|
if span["start_token"] <= idx < span["end_token"]:
|
|
concurrent_spans += 1
|
|
span_start = idx == span["start_token"]
|
|
ent["label"] = span["label"]
|
|
ent["is_start"] = span_start
|
|
if span_start:
|
|
# When the span starts, we need to know how many other
|
|
# spans are on the 'span stack' and will be rendered.
|
|
# This value becomes the vertical render slot for this entire span
|
|
span["render_slot"] = concurrent_spans
|
|
ent["render_slot"] = span["render_slot"]
|
|
kb_id = span.get("kb_id", "")
|
|
kb_url = span.get("kb_url", "#")
|
|
ent["kb_link"] = (
|
|
TPL_KB_LINK.format(kb_id=kb_id, kb_url=kb_url) if kb_id else ""
|
|
)
|
|
entities.append(ent)
|
|
else:
|
|
# We don't specifically need to do this since we loop
|
|
# over tokens and spans sorted by their start_token,
|
|
# so we'll never use a span again after the last token it appears in,
|
|
# but if we were to use these spans again we'd want to make sure
|
|
# this value was reset correctly.
|
|
span["render_slot"] = 0
|
|
token_markup["entities"] = entities
|
|
per_token_info.append(token_markup)
|
|
markup = self._render_markup(per_token_info)
|
|
markup = TPL_SPANS.format(content=markup, dir=self.direction)
|
|
if title:
|
|
markup = TPL_TITLE.format(title=title) + markup
|
|
return markup
|
|
|
|
def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
|
|
"""Render the markup from per-token information"""
|
|
markup = ""
|
|
for token in per_token_info:
|
|
entities = sorted(token["entities"], key=lambda d: d["render_slot"])
|
|
# Whitespace tokens disrupt the vertical space (no line height) so that the
|
|
# span indicators get misaligned. We don't render them as individual
|
|
# tokens anyway, so we'll just not display a span indicator either.
|
|
is_whitespace = token["text"].strip() == ""
|
|
if entities and not is_whitespace:
|
|
slices = self._get_span_slices(token["entities"])
|
|
starts = self._get_span_starts(token["entities"])
|
|
total_height = (
|
|
self.top_offset
|
|
+ self.span_label_offset
|
|
+ (self.offset_step * (len(entities) - 1))
|
|
)
|
|
markup += self.span_template.format(
|
|
text=escape_html(token["text"]),
|
|
span_slices=slices,
|
|
span_starts=starts,
|
|
total_height=total_height,
|
|
)
|
|
else:
|
|
markup += escape_html(token["text"] + " ")
|
|
return markup
|
|
|
|
def _get_span_slices(self, entities: List[Dict]) -> str:
|
|
"""Get the rendered markup of all Span slices"""
|
|
span_slices = []
|
|
for entity in entities:
|
|
# rather than iterate over multiples of offset_step, we use entity['render_slot']
|
|
# to determine the vertical position, since that tells where
|
|
# the span starts vertically so we can extend it horizontally,
|
|
# past other spans that might have already ended
|
|
color = self.colors.get(entity["label"].upper(), self.default_color)
|
|
top_offset = self.top_offset + (
|
|
self.offset_step * (entity["render_slot"] - 1)
|
|
)
|
|
span_slice = self.span_slice_template.format(
|
|
bg=color,
|
|
top_offset=top_offset,
|
|
)
|
|
span_slices.append(span_slice)
|
|
return "".join(span_slices)
|
|
|
|
def _get_span_starts(self, entities: List[Dict]) -> str:
|
|
"""Get the rendered markup of all Span start tokens"""
|
|
span_starts = []
|
|
for entity in entities:
|
|
color = self.colors.get(entity["label"].upper(), self.default_color)
|
|
top_offset = self.top_offset + (
|
|
self.offset_step * (entity["render_slot"] - 1)
|
|
)
|
|
span_start = (
|
|
self.span_start_template.format(
|
|
bg=color,
|
|
top_offset=top_offset,
|
|
label=entity["label"],
|
|
kb_link=entity["kb_link"],
|
|
)
|
|
if entity["is_start"]
|
|
else ""
|
|
)
|
|
span_starts.append(span_start)
|
|
return "".join(span_starts)
|
|
|
|
|
|
class DependencyRenderer:
|
|
"""Render dependency parses as SVGs."""
|
|
|
|
style = "dep"
|
|
|
|
def __init__(self, options: Dict[str, Any] = {}) -> None:
|
|
"""Initialise dependency renderer.
|
|
|
|
options (dict): Visualiser-specific options (compact, word_spacing,
|
|
arrow_spacing, arrow_width, arrow_stroke, distance, offset_x,
|
|
color, bg, font)
|
|
"""
|
|
self.compact = options.get("compact", False)
|
|
self.word_spacing = options.get("word_spacing", 45)
|
|
self.arrow_spacing = options.get("arrow_spacing", 12 if self.compact else 20)
|
|
self.arrow_width = options.get("arrow_width", 6 if self.compact else 10)
|
|
self.arrow_stroke = options.get("arrow_stroke", 2)
|
|
self.distance = options.get("distance", 150 if self.compact else 175)
|
|
self.offset_x = options.get("offset_x", 50)
|
|
self.color = options.get("color", "#000000")
|
|
self.bg = options.get("bg", "#ffffff")
|
|
self.font = options.get("font", "Arial")
|
|
self.direction = DEFAULT_DIR
|
|
self.lang = DEFAULT_LANG
|
|
|
|
def render(
|
|
self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
|
|
) -> str:
|
|
"""Render complete markup.
|
|
|
|
parsed (list): Dependency parses to render.
|
|
page (bool): Render parses wrapped as full HTML page.
|
|
minify (bool): Minify HTML markup.
|
|
RETURNS (str): Rendered SVG or HTML markup.
|
|
"""
|
|
# Create a random ID prefix to make sure parses don't receive the
|
|
# same ID, even if they're identical
|
|
id_prefix = uuid.uuid4().hex
|
|
rendered = []
|
|
for i, p in enumerate(parsed):
|
|
if i == 0:
|
|
settings = p.get("settings", {})
|
|
self.direction = settings.get("direction", DEFAULT_DIR)
|
|
self.lang = settings.get("lang", DEFAULT_LANG)
|
|
render_id = f"{id_prefix}-{i}"
|
|
svg = self.render_svg(render_id, p["words"], p["arcs"])
|
|
rendered.append(svg)
|
|
if page:
|
|
content = "".join([TPL_FIGURE.format(content=svg) for svg in rendered])
|
|
markup = TPL_PAGE.format(
|
|
content=content, lang=self.lang, dir=self.direction
|
|
)
|
|
else:
|
|
markup = "".join(rendered)
|
|
if minify:
|
|
return minify_html(markup)
|
|
return markup
|
|
|
|
def render_svg(
|
|
self,
|
|
render_id: Union[int, str],
|
|
words: List[Dict[str, Any]],
|
|
arcs: List[Dict[str, Any]],
|
|
) -> str:
|
|
"""Render SVG.
|
|
|
|
render_id (Union[int, str]): Unique ID, typically index of document.
|
|
words (list): Individual words and their tags.
|
|
arcs (list): Individual arcs and their start, end, direction and label.
|
|
RETURNS (str): Rendered SVG markup.
|
|
"""
|
|
self.levels = self.get_levels(arcs)
|
|
self.highest_level = max(self.levels.values(), default=0)
|
|
self.offset_y = self.distance / 2 * self.highest_level + self.arrow_stroke
|
|
self.width = self.offset_x + len(words) * self.distance
|
|
self.height = self.offset_y + 3 * self.word_spacing
|
|
self.id = render_id
|
|
words_svg = [
|
|
self.render_word(w["text"], w["tag"], w.get("lemma", None), i)
|
|
for i, w in enumerate(words)
|
|
]
|
|
arcs_svg = [
|
|
self.render_arrow(a["label"], a["start"], a["end"], a["dir"], i)
|
|
for i, a in enumerate(arcs)
|
|
]
|
|
content = "".join(words_svg) + "".join(arcs_svg)
|
|
return TPL_DEP_SVG.format(
|
|
id=self.id,
|
|
width=self.width,
|
|
height=self.height,
|
|
color=self.color,
|
|
bg=self.bg,
|
|
font=self.font,
|
|
content=content,
|
|
dir=self.direction,
|
|
lang=self.lang,
|
|
)
|
|
|
|
def render_word(self, text: str, tag: str, lemma: str, i: int) -> str:
|
|
"""Render individual word.
|
|
|
|
text (str): Word text.
|
|
tag (str): Part-of-speech tag.
|
|
i (int): Unique ID, typically word index.
|
|
RETURNS (str): Rendered SVG markup.
|
|
"""
|
|
y = self.offset_y + self.word_spacing
|
|
x = self.offset_x + i * self.distance
|
|
if self.direction == "rtl":
|
|
x = self.width - x
|
|
html_text = escape_html(text)
|
|
if lemma is not None:
|
|
return TPL_DEP_WORDS_LEMMA.format(
|
|
text=html_text, tag=tag, lemma=lemma, x=x, y=y
|
|
)
|
|
return TPL_DEP_WORDS.format(text=html_text, tag=tag, x=x, y=y)
|
|
|
|
def render_arrow(
|
|
self, label: str, start: int, end: int, direction: str, i: int
|
|
) -> str:
|
|
"""Render individual arrow.
|
|
|
|
label (str): Dependency label.
|
|
start (int): Index of start word.
|
|
end (int): Index of end word.
|
|
direction (str): Arrow direction, 'left' or 'right'.
|
|
i (int): Unique ID, typically arrow index.
|
|
RETURNS (str): Rendered SVG markup.
|
|
"""
|
|
if start < 0 or end < 0:
|
|
error_args = dict(start=start, end=end, label=label, dir=direction)
|
|
raise ValueError(Errors.E157.format(**error_args))
|
|
level = self.levels[(start, end, label)]
|
|
x_start = self.offset_x + start * self.distance + self.arrow_spacing
|
|
if self.direction == "rtl":
|
|
x_start = self.width - x_start
|
|
y = self.offset_y
|
|
x_end = (
|
|
self.offset_x
|
|
+ (end - start) * self.distance
|
|
+ start * self.distance
|
|
- self.arrow_spacing * (self.highest_level - level) / 4
|
|
)
|
|
if self.direction == "rtl":
|
|
x_end = self.width - x_end
|
|
y_curve = self.offset_y - level * self.distance / 2
|
|
if self.compact:
|
|
y_curve = self.offset_y - level * self.distance / 6
|
|
if y_curve == 0 and max(self.levels.values(), default=0) > 5:
|
|
y_curve = -self.distance
|
|
arrowhead = self.get_arrowhead(direction, x_start, y, x_end)
|
|
arc = self.get_arc(x_start, y, y_curve, x_end)
|
|
label_side = "right" if self.direction == "rtl" else "left"
|
|
return TPL_DEP_ARCS.format(
|
|
id=self.id,
|
|
i=i,
|
|
stroke=self.arrow_stroke,
|
|
head=arrowhead,
|
|
label=label,
|
|
label_side=label_side,
|
|
arc=arc,
|
|
)
|
|
|
|
def get_arc(self, x_start: int, y: int, y_curve: int, x_end: int) -> str:
|
|
"""Render individual arc.
|
|
|
|
x_start (int): X-coordinate of arrow start point.
|
|
y (int): Y-coordinate of arrow start and end point.
|
|
y_curve (int): Y-corrdinate of Cubic Bézier y_curve point.
|
|
x_end (int): X-coordinate of arrow end point.
|
|
RETURNS (str): Definition of the arc path ('d' attribute).
|
|
"""
|
|
template = "M{x},{y} C{x},{c} {e},{c} {e},{y}"
|
|
if self.compact:
|
|
template = "M{x},{y} {x},{c} {e},{c} {e},{y}"
|
|
return template.format(x=x_start, y=y, c=y_curve, e=x_end)
|
|
|
|
def get_arrowhead(self, direction: str, x: int, y: int, end: int) -> str:
|
|
"""Render individual arrow head.
|
|
|
|
direction (str): Arrow direction, 'left' or 'right'.
|
|
x (int): X-coordinate of arrow start point.
|
|
y (int): Y-coordinate of arrow start and end point.
|
|
end (int): X-coordinate of arrow end point.
|
|
RETURNS (str): Definition of the arrow head path ('d' attribute).
|
|
"""
|
|
if direction == "left":
|
|
p1, p2, p3 = (x, x - self.arrow_width + 2, x + self.arrow_width - 2)
|
|
else:
|
|
p1, p2, p3 = (end, end + self.arrow_width - 2, end - self.arrow_width + 2)
|
|
return f"M{p1},{y + 2} L{p2},{y - self.arrow_width} {p3},{y - self.arrow_width}"
|
|
|
|
def get_levels(self, arcs: List[Dict[str, Any]]) -> Dict[Tuple[int, int, str], int]:
|
|
"""Calculate available arc height "levels".
|
|
Used to calculate arrow heights dynamically and without wasting space.
|
|
|
|
args (list): Individual arcs and their start, end, direction and label.
|
|
RETURNS (dict): Arc levels keyed by (start, end, label).
|
|
"""
|
|
arcs = [dict(t) for t in {tuple(sorted(arc.items())) for arc in arcs}]
|
|
length = max([arc["end"] for arc in arcs], default=0)
|
|
max_level = [0] * length
|
|
levels = {}
|
|
for arc in sorted(arcs, key=lambda arc: arc["end"] - arc["start"]):
|
|
level = max(max_level[arc["start"] : arc["end"]]) + 1
|
|
for i in range(arc["start"], arc["end"]):
|
|
max_level[i] = level
|
|
levels[(arc["start"], arc["end"], arc["label"])] = level
|
|
return levels
|
|
|
|
|
|
class EntityRenderer:
|
|
"""Render named entities as HTML."""
|
|
|
|
style = "ent"
|
|
|
|
def __init__(self, options: Dict[str, Any] = {}) -> None:
|
|
"""Initialise entity renderer.
|
|
|
|
options (dict): Visualiser-specific options (colors, ents)
|
|
"""
|
|
colors = dict(DEFAULT_LABEL_COLORS)
|
|
user_colors = registry.displacy_colors.get_all()
|
|
for user_color in user_colors.values():
|
|
if callable(user_color):
|
|
# Since this comes from the function registry, we want to make
|
|
# sure we support functions that *return* a dict of colors
|
|
user_color = user_color()
|
|
if not isinstance(user_color, dict):
|
|
raise ValueError(Errors.E925.format(obj=type(user_color)))
|
|
colors.update(user_color)
|
|
colors.update(options.get("colors", {}))
|
|
self.default_color = DEFAULT_ENTITY_COLOR
|
|
self.colors = {label.upper(): color for label, color in colors.items()}
|
|
self.ents = options.get("ents", None)
|
|
if self.ents is not None:
|
|
self.ents = [ent.upper() for ent in self.ents]
|
|
self.direction = DEFAULT_DIR
|
|
self.lang = DEFAULT_LANG
|
|
template = options.get("template")
|
|
if template:
|
|
self.ent_template = template
|
|
else:
|
|
if self.direction == "rtl":
|
|
self.ent_template = TPL_ENT_RTL
|
|
else:
|
|
self.ent_template = TPL_ENT
|
|
|
|
def render(
|
|
self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
|
|
) -> str:
|
|
"""Render complete markup.
|
|
|
|
parsed (list): Dependency parses to render.
|
|
page (bool): Render parses wrapped as full HTML page.
|
|
minify (bool): Minify HTML markup.
|
|
RETURNS (str): Rendered SVG or HTML markup.
|
|
"""
|
|
rendered = []
|
|
for i, p in enumerate(parsed):
|
|
if i == 0:
|
|
settings = p.get("settings", {})
|
|
self.direction = settings.get("direction", DEFAULT_DIR)
|
|
self.lang = settings.get("lang", DEFAULT_LANG)
|
|
rendered.append(self.render_ents(p["text"], p["ents"], p.get("title")))
|
|
if page:
|
|
docs = "".join([TPL_FIGURE.format(content=doc) for doc in rendered])
|
|
markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
|
|
else:
|
|
markup = "".join(rendered)
|
|
if minify:
|
|
return minify_html(markup)
|
|
return markup
|
|
|
|
def render_ents(
|
|
self, text: str, spans: List[Dict[str, Any]], title: Optional[str]
|
|
) -> str:
|
|
"""Render entities in text.
|
|
|
|
text (str): Original text.
|
|
spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
|
|
title (str / None): Document title set in Doc.user_data['title'].
|
|
"""
|
|
markup = ""
|
|
offset = 0
|
|
for span in spans:
|
|
label = span["label"]
|
|
start = span["start"]
|
|
end = span["end"]
|
|
kb_id = span.get("kb_id", "")
|
|
kb_url = span.get("kb_url", "#")
|
|
kb_link = TPL_KB_LINK.format(kb_id=kb_id, kb_url=kb_url) if kb_id else ""
|
|
additional_params = span.get("params", {})
|
|
entity = escape_html(text[start:end])
|
|
fragments = text[offset:start].split("\n")
|
|
for i, fragment in enumerate(fragments):
|
|
markup += escape_html(fragment)
|
|
if len(fragments) > 1 and i != len(fragments) - 1:
|
|
markup += "</br>"
|
|
if self.ents is None or label.upper() in self.ents:
|
|
color = self.colors.get(label.upper(), self.default_color)
|
|
ent_settings = {
|
|
"label": label,
|
|
"text": entity,
|
|
"bg": color,
|
|
"kb_link": kb_link,
|
|
}
|
|
ent_settings.update(additional_params)
|
|
markup += self.ent_template.format(**ent_settings)
|
|
else:
|
|
markup += entity
|
|
offset = end
|
|
fragments = text[offset:].split("\n")
|
|
for i, fragment in enumerate(fragments):
|
|
markup += escape_html(fragment)
|
|
if len(fragments) > 1 and i != len(fragments) - 1:
|
|
markup += "</br>"
|
|
markup = TPL_ENTS.format(content=markup, dir=self.direction)
|
|
if title:
|
|
markup = TPL_TITLE.format(title=title) + markup
|
|
return markup
|