mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Add displacy support for overlapping Spans (#10332)
* Fix docstring for EntityRenderer * Add warning in displacy if doc.spans are empty * Implement parse_spans converter One notable change here is that the default spans_key is sc, and it's set by the user through the options. * Implement SpanRenderer Here, I implemented a SpanRenderer that looks similar to the EntityRenderer except for some templates. The spans_key, by default, is set to sc, but can be configured in the options (see parse_spans). The way I rendered these spans is per-token, i.e., I first check if each token (1) belongs to a given span type and (2) a starting token of a given span type. Once I have this information, I render them into the markup. * Fix mypy issues on typing * Add tests for displacy spans support * Update colors from RGB to hex Co-authored-by: Ines Montani <ines@ines.io> * Remove unnecessary CSS properties * Add documentation for website * Remove unnecessary scripts * Update wording on the documentation Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Put typing dependency on top of file * Put back z-index so that spans overlap properly * Make warning more explicit for spans_key Co-authored-by: Ines Montani <ines@ines.io> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
e021dc6279
commit
a79cd3542b
|
@ -4,10 +4,10 @@ spaCy's built in visualization suite for dependencies and named entities.
|
||||||
DOCS: https://spacy.io/api/top-level#displacy
|
DOCS: https://spacy.io/api/top-level#displacy
|
||||||
USAGE: https://spacy.io/usage/visualizers
|
USAGE: https://spacy.io/usage/visualizers
|
||||||
"""
|
"""
|
||||||
from typing import Union, Iterable, Optional, Dict, Any, Callable
|
from typing import List, Union, Iterable, Optional, Dict, Any, Callable
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from .render import DependencyRenderer, EntityRenderer
|
from .render import DependencyRenderer, EntityRenderer, SpanRenderer
|
||||||
from ..tokens import Doc, Span
|
from ..tokens import Doc, Span
|
||||||
from ..errors import Errors, Warnings
|
from ..errors import Errors, Warnings
|
||||||
from ..util import is_in_jupyter
|
from ..util import is_in_jupyter
|
||||||
|
@ -44,6 +44,7 @@ def render(
|
||||||
factories = {
|
factories = {
|
||||||
"dep": (DependencyRenderer, parse_deps),
|
"dep": (DependencyRenderer, parse_deps),
|
||||||
"ent": (EntityRenderer, parse_ents),
|
"ent": (EntityRenderer, parse_ents),
|
||||||
|
"span": (SpanRenderer, parse_spans),
|
||||||
}
|
}
|
||||||
if style not in factories:
|
if style not in factories:
|
||||||
raise ValueError(Errors.E087.format(style=style))
|
raise ValueError(Errors.E087.format(style=style))
|
||||||
|
@ -203,6 +204,42 @@ def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
||||||
return {"text": doc.text, "ents": ents, "title": title, "settings": settings}
|
return {"text": doc.text, "ents": ents, "title": title, "settings": settings}
|
||||||
|
|
||||||
|
|
||||||
|
def parse_spans(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
    """Generate spans in [{start: i, end: i, label: 'label'}] format.

    doc (Doc): Document to parse.
    options (Dict[str, Any]): Span-specific visualisation options. The keys
        read here are "spans_key" (which entry of doc.spans to use, "sc" by
        default) and "kb_url_template" (format string with one positional
        field that receives the span's kb_id).
    RETURNS (dict): The doc's text ("text"), its token texts ("tokens"), the
        converted spans ("spans"), the optional title ("title") and the doc
        settings ("settings").
    """
    kb_url_template = options.get("kb_url_template", None)
    spans_key = options.get("spans_key", "sc")
    # A spans key that is not set on the doc is treated like an empty span
    # group, so the caller gets the explanatory W117 warning below instead
    # of a bare KeyError.
    span_group = doc.spans[spans_key] if spans_key in doc.spans else []
    spans = [
        {
            # Character offsets into doc.text
            "start": span.start_char,
            "end": span.end_char,
            # Token offsets, used by the renderer to work per token
            "start_token": span.start,
            "end_token": span.end,
            "label": span.label_,
            "kb_id": span.kb_id_ if span.kb_id_ else "",
            "kb_url": kb_url_template.format(span.kb_id_) if kb_url_template else "#",
        }
        for span in span_group
    ]
    tokens = [token.text for token in doc]

    if not spans:
        warnings.warn(Warnings.W117.format(spans_key=spans_key))
    title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
    settings = get_doc_settings(doc)
    return {
        "text": doc.text,
        "spans": spans,
        "title": title,
        "settings": settings,
        "tokens": tokens,
    }
|
||||||
|
|
||||||
|
|
||||||
def set_render_wrapper(func: Callable[[str], str]) -> None:
|
def set_render_wrapper(func: Callable[[str], str]) -> None:
|
||||||
"""Set an optional wrapper function that is called around the generated
|
"""Set an optional wrapper function that is called around the generated
|
||||||
HTML markup on displacy.render. This can be used to allow integration into
|
HTML markup on displacy.render. This can be used to allow integration into
|
||||||
|
|
|
@ -1,12 +1,15 @@
|
||||||
from typing import Dict, Any, List, Optional, Union
|
from typing import Any, Dict, List, Optional, Union
|
||||||
import uuid
|
import uuid
|
||||||
|
import itertools
|
||||||
|
|
||||||
from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_WORDS_LEMMA, TPL_DEP_ARCS
|
|
||||||
from .templates import TPL_ENT, TPL_ENT_RTL, TPL_FIGURE, TPL_TITLE, TPL_PAGE
|
|
||||||
from .templates import TPL_ENTS, TPL_KB_LINK
|
|
||||||
from ..util import minify_html, escape_html, registry
|
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
|
from ..util import escape_html, minify_html, registry
|
||||||
|
from .templates import TPL_DEP_ARCS, TPL_DEP_SVG, TPL_DEP_WORDS
|
||||||
|
from .templates import TPL_DEP_WORDS_LEMMA, TPL_ENT, TPL_ENT_RTL, TPL_ENTS
|
||||||
|
from .templates import TPL_FIGURE, TPL_KB_LINK, TPL_PAGE, TPL_SPAN
|
||||||
|
from .templates import TPL_SPAN_RTL, TPL_SPAN_SLICE, TPL_SPAN_SLICE_RTL
|
||||||
|
from .templates import TPL_SPAN_START, TPL_SPAN_START_RTL, TPL_SPANS
|
||||||
|
from .templates import TPL_TITLE
|
||||||
|
|
||||||
DEFAULT_LANG = "en"
|
DEFAULT_LANG = "en"
|
||||||
DEFAULT_DIR = "ltr"
|
DEFAULT_DIR = "ltr"
|
||||||
|
@ -33,6 +36,168 @@ DEFAULT_LABEL_COLORS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SpanRenderer:
    """Render (possibly overlapping) Spans as HTML markup."""

    style = "span"

    def __init__(self, options: Dict[str, Any] = {}) -> None:
        """Initialise span renderer

        options (dict): Visualiser-specific options. Keys read here:
            "colors" (label -> color overrides), "top_offset" (px offset of
            the first underline, default 40), "top_offset_step" (px between
            stacked underlines, default 17) and "template" (dict with any of
            the keys "span", "slice" and "start"; "templates" is accepted as
            an alias).
        """
        # Set up the colors and overall look
        colors = dict(DEFAULT_LABEL_COLORS)
        user_colors = registry.displacy_colors.get_all()
        for user_color in user_colors.values():
            if callable(user_color):
                # Since this comes from the function registry, we want to make
                # sure we support functions that *return* a dict of colors
                user_color = user_color()
            if not isinstance(user_color, dict):
                raise ValueError(Errors.E925.format(obj=type(user_color)))
            colors.update(user_color)
        colors.update(options.get("colors", {}))
        self.default_color = DEFAULT_ENTITY_COLOR
        self.colors = {label.upper(): color for label, color in colors.items()}

        # Set up how the text and labels will be rendered
        self.direction = DEFAULT_DIR
        self.lang = DEFAULT_LANG
        self.top_offset = options.get("top_offset", 40)
        self.top_offset_step = options.get("top_offset_step", 17)

        # Set up which templates will be used. User-supplied templates are
        # remembered so they survive a later change of text direction.
        self._custom_templates = (
            options.get("template") or options.get("templates") or {}
        )
        self._select_templates()

    def _select_templates(self) -> None:
        """Pick the span, slice and start templates for the current direction.

        A user-supplied template wins over the built-in one for the same key;
        keys the user did not supply fall back to the built-in template.
        """
        rtl = self.direction == "rtl"
        custom = self._custom_templates
        self.span_template = custom.get("span", TPL_SPAN_RTL if rtl else TPL_SPAN)
        self.span_slice_template = custom.get(
            "slice", TPL_SPAN_SLICE_RTL if rtl else TPL_SPAN_SLICE
        )
        self.span_start_template = custom.get(
            "start", TPL_SPAN_START_RTL if rtl else TPL_SPAN_START
        )

    def render(
        self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
    ) -> str:
        """Render complete markup.

        parsed (list): Parsed docs to render, each a dict with "tokens" and
            "spans" and optionally "title" and "settings".
        page (bool): Render parses wrapped as full HTML page.
        minify (bool): Minify HTML markup.
        RETURNS (str): Rendered HTML markup.
        """
        rendered = []
        for i, p in enumerate(parsed):
            if i == 0:
                # Language and direction are taken from the first doc only
                settings = p.get("settings", {})
                direction = settings.get("direction", DEFAULT_DIR)
                if direction != self.direction:
                    # The direction is only known here, not in __init__, so
                    # the direction-specific templates are re-selected now.
                    self.direction = direction
                    self._select_templates()
                self.lang = settings.get("lang", DEFAULT_LANG)
            rendered.append(self.render_spans(p["tokens"], p["spans"], p.get("title")))

        if page:
            docs = "".join([TPL_FIGURE.format(content=doc) for doc in rendered])
            markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
        else:
            markup = "".join(rendered)
        if minify:
            return minify_html(markup)
        return markup

    def render_spans(
        self,
        tokens: List[str],
        spans: List[Dict[str, Any]],
        title: Optional[str],
    ) -> str:
        """Render span types in text.

        Spans are rendered per-token, this means that for each token, we check if it's part
        of a span slice (a member of a span type) or a span start (the starting token of a
        given span type).

        tokens (list): Individual tokens in the text
        spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
        title (str / None): Document title set in Doc.user_data['title'].
        RETURNS (str): Rendered HTML markup for one document.
        """
        per_token_info = self._get_per_token_info(tokens, spans)
        markup = self._render_markup(per_token_info)
        markup = TPL_SPANS.format(content=markup, dir=self.direction)
        if title:
            markup = TPL_TITLE.format(title=title) + markup
        return markup

    @staticmethod
    def _assign_render_slots(spans: List[Dict[str, Any]]) -> List[int]:
        """Give every span one vertical slot that it keeps on all its tokens.

        Spans are visited by start token (longer spans first on ties) and get
        the lowest slot not held by a span that is still open at their start,
        so overlapping spans never share a slot.

        spans (list): Span dicts with "start_token" and "end_token".
        RETURNS (List[int]): 0-based slot per span, aligned with `spans`.
        """
        order = sorted(
            range(len(spans)),
            key=lambda i: (
                spans[i]["start_token"],
                spans[i]["start_token"] - spans[i]["end_token"],
            ),
        )
        slots = [0] * len(spans)
        open_spans: List[List[int]] = []  # [end_token, slot] of spans still open
        for i in order:
            start = spans[i]["start_token"]
            open_spans = [item for item in open_spans if item[0] > start]
            taken = {item[1] for item in open_spans}
            slot = next(s for s in itertools.count() if s not in taken)
            slots[i] = slot
            open_spans.append([spans[i]["end_token"], slot])
        return slots

    def _get_per_token_info(
        self, tokens: List[str], spans: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Collect, for each token, the spans covering it.

        tokens (list): Individual tokens in the text.
        spans (list): Span dicts as described in render_spans.
        RETURNS (list): One dict per token with its "text" and "entities";
            each entity has "label", "is_start", "render_slot" and "kb_link"
            and the entities are ordered by render slot.
        """
        slots = self._assign_render_slots(spans)
        per_token_info = []
        for idx, token in enumerate(tokens):
            entities = []
            for span, slot in zip(spans, slots):
                if not span["start_token"] <= idx < span["end_token"]:
                    continue
                kb_id = span.get("kb_id", "")
                kb_url = span.get("kb_url", "#")
                entities.append(
                    {
                        "label": span["label"],
                        "is_start": idx == span["start_token"],
                        "render_slot": slot,
                        "kb_link": TPL_KB_LINK.format(kb_id=kb_id, kb_url=kb_url)
                        if kb_id
                        else "",
                    }
                )
            entities.sort(key=lambda ent: ent["render_slot"])
            per_token_info.append({"text": token, "entities": entities})
        return per_token_info

    def _entity_offset(self, entity: Dict[str, Any], position: int) -> Any:
        """Vertical offset of an entity's underline.

        Uses the entity's "render_slot" when present and otherwise its
        position in the list it was passed in.
        """
        slot = entity.get("render_slot", position)
        return self.top_offset + slot * self.top_offset_step

    def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
        """Render the markup from per-token information"""
        pieces = []
        for token in per_token_info:
            entities = token["entities"]
            if entities:
                pieces.append(
                    self.span_template.format(
                        # Escaped like the plain-text branch below, so token
                        # text can never be interpreted as markup.
                        text=escape_html(token["text"]),
                        span_slices=self._get_span_slices(entities),
                        span_starts=self._get_span_starts(entities),
                    )
                )
            else:
                pieces.append(escape_html(token["text"] + " "))
        return "".join(pieces)

    def _get_span_slices(self, entities: List[Dict]) -> str:
        """Get the rendered markup of all Span slices"""
        span_slices = []
        for position, entity in enumerate(entities):
            color = self.colors.get(entity["label"].upper(), self.default_color)
            span_slices.append(
                self.span_slice_template.format(
                    bg=color, top_offset=self._entity_offset(entity, position)
                )
            )
        return "".join(span_slices)

    def _get_span_starts(self, entities: List[Dict]) -> str:
        """Get the rendered markup of all Span start tokens"""
        span_starts = []
        for position, entity in enumerate(entities):
            if not entity["is_start"]:
                continue
            color = self.colors.get(entity["label"].upper(), self.default_color)
            span_starts.append(
                self.span_start_template.format(
                    bg=color,
                    top_offset=self._entity_offset(entity, position),
                    label=entity["label"],
                    kb_link=entity["kb_link"],
                )
            )
        return "".join(span_starts)
|
||||||
|
|
||||||
|
|
||||||
class DependencyRenderer:
|
class DependencyRenderer:
|
||||||
"""Render dependency parses as SVGs."""
|
"""Render dependency parses as SVGs."""
|
||||||
|
|
||||||
|
@ -242,7 +407,7 @@ class EntityRenderer:
|
||||||
style = "ent"
|
style = "ent"
|
||||||
|
|
||||||
def __init__(self, options: Dict[str, Any] = {}) -> None:
|
def __init__(self, options: Dict[str, Any] = {}) -> None:
|
||||||
"""Initialise dependency renderer.
|
"""Initialise entity renderer.
|
||||||
|
|
||||||
options (dict): Visualiser-specific options (colors, ents)
|
options (dict): Visualiser-specific options (colors, ents)
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -62,6 +62,55 @@ TPL_ENT_RTL = """
|
||||||
</mark>
|
</mark>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
TPL_SPANS = """
|
||||||
|
<div class="spans" style="line-height: 2.5; direction: {dir}">{content}</div>
|
||||||
|
"""
|
||||||
|
|
||||||
|
TPL_SPAN = """
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
{text}
|
||||||
|
{span_slices}
|
||||||
|
{span_starts}
|
||||||
|
</span>
|
||||||
|
"""
|
||||||
|
|
||||||
|
TPL_SPAN_SLICE = """
|
||||||
|
<span style="background: {bg}; top: {top_offset}px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
TPL_SPAN_START = """
|
||||||
|
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||||
|
{label}{kb_link}
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
TPL_SPAN_RTL = """
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
{text}
|
||||||
|
{span_slices}
|
||||||
|
{span_starts}
|
||||||
|
</span>
|
||||||
|
"""
|
||||||
|
|
||||||
|
TPL_SPAN_SLICE_RTL = """
|
||||||
|
<span style="background: {bg}; top: {top_offset}px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
"""
|
||||||
|
|
||||||
|
TPL_SPAN_START_RTL = """
|
||||||
|
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||||
|
{label}{kb_link}
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
# Important: this needs to start with a space!
|
# Important: this needs to start with a space!
|
||||||
TPL_KB_LINK = """
|
TPL_KB_LINK = """
|
||||||
<a style="text-decoration: none; color: inherit; font-weight: normal" href="{kb_url}">{kb_id}</a>
|
<a style="text-decoration: none; color: inherit; font-weight: normal" href="{kb_url}">{kb_id}</a>
|
||||||
|
|
|
@ -192,6 +192,10 @@ class Warnings(metaclass=ErrorsWithCodes):
|
||||||
W115 = ("Skipping {method}: the floret vector table cannot be modified. "
|
W115 = ("Skipping {method}: the floret vector table cannot be modified. "
|
||||||
"Vectors are calculated from character ngrams.")
|
"Vectors are calculated from character ngrams.")
|
||||||
W116 = ("Unable to clean attribute '{attr}'.")
|
W116 = ("Unable to clean attribute '{attr}'.")
|
||||||
|
W117 = ("No spans to visualize found in Doc object with spans_key: '{spans_key}'. If this is "
|
||||||
|
"surprising to you, make sure the Doc was processed using a model "
|
||||||
|
"that supports span categorization, and check the `doc.spans[spans_key]` "
|
||||||
|
"property manually if necessary.")
|
||||||
|
|
||||||
|
|
||||||
class Errors(metaclass=ErrorsWithCodes):
|
class Errors(metaclass=ErrorsWithCodes):
|
||||||
|
|
|
@ -96,6 +96,92 @@ def test_issue5838():
|
||||||
assert found == 4
|
assert found == 4
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_parse_spans(en_vocab):
    """Spans under the default key are converted into displaCy's span format."""
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    org_span = Span(doc, 3, 6, "ORG")
    gpe_span = Span(doc, 5, 6, "GPE")
    doc.spans["sc"] = [org_span, gpe_span]

    parsed = displacy.parse_spans(doc)

    # Character and token offsets of "Bank of China" and of "China"
    expected_org = dict(
        start=15, end=28, start_token=3, end_token=6, label="ORG", kb_id="", kb_url="#"
    )
    expected_gpe = dict(
        start=23, end=28, start_token=5, end_token=6, label="GPE", kb_id="", kb_url="#"
    )
    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_org, expected_gpe]
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_parse_spans_with_kb_id_options(en_vocab):
    """Spans carrying a kb_id get their kb_url built from kb_url_template."""
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    org_span = Span(doc, 3, 6, "ORG", kb_id="Q790068")
    gpe_span = Span(doc, 5, 6, "GPE", kb_id="Q148")
    doc.spans["sc"] = [org_span, gpe_span]
    options = {"kb_url_template": "https://wikidata.org/wiki/{}"}

    parsed = displacy.parse_spans(doc, options)

    expected_org = dict(
        start=15,
        end=28,
        start_token=3,
        end_token=6,
        label="ORG",
        kb_id="Q790068",
        kb_url="https://wikidata.org/wiki/Q790068",
    )
    expected_gpe = dict(
        start=23,
        end=28,
        start_token=5,
        end_token=6,
        label="GPE",
        kb_id="Q148",
        kb_url="https://wikidata.org/wiki/Q148",
    )
    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_org, expected_gpe]
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_parse_spans_different_spans_key(en_vocab):
    """Only the spans stored under the requested spans_key are parsed."""
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    # The default group is populated too, to show that it is ignored
    doc.spans["sc"] = [Span(doc, 3, 6, "ORG"), Span(doc, 5, 6, "GPE")]
    doc.spans["custom"] = [Span(doc, 3, 6, "BANK")]

    parsed = displacy.parse_spans(doc, options={"spans_key": "custom"})

    expected_bank = dict(
        start=15, end=28, start_token=3, end_token=6, label="BANK", kb_id="", kb_url="#"
    )
    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_bank]
|
||||||
|
|
||||||
|
|
||||||
def test_displacy_parse_ents(en_vocab):
|
def test_displacy_parse_ents(en_vocab):
|
||||||
"""Test that named entities on a Doc are converted into displaCy's format."""
|
"""Test that named entities on a Doc are converted into displaCy's format."""
|
||||||
doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
|
doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
|
||||||
|
|
|
@ -320,12 +320,31 @@ If a setting is not present in the options, the default value will be used.
|
||||||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||||
| `kb_url_template` <Tag variant="new">3.2.1</Tag> | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in. ~~Optional[str]~~ |
|
| `kb_url_template` <Tag variant="new">3.2.1</Tag> | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in. ~~Optional[str]~~ |
|
||||||
|
|
||||||
By default, displaCy comes with colors for all entity types used by
|
|
||||||
[spaCy's trained pipelines](/models). If you're using custom entity types, you
|
#### Span Visualizer options {#displacy_options-span}
|
||||||
can use the `colors` setting to add your own colors for them. Your application
|
|
||||||
or pipeline package can also expose a
|
> #### Example
|
||||||
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
|
>
|
||||||
to add custom labels and their colors automatically.
|
> ```python
|
||||||
|
> options = {"spans_key": "sc"}
|
||||||
|
> displacy.serve(doc, style="span", options=options)
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
|-----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| `spans_key` | Which spans key to render spans from. Default is `"sc"`. ~~str~~ |
|
||||||
|
| `template` | Dictionary containing the keys `"span"`, `"slice"`, and `"start"`. These dictate how the overall span, a span slice, and the starting token will be rendered. ~~Optional[Dict[str, str]]~~ |
|
||||||
|
| `kb_url_template` | Optional template to construct the KB URL for the entity to link to. Expects a Python format string (as used by `str.format`) with a single field to fill in. ~~Optional[str]~~ |
|
||||||
|
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||||
|
|
||||||
|
|
||||||
|
By default, displaCy comes with colors for all entity types used by [spaCy's
|
||||||
|
trained pipelines](/models) for both the entity and span visualizers. If you're
|
||||||
|
using custom entity types, you can use the `colors` setting to add your own
|
||||||
|
colors for them. Your application or pipeline package can also expose a
|
||||||
|
[`spacy_displacy_colors` entry
|
||||||
|
point](/usage/saving-loading#entry-points-displacy) to add custom labels and
|
||||||
|
their colors automatically.
|
||||||
|
|
||||||
By default, displaCy links to `#` for entities without a `kb_id` set on their
|
By default, displaCy links to `#` for entities without a `kb_id` set on their
|
||||||
span. If you wish to link an entity to their URL then consider using the
|
span. If you wish to link an entity to their URL then consider using the
|
||||||
|
@ -335,6 +354,7 @@ span. If you wish to link an entity to their URL then consider using the
|
||||||
should redirect you to their Wikidata page, in this case
|
should redirect you to their Wikidata page, in this case
|
||||||
`https://www.wikidata.org/wiki/Q95`.
|
`https://www.wikidata.org/wiki/Q95`.
|
||||||
|
|
||||||
|
|
||||||
## registry {#registry source="spacy/util.py" new="3"}
|
## registry {#registry source="spacy/util.py" new="3"}
|
||||||
|
|
||||||
spaCy's function registry extends
|
spaCy's function registry extends
|
||||||
|
|
31
website/docs/images/displacy-span-custom.html
Normal file
31
website/docs/images/displacy-span-custom.html
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
<div class="spans"
|
||||||
|
style="line-height: 2.5; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 18px; direction: ltr">
|
||||||
|
Welcome to the
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
Bank
|
||||||
|
<span
|
||||||
|
style="background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
<span
|
||||||
|
style="background: #ddd; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
<span
|
||||||
|
style="background: #ddd; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||||
|
BANK
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
of
|
||||||
|
<span
|
||||||
|
style="background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
China
|
||||||
|
|
||||||
|
<span
|
||||||
|
style="background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
.
|
||||||
|
</div>
|
41
website/docs/images/displacy-span.html
Normal file
41
website/docs/images/displacy-span.html
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
<div class="spans"
|
||||||
|
style="line-height: 2.5; direction: ltr; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 18px">
|
||||||
|
Welcome to the
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
Bank
|
||||||
|
<span
|
||||||
|
style="background: #7aecec; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
<span
|
||||||
|
style="background: #7aecec; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
<span
|
||||||
|
style="background: #7aecec; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||||
|
ORG
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
of
|
||||||
|
|
||||||
|
<span
|
||||||
|
style="background: #7aecec; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||||
|
China
|
||||||
|
<span
|
||||||
|
style="background: #7aecec; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
<span
|
||||||
|
style="background: #feca74; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
</span>
|
||||||
|
<span
|
||||||
|
style="background: #feca74; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||||
|
<span
|
||||||
|
style="background: #feca74; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||||
|
GPE
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
</span>
|
||||||
|
.
|
||||||
|
</div>
|
|
@ -167,6 +167,59 @@ This feature is especially handy if you're using displaCy to compare performance
|
||||||
at different stages of a process, e.g. during training. Here you could use the
|
at different stages of a process, e.g. during training. Here you could use the
|
||||||
title for a brief description of the text example and the number of iterations.
|
title for a brief description of the text example and the number of iterations.
|
||||||
|
|
||||||
|
## Visualizing spans {#span}
|
||||||
|
|
||||||
|
The span visualizer, `span`, highlights overlapping spans in a text.
|
||||||
|
|
||||||
|
```python
|
||||||
|
### Span example
|
||||||
|
import spacy
|
||||||
|
from spacy import displacy
|
||||||
|
from spacy.tokens import Span
|
||||||
|
|
||||||
|
text = "Welcome to the Bank of China."
|
||||||
|
|
||||||
|
nlp = spacy.blank("en")
|
||||||
|
doc = nlp(text)
|
||||||
|
|
||||||
|
doc.spans["sc"] = [
|
||||||
|
Span(doc, 3, 6, "ORG"),
|
||||||
|
Span(doc, 5, 6, "GPE"),
|
||||||
|
]
|
||||||
|
|
||||||
|
displacy.serve(doc, style="span")
|
||||||
|
```
|
||||||
|
|
||||||
|
import DisplacySpanHtml from 'images/displacy-span.html'
|
||||||
|
|
||||||
|
<Iframe title="displaCy visualizer for spans" html={DisplacySpanHtml} height={180} />
|
||||||
|
|
||||||
|
|
||||||
|
The span visualizer lets you customize the following `options`:
|
||||||
|
|
||||||
|
| Argument | Description |
|
||||||
|
|-----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||||
|
| `spans_key` | Which spans key to render spans from. Default is `"sc"`. ~~str~~ |
|
||||||
|
| `template` | Dictionary containing the keys `"span"`, `"slice"`, and `"start"`. These dictate how the overall span, a span slice, and the starting token will be rendered. ~~Optional[Dict[str, str]]~~ |
|
||||||
|
| `kb_url_template` | Optional template to construct the KB URL for the entity to link to. Expects a Python format string (as used by `str.format`) with a single field to fill in. ~~Optional[str]~~ |
|
||||||
|
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||||
|
|
||||||
|
Because spans can be stored across different keys in `doc.spans`, you need to specify
|
||||||
|
which one displaCy should use with `spans_key` (`sc` is the default).
|
||||||
|
|
||||||
|
> #### Options example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> doc.spans["custom"] = [Span(doc, 3, 6, "BANK")]
|
||||||
|
> options = {"spans_key": "custom"}
|
||||||
|
> displacy.serve(doc, style="span", options=options)
> ```
|
||||||
|
|
||||||
|
import DisplacySpanCustomHtml from 'images/displacy-span-custom.html'
|
||||||
|
|
||||||
|
<Iframe title="displaCy visualizer for spans (custom spans_key)" html={DisplacySpanCustomHtml} height={225} />
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## Using displaCy in Jupyter notebooks {#jupyter}
|
## Using displaCy in Jupyter notebooks {#jupyter}
|
||||||
|
|
||||||
displaCy is able to detect whether you're working in a
|
displaCy is able to detect whether you're working in a
|
||||||
|
|
Loading…
Reference in New Issue
Block a user