mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Add displacy support for overlapping Spans (#10332)
* Fix docstring for EntityRenderer * Add warning in displacy if doc.spans are empty * Implement parse_spans converter One notable change here is that the default spans_key is sc, and it's set by the user through the options. * Implement SpanRenderer Here, I implemented a SpanRenderer that looks similar to the EntityRenderer except for some templates. The spans_key, by default, is set to sc, but can be configured in the options (see parse_spans). The way I rendered these spans is per-token, i.e., I first check if each token (1) belongs to a given span type and (2) a starting token of a given span type. Once I have this information, I render them into the markup. * Fix mypy issues on typing * Add tests for displacy spans support * Update colors from RGB to hex Co-authored-by: Ines Montani <ines@ines.io> * Remove unnecessary CSS properties * Add documentation for website * Remove unnecesasry scripts * Update wording on the documentation Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Put typing dependency on top of file * Put back z-index so that spans overlap properly * Make warning more explicit for spans_key Co-authored-by: Ines Montani <ines@ines.io> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
e021dc6279
commit
a79cd3542b
|
@ -4,10 +4,10 @@ spaCy's built in visualization suite for dependencies and named entities.
|
|||
DOCS: https://spacy.io/api/top-level#displacy
|
||||
USAGE: https://spacy.io/usage/visualizers
|
||||
"""
|
||||
from typing import Union, Iterable, Optional, Dict, Any, Callable
|
||||
from typing import List, Union, Iterable, Optional, Dict, Any, Callable
|
||||
import warnings
|
||||
|
||||
from .render import DependencyRenderer, EntityRenderer
|
||||
from .render import DependencyRenderer, EntityRenderer, SpanRenderer
|
||||
from ..tokens import Doc, Span
|
||||
from ..errors import Errors, Warnings
|
||||
from ..util import is_in_jupyter
|
||||
|
@ -44,6 +44,7 @@ def render(
|
|||
factories = {
|
||||
"dep": (DependencyRenderer, parse_deps),
|
||||
"ent": (EntityRenderer, parse_ents),
|
||||
"span": (SpanRenderer, parse_spans),
|
||||
}
|
||||
if style not in factories:
|
||||
raise ValueError(Errors.E087.format(style=style))
|
||||
|
@ -203,6 +204,42 @@ def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
|||
return {"text": doc.text, "ents": ents, "title": title, "settings": settings}
|
||||
|
||||
|
||||
def parse_spans(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
    """Convert the spans stored on a Doc into displaCy's span format.

    Every span becomes a dict with the keys "start" and "end" (character
    offsets), "start_token" and "end_token" (token offsets), "label",
    "kb_id" and "kb_url".

    doc (Doc): Document to parse.
    options (Dict[str, Any]): Span-specific visualisation options. Reads
        "spans_key" (the key in doc.spans to visualize, "sc" by default) and
        "kb_url_template" (format string that is filled with the span's KB ID).
    RETURNS (dict): Dict with the keys "text" (original text), "spans",
        "title", "settings" and "tokens" (the text of every token).
    """
    kb_url_template = options.get("kb_url_template", None)
    spans_key = options.get("spans_key", "sc")
    # A spans key that was never set on the Doc is handled like an empty span
    # group, so the user gets warning W117 below instead of a bare KeyError.
    doc_spans = doc.spans[spans_key] if spans_key in doc.spans else []
    spans = [
        {
            "start": span.start_char,
            "end": span.end_char,
            "start_token": span.start,
            "end_token": span.end,
            "label": span.label_,
            "kb_id": span.kb_id_ if span.kb_id_ else "",
            "kb_url": kb_url_template.format(span.kb_id_) if kb_url_template else "#",
        }
        for span in doc_spans
    ]
    tokens = [token.text for token in doc]

    if not spans:
        warnings.warn(Warnings.W117.format(spans_key=spans_key))
    title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
    settings = get_doc_settings(doc)
    return {
        "text": doc.text,
        "spans": spans,
        "title": title,
        "settings": settings,
        "tokens": tokens,
    }
|
||||
|
||||
|
||||
def set_render_wrapper(func: Callable[[str], str]) -> None:
|
||||
"""Set an optional wrapper function that is called around the generated
|
||||
HTML markup on displacy.render. This can be used to allow integration into
|
||||
|
|
|
@ -1,12 +1,15 @@
|
|||
from typing import Dict, Any, List, Optional, Union
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
import uuid
|
||||
import itertools
|
||||
|
||||
from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_WORDS_LEMMA, TPL_DEP_ARCS
|
||||
from .templates import TPL_ENT, TPL_ENT_RTL, TPL_FIGURE, TPL_TITLE, TPL_PAGE
|
||||
from .templates import TPL_ENTS, TPL_KB_LINK
|
||||
from ..util import minify_html, escape_html, registry
|
||||
from ..errors import Errors
|
||||
|
||||
from ..util import escape_html, minify_html, registry
|
||||
from .templates import TPL_DEP_ARCS, TPL_DEP_SVG, TPL_DEP_WORDS
|
||||
from .templates import TPL_DEP_WORDS_LEMMA, TPL_ENT, TPL_ENT_RTL, TPL_ENTS
|
||||
from .templates import TPL_FIGURE, TPL_KB_LINK, TPL_PAGE, TPL_SPAN
|
||||
from .templates import TPL_SPAN_RTL, TPL_SPAN_SLICE, TPL_SPAN_SLICE_RTL
|
||||
from .templates import TPL_SPAN_START, TPL_SPAN_START_RTL, TPL_SPANS
|
||||
from .templates import TPL_TITLE
|
||||
|
||||
DEFAULT_LANG = "en"
|
||||
DEFAULT_DIR = "ltr"
|
||||
|
@ -33,6 +36,168 @@ DEFAULT_LABEL_COLORS = {
|
|||
}
|
||||
|
||||
|
||||
class SpanRenderer:
    """Render (possibly overlapping) spans as HTML markup."""

    style = "span"

    def __init__(self, options: Dict[str, Any] = {}) -> None:
        """Initialise span renderer.

        options (dict): Visualiser-specific options. Reads "colors",
            "top_offset", "top_offset_step" and "template" (a dict with the
            keys "span", "slice" and "start"; also accepted as "templates").
        RAISES (ValueError): If a registered displacy color entry is not a
            dict (or a callable returning one).
        """
        # Set up the colors and overall look
        colors = dict(DEFAULT_LABEL_COLORS)
        user_colors = registry.displacy_colors.get_all()
        for user_color in user_colors.values():
            if callable(user_color):
                # Since this comes from the function registry, we want to make
                # sure we support functions that *return* a dict of colors
                user_color = user_color()
            if not isinstance(user_color, dict):
                raise ValueError(Errors.E925.format(obj=type(user_color)))
            colors.update(user_color)
        colors.update(options.get("colors", {}))
        self.default_color = DEFAULT_ENTITY_COLOR
        self.colors = {label.upper(): color for label, color in colors.items()}

        # Set up how the text and labels will be rendered
        self.direction = DEFAULT_DIR
        self.lang = DEFAULT_LANG
        self.top_offset = options.get("top_offset", 40)
        self.top_offset_step = options.get("top_offset_step", 17)

        # Set up which templates will be used. "template" is the key this
        # renderer has always read; "templates" is accepted as an alias.
        template = options.get("template") or options.get("templates")
        self._custom_templates = bool(template)
        if template:
            self.span_template = template["span"]
            self.span_slice_template = template["slice"]
            self.span_start_template = template["start"]
        else:
            self._use_default_templates()

    def _use_default_templates(self) -> None:
        """Select the built-in templates matching the current direction."""
        if self.direction == "rtl":
            self.span_template = TPL_SPAN_RTL
            self.span_slice_template = TPL_SPAN_SLICE_RTL
            self.span_start_template = TPL_SPAN_START_RTL
        else:
            self.span_template = TPL_SPAN
            self.span_slice_template = TPL_SPAN_SLICE
            self.span_start_template = TPL_SPAN_START
        # Remember which direction the templates were chosen for, so render()
        # only re-selects them when the direction actually changed.
        self._template_direction = self.direction

    def render(
        self, parsed: List[Dict[str, Any]], page: bool = False, minify: bool = False
    ) -> str:
        """Render complete markup.

        parsed (list): Parsed documents to render. Each one needs the keys
            "tokens" and "spans" and may provide "title" and "settings".
        page (bool): Render parses wrapped as full HTML page.
        minify (bool): Minify HTML markup.
        RETURNS (str): Rendered HTML markup.
        """
        rendered = []
        for i, p in enumerate(parsed):
            if i == 0:
                # Language and direction are taken from the first document
                settings = p.get("settings", {})
                self.direction = settings.get("direction", DEFAULT_DIR)
                self.lang = settings.get("lang", DEFAULT_LANG)
                # The direction is only known now, not in __init__, so the
                # built-in templates are re-selected here if it changed.
                if (
                    not self._custom_templates
                    and self.direction != self._template_direction
                ):
                    self._use_default_templates()
            rendered.append(self.render_spans(p["tokens"], p["spans"], p.get("title")))

        if page:
            docs = "".join([TPL_FIGURE.format(content=doc) for doc in rendered])
            markup = TPL_PAGE.format(content=docs, lang=self.lang, dir=self.direction)
        else:
            markup = "".join(rendered)
        if minify:
            return minify_html(markup)
        return markup

    def render_spans(
        self,
        tokens: List[str],
        spans: List[Dict[str, Any]],
        title: Optional[str],
    ) -> str:
        """Render span types in text.

        Spans are rendered per-token, this means that for each token, we check if it's part
        of a span slice (a member of a span type) or a span start (the starting token of a
        given span type).

        tokens (list): Individual tokens in the text
        spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
        title (str / None): Document title set in Doc.user_data['title'].
        RETURNS (str): Rendered HTML markup, preceded by the title if one is set.
        """
        per_token_info = []
        for idx, token in enumerate(tokens):
            # Identify if a token belongs to a Span (and which) and if it's a
            # start token of said Span. We'll use this for the final HTML render
            entities = []
            for span in spans:
                if not (span["start_token"] <= idx < span["end_token"]):
                    continue
                kb_id = span.get("kb_id", "")
                kb_url = span.get("kb_url", "#")
                entities.append(
                    {
                        "label": span["label"],
                        "is_start": idx == span["start_token"],
                        # Only spans with a KB ID get a link next to the label
                        "kb_link": (
                            TPL_KB_LINK.format(kb_id=kb_id, kb_url=kb_url)
                            if kb_id
                            else ""
                        ),
                    }
                )
            per_token_info.append({"text": token, "entities": entities})

        markup = self._render_markup(per_token_info)
        markup = TPL_SPANS.format(content=markup, dir=self.direction)
        if title:
            markup = TPL_TITLE.format(title=title) + markup
        return markup

    def _render_markup(self, per_token_info: List[Dict[str, Any]]) -> str:
        """Render the markup from per-token information.

        per_token_info (list): One dict per token with the keys "text" and
            "entities" (the spans covering that token).
        RETURNS (str): Markup of all tokens, concatenated in order.
        """
        pieces = []
        for token in per_token_info:
            entities = token["entities"]
            if entities:
                pieces.append(
                    self.span_template.format(
                        # Escape the text like tokens outside of spans, so
                        # markup characters in a token can't break the HTML
                        text=escape_html(token["text"]),
                        span_slices=self._get_span_slices(entities),
                        span_starts=self._get_span_starts(entities),
                    )
                )
            else:
                pieces.append(escape_html(token["text"] + " "))
        return "".join(pieces)

    def _get_span_slices(self, entities: List[Dict]) -> str:
        """Get the rendered markup of all Span slices.

        entities (list): Spans covering one token. Each further span is
            placed top_offset_step lower than the previous one.
        RETURNS (str): Concatenated markup of all slices.
        """
        span_slices = []
        for entity, step in zip(entities, itertools.count(step=self.top_offset_step)):
            color = self.colors.get(entity["label"].upper(), self.default_color)
            span_slices.append(
                self.span_slice_template.format(
                    bg=color, top_offset=self.top_offset + step
                )
            )
        return "".join(span_slices)

    def _get_span_starts(self, entities: List[Dict]) -> str:
        """Get the rendered markup of all Span start tokens.

        entities (list): Spans covering one token. Only those flagged with
            "is_start" produce markup, but every span takes up an offset step
            so that a start lines up with the slice of the same span.
        RETURNS (str): Concatenated markup of all span starts.
        """
        span_starts = []
        for entity, step in zip(entities, itertools.count(step=self.top_offset_step)):
            if not entity["is_start"]:
                continue
            color = self.colors.get(entity["label"].upper(), self.default_color)
            span_starts.append(
                self.span_start_template.format(
                    bg=color,
                    top_offset=self.top_offset + step,
                    label=entity["label"],
                    kb_link=entity["kb_link"],
                )
            )
        return "".join(span_starts)
|
||||
|
||||
|
||||
class DependencyRenderer:
|
||||
"""Render dependency parses as SVGs."""
|
||||
|
||||
|
@ -242,7 +407,7 @@ class EntityRenderer:
|
|||
style = "ent"
|
||||
|
||||
def __init__(self, options: Dict[str, Any] = {}) -> None:
|
||||
"""Initialise dependency renderer.
|
||||
"""Initialise entity renderer.
|
||||
|
||||
options (dict): Visualiser-specific options (colors, ents)
|
||||
"""
|
||||
|
|
|
@ -62,6 +62,55 @@ TPL_ENT_RTL = """
|
|||
</mark>
|
||||
"""
|
||||
|
||||
# Wrapper around all rendered tokens of one document. Formatted with {content}
# (the per-token markup) and {dir} (the text direction).
TPL_SPANS = """
|
||||
<div class="spans" style="line-height: 2.5; direction: {dir}">{content}</div>
|
||||
"""
|
||||
|
||||
# Markup for a single token that is covered by at least one span. Formatted
# with {text}, {span_slices} and {span_starts}.
TPL_SPAN = """
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
{text}
|
||||
{span_slices}
|
||||
{span_starts}
|
||||
</span>
|
||||
"""
|
||||
|
||||
# Colored bar drawn for every span covering a token. Formatted with {bg}
# (color) and {top_offset} (vertical position in px).
TPL_SPAN_SLICE = """
|
||||
<span style="background: {bg}; top: {top_offset}px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
"""
|
||||
|
||||
|
||||
# Bar plus label drawn on the first token of a span. Formatted with {bg},
# {top_offset}, {label} and {kb_link}.
TPL_SPAN_START = """
|
||||
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||
{label}{kb_link}
|
||||
</span>
|
||||
</span>
|
||||
|
||||
"""
|
||||
|
||||
# Right-to-left counterpart of TPL_SPAN, with the same placeholders and
# currently the same markup.
TPL_SPAN_RTL = """
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
{text}
|
||||
{span_slices}
|
||||
{span_starts}
|
||||
</span>
|
||||
"""
|
||||
|
||||
# Right-to-left counterpart of TPL_SPAN_SLICE, with the same placeholders and
# currently the same markup.
TPL_SPAN_SLICE_RTL = """
|
||||
<span style="background: {bg}; top: {top_offset}px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
"""
|
||||
|
||||
# Right-to-left counterpart of TPL_SPAN_START, with the same placeholders.
TPL_SPAN_START_RTL = """
|
||||
<span style="background: {bg}; top: {top_offset}px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
<span style="background: {bg}; z-index: 10; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||
{label}{kb_link}
|
||||
</span>
|
||||
</span>
|
||||
"""
|
||||
|
||||
|
||||
# Important: this needs to start with a space!
|
||||
TPL_KB_LINK = """
|
||||
<a style="text-decoration: none; color: inherit; font-weight: normal" href="{kb_url}">{kb_id}</a>
|
||||
|
|
|
@ -192,6 +192,10 @@ class Warnings(metaclass=ErrorsWithCodes):
|
|||
W115 = ("Skipping {method}: the floret vector table cannot be modified. "
|
||||
"Vectors are calculated from character ngrams.")
|
||||
W116 = ("Unable to clean attribute '{attr}'.")
|
||||
W117 = ("No spans to visualize found in Doc object with spans_key: '{spans_key}'. If this is "
|
||||
"surprising to you, make sure the Doc was processed using a model "
|
||||
"that supports span categorization, and check the `doc.spans[spans_key]` "
|
||||
"property manually if necessary.")
|
||||
|
||||
|
||||
class Errors(metaclass=ErrorsWithCodes):
|
||||
|
|
|
@ -96,6 +96,92 @@ def test_issue5838():
|
|||
assert found == 4
|
||||
|
||||
|
||||
def test_displacy_parse_spans(en_vocab):
    """Check that parse_spans converts the spans of a Doc to displaCy's format."""
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    doc.spans["sc"] = [Span(doc, 3, 6, "ORG"), Span(doc, 5, 6, "GPE")]
    # "Bank of China" (tokens 3-5) and the nested "China" (token 5)
    org_span = {
        "start": 15,
        "end": 28,
        "start_token": 3,
        "end_token": 6,
        "label": "ORG",
        "kb_id": "",
        "kb_url": "#",
    }
    gpe_span = {
        "start": 23,
        "end": 28,
        "start_token": 5,
        "end_token": 6,
        "label": "GPE",
        "kb_id": "",
        "kb_url": "#",
    }
    parsed = displacy.parse_spans(doc)
    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [org_span, gpe_span]
|
||||
|
||||
|
||||
def test_displacy_parse_spans_with_kb_id_options(en_vocab):
    """Check that KB IDs and the kb_url_template option end up in the parsed spans."""
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    org = Span(doc, 3, 6, "ORG", kb_id="Q790068")
    gpe = Span(doc, 5, 6, "GPE", kb_id="Q148")
    doc.spans["sc"] = [org, gpe]
    # The "{}" field of the template is filled with each span's KB ID
    options = {"kb_url_template": "https://wikidata.org/wiki/{}"}
    expected_org = {
        "start": 15,
        "end": 28,
        "start_token": 3,
        "end_token": 6,
        "label": "ORG",
        "kb_id": "Q790068",
        "kb_url": "https://wikidata.org/wiki/Q790068",
    }
    expected_gpe = {
        "start": 23,
        "end": 28,
        "start_token": 5,
        "end_token": 6,
        "label": "GPE",
        "kb_id": "Q148",
        "kb_url": "https://wikidata.org/wiki/Q148",
    }

    parsed = displacy.parse_spans(doc, options)
    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_org, expected_gpe]
|
||||
|
||||
|
||||
def test_displacy_parse_spans_different_spans_key(en_vocab):
    """Check that the spans_key option selects which span group is parsed."""
    words = ["Welcome", "to", "the", "Bank", "of", "China"]
    doc = Doc(en_vocab, words=words)
    # The default "sc" group is filled as well and has to be ignored
    doc.spans["sc"] = [Span(doc, 3, 6, "ORG"), Span(doc, 5, 6, "GPE")]
    doc.spans["custom"] = [Span(doc, 3, 6, "BANK")]
    expected_bank = {
        "start": 15,
        "end": 28,
        "start_token": 3,
        "end_token": 6,
        "label": "BANK",
        "kb_id": "",
        "kb_url": "#",
    }

    parsed = displacy.parse_spans(doc, options={"spans_key": "custom"})
    assert isinstance(parsed, dict)
    assert parsed["text"] == "Welcome to the Bank of China "
    assert parsed["spans"] == [expected_bank]
|
||||
|
||||
|
||||
def test_displacy_parse_ents(en_vocab):
|
||||
"""Test that named entities on a Doc are converted into displaCy's format."""
|
||||
doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
|
||||
|
|
|
@ -320,12 +320,31 @@ If a setting is not present in the options, the default value will be used.
|
|||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||
| `kb_url_template` <Tag variant="new">3.2.1</Tag> | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in. ~~Optional[str]~~ |
|
||||
|
||||
By default, displaCy comes with colors for all entity types used by
|
||||
[spaCy's trained pipelines](/models). If you're using custom entity types, you
|
||||
can use the `colors` setting to add your own colors for them. Your application
|
||||
or pipeline package can also expose a
|
||||
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
|
||||
to add custom labels and their colors automatically.
|
||||
|
||||
#### Span Visualizer options {#displacy_options-span}
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> options = {"spans_key": "sc"}
|
||||
> displacy.serve(doc, style="span", options=options)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
|-----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `spans_key` | Which spans key to render spans from. Default is `"sc"`. ~~str~~ |
|
||||
| `template` | Dictionary containing the keys `"span"`, `"slice"`, and `"start"`. These dictate how the overall span, a span slice, and the starting token will be rendered. ~~Optional[Dict[str, str]]~~ |
|
||||
| `kb_url_template` | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in ~~Optional[str]~~ |
|
||||
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||
|
||||
|
||||
By default, displaCy comes with colors for all entity types used by [spaCy's
|
||||
trained pipelines](/models) for both entity and span visualizer. If you're
|
||||
using custom entity types, you can use the `colors` setting to add your own
|
||||
colors for them. Your application or pipeline package can also expose a
|
||||
[`spacy_displacy_colors` entry
|
||||
point](/usage/saving-loading#entry-points-displacy) to add custom labels and
|
||||
their colors automatically.
|
||||
|
||||
By default, displaCy links to `#` for entities without a `kb_id` set on their
|
||||
span. If you wish to link an entity to their URL then consider using the
|
||||
|
@ -335,6 +354,7 @@ span. If you wish to link an entity to their URL then consider using the
|
|||
should redirect you to their Wikidata page, in this case
|
||||
`https://www.wikidata.org/wiki/Q95`.
|
||||
|
||||
|
||||
## registry {#registry source="spacy/util.py" new="3"}
|
||||
|
||||
spaCy's function registry extends
|
||||
|
|
31
website/docs/images/displacy-span-custom.html
Normal file
31
website/docs/images/displacy-span-custom.html
Normal file
|
@ -0,0 +1,31 @@
|
|||
<div class="spans"
|
||||
style="line-height: 2.5; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 18px; direction: ltr">
|
||||
Welcome to the
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
Bank
|
||||
<span
|
||||
style="background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
<span
|
||||
style="background: #ddd; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
<span
|
||||
style="background: #ddd; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||
BANK
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
of
|
||||
<span
|
||||
style="background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
China
|
||||
|
||||
<span
|
||||
style="background: #ddd; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
</span>
|
||||
.
|
||||
</div>
|
41
website/docs/images/displacy-span.html
Normal file
41
website/docs/images/displacy-span.html
Normal file
|
@ -0,0 +1,41 @@
|
|||
<div class="spans"
|
||||
style="line-height: 2.5; direction: ltr; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'; font-size: 18px">
|
||||
Welcome to the
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
Bank
|
||||
<span
|
||||
style="background: #7aecec; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
<span
|
||||
style="background: #7aecec; top: 40px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
<span
|
||||
style="background: #7aecec; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||
ORG
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
of
|
||||
|
||||
<span
|
||||
style="background: #7aecec; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
</span>
|
||||
<span style="font-weight: bold; display: inline-block; position: relative;">
|
||||
China
|
||||
<span
|
||||
style="background: #7aecec; top: 40px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
<span
|
||||
style="background: #feca74; top: 57px; height: 4px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
</span>
|
||||
<span
|
||||
style="background: #feca74; top: 57px; height: 4px; border-top-left-radius: 3px; border-bottom-left-radius: 3px; left: -1px; width: calc(100% + 2px); position: absolute;">
|
||||
<span
|
||||
style="background: #feca74; color: #000; top: -0.5em; padding: 2px 3px; position: absolute; font-size: 0.6em; font-weight: bold; line-height: 1; border-radius: 3px">
|
||||
GPE
|
||||
</span>
|
||||
</span>
|
||||
</span>
|
||||
.
|
||||
</div>
|
|
@ -167,6 +167,59 @@ This feature is especially handy if you're using displaCy to compare performance
|
|||
at different stages of a process, e.g. during training. Here you could use the
|
||||
title for a brief description of the text example and the number of iterations.
|
||||
|
||||
## Visualizing spans {#span}
|
||||
|
||||
The span visualizer, `span`, highlights overlapping spans in a text.
|
||||
|
||||
```python
|
||||
### Span example
|
||||
import spacy
|
||||
from spacy import displacy
|
||||
from spacy.tokens import Span
|
||||
|
||||
text = "Welcome to the Bank of China."
|
||||
|
||||
nlp = spacy.blank("en")
|
||||
doc = nlp(text)
|
||||
|
||||
doc.spans["sc"] = [
|
||||
Span(doc, 3, 6, "ORG"),
|
||||
Span(doc, 5, 6, "GPE"),
|
||||
]
|
||||
|
||||
displacy.serve(doc, style="span")
|
||||
```
|
||||
|
||||
import DisplacySpanHtml from 'images/displacy-span.html'
|
||||
|
||||
<Iframe title="displaCy visualizer for spans" html={DisplacySpanHtml} height={180} />
|
||||
|
||||
|
||||
The span visualizer lets you customize the following `options`:
|
||||
|
||||
| Argument | Description |
|
||||
|-----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `spans_key` | Which spans key to render spans from. Default is `"sc"`. ~~str~~ |
|
||||
| `template` | Dictionary containing the keys `"span"`, `"slice"`, and `"start"`. These dictate how the overall span, a span slice, and the starting token will be rendered. ~~Optional[Dict[str, str]]~~ |
|
||||
| `kb_url_template` | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in ~~Optional[str]~~ |
|
||||
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||
|
||||
Because spans can be stored across different keys in `doc.spans`, you need to specify
|
||||
which one displaCy should use with `spans_key` (`sc` is the default).
|
||||
|
||||
> #### Options example
|
||||
>
|
||||
> ```python
|
||||
> doc.spans["custom"] = [Span(doc, 3, 6, "BANK")]
|
||||
> options = {"spans_key": "custom"}
|
||||
> displacy.serve(doc, style="span", options=options)
> ```
|
||||
|
||||
import DisplacySpanCustomHtml from 'images/displacy-span-custom.html'
|
||||
|
||||
<Iframe title="displaCy visualizer for spans (custom spans_key)" html={DisplacySpanCustomHtml} height={225} />
|
||||
|
||||
|
||||
|
||||
## Using displaCy in Jupyter notebooks {#jupyter}
|
||||
|
||||
displaCy is able to detect whether you're working in a
|
||||
|
|
Loading…
Reference in New Issue
Block a user