mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Displacy serve entity linking support without manual=True
support. (#9748)
* Add support for kb_id to be displayed via displacy.serve. The current support is only limited to the manual option in displacy.render * Commit to check pre-commit hooks are run. * Update spacy/displacy/__init__.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Changes as per suggestions on the PR. * Update website/docs/api/top-level.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update website/docs/api/top-level.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * tag option as new from 3.2.1 onwards Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: svlandeg <sofie.vanlandeghem@gmail.com>
This commit is contained in:
parent
6763cbfdc0
commit
1be8a4dab3
|
@ -181,11 +181,19 @@ def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
||||||
def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
||||||
"""Generate named entities in [{start: i, end: i, label: 'label'}] format.
|
"""Generate named entities in [{start: i, end: i, label: 'label'}] format.
|
||||||
|
|
||||||
doc (Doc): Document do parse.
|
doc (Doc): Document to parse.
|
||||||
|
options (Dict[str, Any]): NER-specific visualisation options.
|
||||||
RETURNS (dict): Generated entities keyed by text (original text) and ents.
|
RETURNS (dict): Generated entities keyed by text (original text) and ents.
|
||||||
"""
|
"""
|
||||||
|
kb_url_template = options.get("kb_url_template", None)
|
||||||
ents = [
|
ents = [
|
||||||
{"start": ent.start_char, "end": ent.end_char, "label": ent.label_}
|
{
|
||||||
|
"start": ent.start_char,
|
||||||
|
"end": ent.end_char,
|
||||||
|
"label": ent.label_,
|
||||||
|
"kb_id": ent.kb_id_ if ent.kb_id_ else "",
|
||||||
|
"kb_url": kb_url_template.format(ent.kb_id_) if kb_url_template else "#",
|
||||||
|
}
|
||||||
for ent in doc.ents
|
for ent in doc.ents
|
||||||
]
|
]
|
||||||
if not ents:
|
if not ents:
|
||||||
|
|
|
@ -1,8 +1,9 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from spacy import displacy
|
from spacy import displacy
|
||||||
from spacy.displacy.render import DependencyRenderer, EntityRenderer
|
from spacy.displacy.render import DependencyRenderer, EntityRenderer
|
||||||
from spacy.tokens import Span, Doc
|
|
||||||
from spacy.lang.fa import Persian
|
from spacy.lang.fa import Persian
|
||||||
|
from spacy.tokens import Span, Doc
|
||||||
|
|
||||||
|
|
||||||
def test_displacy_parse_ents(en_vocab):
|
def test_displacy_parse_ents(en_vocab):
|
||||||
|
@ -12,7 +13,38 @@ def test_displacy_parse_ents(en_vocab):
|
||||||
ents = displacy.parse_ents(doc)
|
ents = displacy.parse_ents(doc)
|
||||||
assert isinstance(ents, dict)
|
assert isinstance(ents, dict)
|
||||||
assert ents["text"] == "But Google is starting from behind "
|
assert ents["text"] == "But Google is starting from behind "
|
||||||
assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
|
assert ents["ents"] == [
|
||||||
|
{"start": 4, "end": 10, "label": "ORG", "kb_id": "", "kb_url": "#"}
|
||||||
|
]
|
||||||
|
|
||||||
|
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
|
||||||
|
ents = displacy.parse_ents(doc)
|
||||||
|
assert isinstance(ents, dict)
|
||||||
|
assert ents["text"] == "But Google is starting from behind "
|
||||||
|
assert ents["ents"] == [
|
||||||
|
{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "#"}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_parse_ents_with_kb_id_options(en_vocab):
|
||||||
|
"""Test that named entities with kb_id on a Doc are converted into displaCy's format."""
|
||||||
|
doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
|
||||||
|
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
|
||||||
|
|
||||||
|
ents = displacy.parse_ents(
|
||||||
|
doc, {"kb_url_template": "https://www.wikidata.org/wiki/{}"}
|
||||||
|
)
|
||||||
|
assert isinstance(ents, dict)
|
||||||
|
assert ents["text"] == "But Google is starting from behind "
|
||||||
|
assert ents["ents"] == [
|
||||||
|
{
|
||||||
|
"start": 4,
|
||||||
|
"end": 10,
|
||||||
|
"label": "ORG",
|
||||||
|
"kb_id": "Q95",
|
||||||
|
"kb_url": "https://www.wikidata.org/wiki/Q95",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def test_displacy_parse_deps(en_vocab):
|
def test_displacy_parse_deps(en_vocab):
|
||||||
|
|
|
@ -313,11 +313,12 @@ If a setting is not present in the options, the default value will be used.
|
||||||
> displacy.serve(doc, style="ent", options=options)
|
> displacy.serve(doc, style="ent", options=options)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
|
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
|
||||||
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||||
|
| `kb_url_template` <Tag variant="new">3.2.1</Tag> | Optional template to construct the KB URL for the entity to link to. Expects a Python format string (as used by `str.format`) with a single field to fill in. ~~Optional[str]~~ |
|
||||||
|
|
||||||
By default, displaCy comes with colors for all entity types used by
|
By default, displaCy comes with colors for all entity types used by
|
||||||
[spaCy's trained pipelines](/models). If you're using custom entity types, you
|
[spaCy's trained pipelines](/models). If you're using custom entity types, you
|
||||||
|
@ -326,6 +327,14 @@ or pipeline package can also expose a
|
||||||
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
|
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
|
||||||
to add custom labels and their colors automatically.
|
to add custom labels and their colors automatically.
|
||||||
|
|
||||||
|
By default, displaCy links to `#` for entities without a `kb_id` set on their
|
||||||
|
span. If you wish to link an entity to its URL then consider using the
|
||||||
|
`kb_url_template` option from above. For example if the `kb_id` on a span is
|
||||||
|
`Q95` and this is a Wikidata identifier then this option can be set to
|
||||||
|
`https://www.wikidata.org/wiki/{}`. Clicking on your entity in the rendered HTML
|
||||||
|
should redirect you to its Wikidata page, in this case
|
||||||
|
`https://www.wikidata.org/wiki/Q95`.
|
||||||
|
|
||||||
## registry {#registry source="spacy/util.py" new="3"}
|
## registry {#registry source="spacy/util.py" new="3"}
|
||||||
|
|
||||||
spaCy's function registry extends
|
spaCy's function registry extends
|
||||||
|
@ -414,8 +423,8 @@ and the accuracy scores on the development set.
|
||||||
The built-in, default logger is the ConsoleLogger, which prints results to the
|
The built-in, default logger is the ConsoleLogger, which prints results to the
|
||||||
console in tabular format. The
|
console in tabular format. The
|
||||||
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
||||||
a dependency of spaCy, enables other loggers: currently it provides one that sends
|
a dependency of spaCy, enables other loggers: currently it provides one that
|
||||||
results to a [Weights & Biases](https://www.wandb.com/) dashboard.
|
sends results to a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||||
|
|
||||||
Instead of using one of the built-in loggers, you can
|
Instead of using one of the built-in loggers, you can
|
||||||
[implement your own](/usage/training#custom-logging).
|
[implement your own](/usage/training#custom-logging).
|
||||||
|
@ -466,7 +475,6 @@ start decreasing across epochs.
|
||||||
|
|
||||||
</Accordion>
|
</Accordion>
|
||||||
|
|
||||||
|
|
||||||
## Readers {#readers}
|
## Readers {#readers}
|
||||||
|
|
||||||
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user