Displacy serve entity linking support without manual=True support. (#9748)

* Add support for kb_id to be displayed via displacy.serve. The current support is only limited to the manual option in displacy.render

* Commit to check pre-commit hooks are run.

* Update spacy/displacy/__init__.py

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Changes as per suggestions on the PR.

* Update website/docs/api/top-level.md

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* Update website/docs/api/top-level.md

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>

* tag option as new from 3.2.1 onwards

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Co-authored-by: svlandeg <sofie.vanlandeghem@gmail.com>
This commit is contained in:
Narayan Acharya 2021-11-29 11:13:26 -05:00 committed by GitHub
parent 6763cbfdc0
commit 1be8a4dab3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 13 deletions

View File

@ -181,11 +181,19 @@ def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]: def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
"""Generate named entities in [{start: i, end: i, label: 'label'}] format. """Generate named entities in [{start: i, end: i, label: 'label'}] format.
doc (Doc): Document do parse. doc (Doc): Document to parse.
options (Dict[str, Any]): NER-specific visualisation options.
RETURNS (dict): Generated entities keyed by text (original text) and ents. RETURNS (dict): Generated entities keyed by text (original text) and ents.
""" """
kb_url_template = options.get("kb_url_template", None)
ents = [ ents = [
{"start": ent.start_char, "end": ent.end_char, "label": ent.label_} {
"start": ent.start_char,
"end": ent.end_char,
"label": ent.label_,
"kb_id": ent.kb_id_ if ent.kb_id_ else "",
"kb_url": kb_url_template.format(ent.kb_id_) if kb_url_template else "#",
}
for ent in doc.ents for ent in doc.ents
] ]
if not ents: if not ents:

View File

@ -1,8 +1,9 @@
import pytest import pytest
from spacy import displacy from spacy import displacy
from spacy.displacy.render import DependencyRenderer, EntityRenderer from spacy.displacy.render import DependencyRenderer, EntityRenderer
from spacy.tokens import Span, Doc
from spacy.lang.fa import Persian from spacy.lang.fa import Persian
from spacy.tokens import Span, Doc
def test_displacy_parse_ents(en_vocab): def test_displacy_parse_ents(en_vocab):
@ -12,7 +13,38 @@ def test_displacy_parse_ents(en_vocab):
ents = displacy.parse_ents(doc) ents = displacy.parse_ents(doc)
assert isinstance(ents, dict) assert isinstance(ents, dict)
assert ents["text"] == "But Google is starting from behind " assert ents["text"] == "But Google is starting from behind "
assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}] assert ents["ents"] == [
{"start": 4, "end": 10, "label": "ORG", "kb_id": "", "kb_url": "#"}
]
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
ents = displacy.parse_ents(doc)
assert isinstance(ents, dict)
assert ents["text"] == "But Google is starting from behind "
assert ents["ents"] == [
{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "#"}
]
def test_displacy_parse_ents_with_kb_id_options(en_vocab):
"""Test that named entities with kb_id on a Doc are converted into displaCy's format."""
doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
ents = displacy.parse_ents(
doc, {"kb_url_template": "https://www.wikidata.org/wiki/{}"}
)
assert isinstance(ents, dict)
assert ents["text"] == "But Google is starting from behind "
assert ents["ents"] == [
{
"start": 4,
"end": 10,
"label": "ORG",
"kb_id": "Q95",
"kb_url": "https://www.wikidata.org/wiki/Q95",
}
]
def test_displacy_parse_deps(en_vocab): def test_displacy_parse_deps(en_vocab):

View File

@ -313,11 +313,12 @@ If a setting is not present in the options, the default value will be used.
> displacy.serve(doc, style="ent", options=options) > displacy.serve(doc, style="ent", options=options)
> ``` > ```
| Name | Description | | Name | Description |
| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ | | `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ | | `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ | | `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
| `kb_url_template` <Tag variant="new">3.2.1</Tag> | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in. ~~Optional[str]~~ |
By default, displaCy comes with colors for all entity types used by By default, displaCy comes with colors for all entity types used by
[spaCy's trained pipelines](/models). If you're using custom entity types, you [spaCy's trained pipelines](/models). If you're using custom entity types, you
@ -326,6 +327,14 @@ or pipeline package can also expose a
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy) [`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
to add custom labels and their colors automatically. to add custom labels and their colors automatically.
By default, displaCy links to `#` for entities without a `kb_id` set on their
span. If you wish to link an entity to their URL then consider using the
`kb_url_template` option from above. For example if the `kb_id` on a span is
`Q95` and this is a Wikidata identifier then this option can be set to
`https://www.wikidata.org/wiki/{}`. Clicking on your entity in the rendered HTML
should redirect you to their Wikidata page, in this case
`https://www.wikidata.org/wiki/Q95`.
## registry {#registry source="spacy/util.py" new="3"} ## registry {#registry source="spacy/util.py" new="3"}
spaCy's function registry extends spaCy's function registry extends
@ -412,10 +421,10 @@ finished. To log each training step, a
and the accuracy scores on the development set. and the accuracy scores on the development set.
The built-in, default logger is the ConsoleLogger, which prints results to the The built-in, default logger is the ConsoleLogger, which prints results to the
console in tabular format. The console in tabular format. The
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as [spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
a dependency of spaCy, enables other loggers: currently it provides one that sends a dependency of spaCy, enables other loggers: currently it provides one that
results to a [Weights & Biases](https://www.wandb.com/) dashboard. sends results to a [Weights & Biases](https://www.wandb.com/) dashboard.
Instead of using one of the built-in loggers, you can Instead of using one of the built-in loggers, you can
[implement your own](/usage/training#custom-logging). [implement your own](/usage/training#custom-logging).
@ -466,7 +475,6 @@ start decreasing across epochs.
</Accordion> </Accordion>
## Readers {#readers} ## Readers {#readers}
### File readers {#file-readers source="github.com/explosion/srsly" new="3"} ### File readers {#file-readers source="github.com/explosion/srsly" new="3"}