mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Displacy serve entity linking support without manual=True
support. (#9748)
* Add support for kb_id to be displayed via displacy.serve. The current support is only limited to the manual option in displacy.render * Commit to check pre-commit hooks are run. * Update spacy/displacy/__init__.py Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Changes as per suggestions on the PR. * Update website/docs/api/top-level.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Update website/docs/api/top-level.md Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * tag option as new from 3.2.1 onwards Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: svlandeg <sofie.vanlandeghem@gmail.com>
This commit is contained in:
parent
6763cbfdc0
commit
1be8a4dab3
|
@ -181,11 +181,19 @@ def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
|||
def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
|
||||
"""Generate named entities in [{start: i, end: i, label: 'label'}] format.
|
||||
|
||||
doc (Doc): Document do parse.
|
||||
doc (Doc): Document to parse.
|
||||
options (Dict[str, Any]): NER-specific visualisation options.
|
||||
RETURNS (dict): Generated entities keyed by text (original text) and ents.
|
||||
"""
|
||||
kb_url_template = options.get("kb_url_template", None)
|
||||
ents = [
|
||||
{"start": ent.start_char, "end": ent.end_char, "label": ent.label_}
|
||||
{
|
||||
"start": ent.start_char,
|
||||
"end": ent.end_char,
|
||||
"label": ent.label_,
|
||||
"kb_id": ent.kb_id_ if ent.kb_id_ else "",
|
||||
"kb_url": kb_url_template.format(ent.kb_id_) if kb_url_template else "#",
|
||||
}
|
||||
for ent in doc.ents
|
||||
]
|
||||
if not ents:
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
import pytest
|
||||
|
||||
from spacy import displacy
|
||||
from spacy.displacy.render import DependencyRenderer, EntityRenderer
|
||||
from spacy.tokens import Span, Doc
|
||||
from spacy.lang.fa import Persian
|
||||
from spacy.tokens import Span, Doc
|
||||
|
||||
|
||||
def test_displacy_parse_ents(en_vocab):
|
||||
|
@ -12,7 +13,38 @@ def test_displacy_parse_ents(en_vocab):
|
|||
ents = displacy.parse_ents(doc)
|
||||
assert isinstance(ents, dict)
|
||||
assert ents["text"] == "But Google is starting from behind "
|
||||
assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
|
||||
assert ents["ents"] == [
|
||||
{"start": 4, "end": 10, "label": "ORG", "kb_id": "", "kb_url": "#"}
|
||||
]
|
||||
|
||||
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
|
||||
ents = displacy.parse_ents(doc)
|
||||
assert isinstance(ents, dict)
|
||||
assert ents["text"] == "But Google is starting from behind "
|
||||
assert ents["ents"] == [
|
||||
{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "#"}
|
||||
]
|
||||
|
||||
|
||||
def test_displacy_parse_ents_with_kb_id_options(en_vocab):
|
||||
"""Test that named entities with kb_id on a Doc are converted into displaCy's format."""
|
||||
doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
|
||||
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
|
||||
|
||||
ents = displacy.parse_ents(
|
||||
doc, {"kb_url_template": "https://www.wikidata.org/wiki/{}"}
|
||||
)
|
||||
assert isinstance(ents, dict)
|
||||
assert ents["text"] == "But Google is starting from behind "
|
||||
assert ents["ents"] == [
|
||||
{
|
||||
"start": 4,
|
||||
"end": 10,
|
||||
"label": "ORG",
|
||||
"kb_id": "Q95",
|
||||
"kb_url": "https://www.wikidata.org/wiki/Q95",
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def test_displacy_parse_deps(en_vocab):
|
||||
|
|
|
@ -313,11 +313,12 @@ If a setting is not present in the options, the default value will be used.
|
|||
> displacy.serve(doc, style="ent", options=options)
|
||||
> ```
|
||||
|
||||
| Name | Description |
|
||||
| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
|
||||
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||
| Name | Description |
|
||||
| ------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
|
||||
| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
|
||||
| `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
|
||||
| `kb_url_template` <Tag variant="new">3.2.1</Tag> | Optional template to construct the KB URL for the entity to link to. Expects a Python format string with a single `{}` field that is filled in with the entity's `kb_id`. ~~Optional[str]~~                                                |
|
||||
|
||||
By default, displaCy comes with colors for all entity types used by
|
||||
[spaCy's trained pipelines](/models). If you're using custom entity types, you
|
||||
|
@ -326,6 +327,14 @@ or pipeline package can also expose a
|
|||
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
|
||||
to add custom labels and their colors automatically.
|
||||
|
||||
By default, displaCy links to `#` for entities without a `kb_id` set on their
|
||||
span. If you wish to link an entity to its URL, consider using the
|
||||
`kb_url_template` option from above. For example, if the `kb_id` on a span is
|
||||
`Q95` and this is a Wikidata identifier then this option can be set to
|
||||
`https://www.wikidata.org/wiki/{}`. Clicking on your entity in the rendered HTML
|
||||
should redirect you to its Wikidata page, in this case
|
||||
`https://www.wikidata.org/wiki/Q95`.
|
||||
|
||||
## registry {#registry source="spacy/util.py" new="3"}
|
||||
|
||||
spaCy's function registry extends
|
||||
|
@ -412,10 +421,10 @@ finished. To log each training step, a
|
|||
and the accuracy scores on the development set.
|
||||
|
||||
The built-in, default logger is the ConsoleLogger, which prints results to the
|
||||
console in tabular format. The
|
||||
console in tabular format. The
|
||||
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
|
||||
a dependency of spaCy, enables other loggers: currently it provides one that sends
|
||||
results to a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||
a dependency of spaCy, enables other loggers: currently it provides one that
|
||||
sends results to a [Weights & Biases](https://www.wandb.com/) dashboard.
|
||||
|
||||
Instead of using one of the built-in loggers, you can
|
||||
[implement your own](/usage/training#custom-logging).
|
||||
|
@ -466,7 +475,6 @@ start decreasing across epochs.
|
|||
|
||||
</Accordion>
|
||||
|
||||
|
||||
## Readers {#readers}
|
||||
|
||||
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}
|
||||
|
|
Loading…
Reference in New Issue
Block a user