#10672: fixes displacy output for manual unsorted entities (#10673)

* #10672: fixes displacy output for manual unsorted entities

* #10672: removed unused import

* fix prettier formatting

Co-authored-by: Harm Buisman <h.buisman@iknl.nl>
Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
harmbuisman 2022-04-27 09:51:58 +02:00 committed by GitHub
parent b3717ba53a
commit c066fb8a4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 23 additions and 5 deletions

View File

@ -4,6 +4,7 @@ repos:
hooks: hooks:
- id: black - id: black
language_version: python3.7 language_version: python3.7
additional_dependencies: ['click==8.0.4']
- repo: https://gitlab.com/pycqa/flake8 - repo: https://gitlab.com/pycqa/flake8
rev: 3.9.2 rev: 3.9.2
hooks: hooks:

View File

@ -4,7 +4,7 @@ spaCy's built in visualization suite for dependencies and named entities.
DOCS: https://spacy.io/api/top-level#displacy DOCS: https://spacy.io/api/top-level#displacy
USAGE: https://spacy.io/usage/visualizers USAGE: https://spacy.io/usage/visualizers
""" """
from typing import List, Union, Iterable, Optional, Dict, Any, Callable from typing import Union, Iterable, Optional, Dict, Any, Callable
import warnings import warnings
from .render import DependencyRenderer, EntityRenderer, SpanRenderer from .render import DependencyRenderer, EntityRenderer, SpanRenderer
@ -56,6 +56,10 @@ def render(
renderer_func, converter = factories[style] renderer_func, converter = factories[style]
renderer = renderer_func(options=options) renderer = renderer_func(options=options)
parsed = [converter(doc, options) for doc in docs] if not manual else docs # type: ignore parsed = [converter(doc, options) for doc in docs] if not manual else docs # type: ignore
if manual:
for doc in docs:
if isinstance(doc, dict) and "ents" in doc:
doc["ents"] = sorted(doc["ents"], key=lambda x: (x["start"], x["end"]))
_html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore
html = _html["parsed"] html = _html["parsed"]
if RENDER_WRAPPER is not None: if RENDER_WRAPPER is not None:

View File

@ -338,3 +338,18 @@ def test_displacy_options_case():
assert "green" in result[1] and "bar" in result[1] assert "green" in result[1] and "bar" in result[1]
assert "red" in result[2] and "FOO" in result[2] assert "red" in result[2] and "FOO" in result[2]
assert "green" in result[3] and "BAR" in result[3] assert "green" in result[3] and "BAR" in result[3]
@pytest.mark.issue(10672)
def test_displacy_manual_sorted_entities():
    """Check that manually supplied, unsorted entities render in text order.

    Regression test for issue #10672: the ``ents`` list is passed to
    ``displacy.render`` with ``manual=True`` in reverse order of the start
    offsets, and the rendered HTML must still show the entity with the lower
    start offset first.
    """
    doc = {
        "text": "But Google is starting from behind.",
        "ents": [
            # Deliberately listed out of order: the span at 14-22 comes
            # before the span at 4-10.
            {"start": 14, "end": 22, "label": "SECOND"},
            {"start": 4, "end": 10, "label": "FIRST"},
        ],
        "title": None,
    }
    html = displacy.render(doc, style="ent", manual=True)
    # str.find returns -1 when the substring is absent, which would make the
    # ordering comparison below pass vacuously if "FIRST" were never rendered.
    # Assert that both labels are present before comparing their positions.
    assert "FIRST" in html
    assert "SECOND" in html
    assert html.find("FIRST") < html.find("SECOND")

View File

@ -263,7 +263,7 @@ Render a dependency parse tree or named entity visualization.
| Name | Description | | Name | Description |
| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ | | `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span, dict]], Doc, Span, dict]~~ |
| `style` | Visualization style, `"dep"` or `"ent"`. Defaults to `"dep"`. ~~str~~ | | `style` | Visualization style, `"dep"` or `"ent"`. Defaults to `"dep"`. ~~str~~ |
| `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ | | `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ |
| `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ | | `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ |

View File

@ -342,9 +342,7 @@ want to visualize output from other libraries, like [NLTK](http://www.nltk.org)
or or
[SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet). [SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
If you set `manual=True` on either `render()` or `serve()`, you can pass in data If you set `manual=True` on either `render()` or `serve()`, you can pass in data
in displaCy's format (instead of `Doc` objects). When setting `ents` manually, in displaCy's format as a dictionary (instead of `Doc` objects).
make sure to supply them in the right order, i.e. starting with the lowest start
position.
> #### Example > #### Example
> >