Add displaCy data structures to docs (2) (#12875)

* Add data structures to docs * Adjusted descriptions for more consistency * Add _optional_ flag to parameters * Add tests and adjust optional title key in doc * Add title to dep visualizations * fix typo --------- Co-authored-by: thomashacker <EdwardSchmuhl@web.de>
2025-11-13 14:25:52 +03:00 · 2023-07-31 10:47:57 +02:00 · 2023-07-31 10:47:57 +02:00 · c9e9dccf79
commit c9e9dccf79
parent 49055ed7c8
4 changed files with 200 additions and 1 deletions
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@ -313,6 +313,8 @@ class DependencyRenderer:
                self.lang = settings.get("lang", DEFAULT_LANG)
            render_id = f"{id_prefix}-{i}"
            svg = self.render_svg(render_id, p["words"], p["arcs"])
            if p.get("title"):
                svg = TPL_TITLE.format(title=p.get("title")) + svg
            rendered.append(svg)
        if page:
            content = "".join([TPL_FIGURE.format(content=svg) for svg in rendered])
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@ -350,6 +350,78 @@ def test_displacy_render_wrapper(en_vocab):
    displacy.set_render_wrapper(lambda html: html)
 def test_displacy_render_manual_dep():
    """Test displacy.render with manual data for dep style.

    Renders a single hand-written dependency parse (words, arcs and an
    optional title) with ``manual=True`` and checks that every word text,
    every tag and the title show up in the generated markup.
    """
    parsed_dep = {
        "words": [
            {"text": "This", "tag": "DT"},
            {"text": "is", "tag": "VBZ"},
            {"text": "a", "tag": "DT"},
            {"text": "sentence", "tag": "NN"},
        ],
        "arcs": [
            {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
            {"start": 2, "end": 3, "label": "det", "dir": "left"},
            {"start": 1, "end": 3, "label": "attr", "dir": "right"},
        ],
        "title": "Title",
    }
    html = displacy.render([parsed_dep], style="dep", manual=True)
    for word in parsed_dep["words"]:
        assert word["text"] in html
        assert word["tag"] in html
    # The fixture supplies a title, so make sure it is actually rendered;
    # the ent and span tests check their titles the same way.
    assert parsed_dep["title"] in html
 def test_displacy_render_manual_ent():
    """Check displacy.render output for manually supplied ent-style data.

    Two documents over the same text are rendered in one call: one without
    a title, and one with a title whose single entity uses offsets that lie
    outside the text. The first entity label of each document, and the title
    where present, must appear in the markup.
    """
    text = "But Google is starting from behind."
    untitled_doc = {
        "text": text,
        "ents": [{"start": 4, "end": 10, "label": "ORG"}],
    }
    titled_doc = {
        "text": text,
        "ents": [{"start": -100, "end": 100, "label": "COMPANY"}],
        "title": "Title",
    }
    docs = [untitled_doc, titled_doc]
    markup = displacy.render(docs, style="ent", manual=True)
    for doc in docs:
        first_label = doc["ents"][0]["label"]
        assert first_label in markup
        if "title" not in doc:
            continue
        assert doc["title"] in markup
 def test_displacy_render_manual_span():
    """Check displacy.render output for manually supplied span-style data.

    Two documents with identical text, spans and tokens are rendered in one
    call; only the second carries a title. The first span label of each
    document, and the title where present, must appear in the markup.
    """

    def build_doc():
        # Build a fresh dict each time so the two documents share no objects.
        return {
            "text": "Welcome to the Bank of China.",
            "spans": [
                {"start_token": 3, "end_token": 6, "label": "ORG"},
                {"start_token": 5, "end_token": 6, "label": "GPE"},
            ],
            "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
        }

    untitled_doc = build_doc()
    titled_doc = build_doc()
    titled_doc["title"] = "Title"
    docs = [untitled_doc, titled_doc]
    markup = displacy.render(docs, style="span", manual=True)
    for doc in docs:
        first_label = doc["spans"][0]["label"]
        assert first_label in markup
        if "title" not in doc:
            continue
        assert doc["title"] in markup
 def test_displacy_options_case():
    ents = ["foo", "BAR"]
    colors = {"FOO": "red", "bar": "green"}
--- a/website/docs/api/top-level.mdx
+++ b/website/docs/api/top-level.mdx
@ -343,6 +343,130 @@ use with the `manual=True` argument in `displacy.render`.
 | `options`   | Span-specific visualisation options. ~~Dict[str, Any]~~             |
 | **RETURNS** | Generated entities keyed by text (original text) and ents. ~~dict~~ |
 ### Visualizer data structures {id="displacy_structures"}
 You can use displaCy's data format to manually render data. This can be useful
 if you want to visualize output from other libraries. You can find examples of
 displaCy's different data formats below.
 > #### DEP example data structure
 >
 > ```json
 > {
 >   "words": [
 >     { "text": "This", "tag": "DT" },
 >     { "text": "is", "tag": "VBZ" },
 >     { "text": "a", "tag": "DT" },
 >     { "text": "sentence", "tag": "NN" }
 >   ],
 >   "arcs": [
 >     { "start": 0, "end": 1, "label": "nsubj", "dir": "left" },
 >     { "start": 2, "end": 3, "label": "det", "dir": "left" },
 >     { "start": 1, "end": 3, "label": "attr", "dir": "right" }
 >   ]
 > }
 > ```
 #### Dependency Visualizer data structure {id="structure-dep"}
 | Dictionary Key | Description                                                                                                 |
 | -------------- | ----------------------------------------------------------------------------------------------------------- |
 | `words`        | List of dictionaries describing a word token (see structure below). ~~List[Dict[str, Any]]~~                |
 | `arcs`         | List of dictionaries describing the relations between words (see structure below). ~~List[Dict[str, Any]]~~ |
 | _Optional_     |                                                                                                             |
 | `title`        | Title of the visualization. ~~Optional[str]~~                                                               |
 | `settings`     | Dependency Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~             |
 <Accordion title="Words data structure">
 | Dictionary Key | Description                              |
 | -------------- | ---------------------------------------- |
 | `text`         | Text content of the word. ~~str~~        |
 | `tag`          | Fine-grained part-of-speech. ~~str~~     |
 | `lemma`        | Base form of the word. ~~Optional[str]~~ |
 </Accordion>
 <Accordion title="Arcs data structure">
 | Dictionary Key | Description                                          |
 | -------------- | ---------------------------------------------------- |
 | `start`        | The index of the starting token. ~~int~~             |
 | `end`          | The index of the ending token. ~~int~~               |
 | `label`        | The type of dependency relation. ~~str~~             |
 | `dir`          | Direction of the relation (`left`, `right`). ~~str~~ |
 </Accordion>
 > #### ENT example data structure
 >
 > ```json
 > {
 >   "text": "But Google is starting from behind.",
 >   "ents": [{ "start": 4, "end": 10, "label": "ORG" }]
 > }
 > ```
 #### Named Entity Recognition data structure {id="structure-ent"}
 | Dictionary Key | Description                                                                                 |
 | -------------- | ------------------------------------------------------------------------------------------- |
 | `text`         | String representation of the document text. ~~str~~                                         |
 | `ents`         | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~    |
 | _Optional_     |                                                                                             |
 | `title`        | Title of the visualization. ~~Optional[str]~~                                               |
 | `settings`     | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
 <Accordion title="Ents data structure">
 | Dictionary Key | Description                                                            |
 | -------------- | ---------------------------------------------------------------------- |
 | `start`        | The index of the first character of the entity. ~~int~~                |
 | `end`          | The index one past the entity's last character (exclusive). ~~int~~    |
 | `label`        | Label attached to the entity. ~~str~~                                  |
 | _Optional_     |                                                                        |
 | `kb_id`        | `KnowledgeBase` ID. ~~str~~                                            |
 | `kb_url`       | `KnowledgeBase` URL. ~~str~~                                           |
 </Accordion>
 > #### SPAN example data structure
 >
 > ```json
 > {
 >   "text": "Welcome to the Bank of China.",
 >   "spans": [
 >     { "start_token": 3, "end_token": 6, "label": "ORG" },
 >     { "start_token": 5, "end_token": 6, "label": "GPE" }
 >   ],
 >   "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."]
 > }
 > ```
 #### Span Classification data structure {id="structure-span"}
 | Dictionary Key | Description                                                                               |
 | -------------- | ----------------------------------------------------------------------------------------- |
 | `text`         | String representation of the document text. ~~str~~                                       |
 | `spans`        | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~     |
 | `tokens`       | List of word tokens. ~~List[str]~~                                                        |
 | _Optional_     |                                                                                           |
 | `title`        | Title of the visualization. ~~Optional[str]~~                                             |
 | `settings`     | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
 <Accordion title="Spans data structure">
 | Dictionary Key | Description                                                   |
 | -------------- | ------------------------------------------------------------- |
 | `start_token`  | The index of the first token of the span in `tokens`. ~~int~~ |
 | `end_token`    | The index one past the span's last token in `tokens`. ~~int~~ |
 | `label`        | Label attached to the span. ~~str~~                           |
 | _Optional_     |                                                               |
 | `kb_id`        | `KnowledgeBase` ID. ~~str~~                                   |
 | `kb_url`       | `KnowledgeBase` URL. ~~str~~                                  |
 </Accordion>
 ### Visualizer options {id="displacy_options"}
 The `options` argument lets you specify additional settings for each visualizer.
--- a/website/docs/usage/visualizers.mdx
+++ b/website/docs/usage/visualizers.mdx
@ -349,7 +349,8 @@ or
 [SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
 If you set `manual=True` on either `render()` or `serve()`, you can pass in data
 in displaCy's format as a dictionary (instead of `Doc` objects). There are
-helper functions for converting `Doc` objects to displaCy's format for use with
+helper functions for converting `Doc` objects to
 [displaCy's format](/api/top-level#displacy_structures) for use with
 `manual=True`: [`displacy.parse_deps`](/api/top-level#displacy.parse_deps),
 [`displacy.parse_ents`](/api/top-level#displacy.parse_ents), and
 [`displacy.parse_spans`](/api/top-level#displacy.parse_spans).