Add tests and adjust optional title key in doc

2026-01-04 16:03:12 +03:00 · 2023-02-14 13:37:41 +01:00 · 2023-02-14 13:37:41 +01:00 · 0ca01fa5d2
commit 0ca01fa5d2
parent c8a0968346
2 changed files with 85 additions and 17 deletions
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@ -350,6 +350,77 @@ def test_displacy_render_wrapper(en_vocab):
    displacy.set_render_wrapper(lambda html: html)


+def test_displacy_render_manual_dep():
+    """Test displacy.render with manual data for dep style"""
+    parsed_dep = {
+        "words": [
+            {"text": "This", "tag": "DT"},
+            {"text": "is", "tag": "VBZ"},
+            {"text": "a", "tag": "DT"},
+            {"text": "sentence", "tag": "NN"},
+        ],
+        "arcs": [
+            {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
+            {"start": 2, "end": 3, "label": "det", "dir": "left"},
+            {"start": 1, "end": 3, "label": "attr", "dir": "right"},
+        ],
+    }
+    html = displacy.render([parsed_dep], style="dep", manual=True)
+    for word in parsed_dep["words"]:
+        assert word["text"] in html
+        assert word["tag"] in html
+
+
+def test_displacy_render_manual_ent():
+    """Test displacy.render with manual data for ent style"""
+    parsed_ents = [
+        {
+            "text": "But Google is starting from behind.",
+            "ents": [{"start": 4, "end": 10, "label": "ORG"}],
+        },
+        {
+            "text": "But Google is starting from behind.",
+            "ents": [{"start": -100, "end": 100, "label": "COMPANY"}],
+            "title": "Title",
+        },
+    ]
+
+    html = displacy.render(parsed_ents, style="ent", manual=True)
+    for parsed_ent in parsed_ents:
+        assert parsed_ent["ents"][0]["label"] in html
+        if "title" in parsed_ent:
+            assert parsed_ent["title"] in html
+
+
+def test_displacy_render_manual_span():
+    """Test displacy.render with manual data for span style"""
+    parsed_spans = [
+        {
+            "text": "Welcome to the Bank of China.",
+            "spans": [
+                {"start_token": 3, "end_token": 6, "label": "ORG"},
+                {"start_token": 5, "end_token": 6, "label": "GPE"},
+            ],
+            "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
+        },
+        {
+            "text": "Welcome to the Bank of China.",
+            "spans": [
+                {"start_token": 3, "end_token": 6, "label": "ORG"},
+                {"start_token": 5, "end_token": 6, "label": "GPE"},
+            ],
+            "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
+            "title": "Title",
+        },
+    ]
+
+    html = displacy.render(parsed_spans, style="span", manual=True)
+    for parsed_span in parsed_spans:
+        assert parsed_span["spans"][0]["label"] in html
+        if "title" in parsed_span:
+            assert parsed_span["title"] in html
+
+
 def test_displacy_options_case():
    ents = ["foo", "BAR"]
    colors = {"FOO": "red", "bar": "green"}
--- a/website/docs/api/top-level.mdx
+++ b/website/docs/api/top-level.mdx
@ -400,11 +400,10 @@ displaCy's different data formats below.
 > #### ENT example data structure
 >
 > ```json
->  {
->    "text": "But Google is starting from behind.",
->    "ents": [{"start": 4, "end": 10, "label": "ORG"}],
->    "title": None
->  }
+> {
+>   "text": "But Google is starting from behind.",
+>   "ents": [{ "start": 4, "end": 10, "label": "ORG" }]
+> }
 > ```

 #### Named Entity Recognition data structure {id="structure-ent"}
@ -413,20 +412,20 @@ displaCy's different data formats below.
 | -------------- | ------------------------------------------------------------------------------------------- |
 | `text`         | String representation of the document text. ~~str~~                                         |
 | `ents`         | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~    |
-| `title`        | Title of the visualization. ~~str~~                                                         |
 | _Optional_     |                                                                                             |
+| `title`        | Title of the visualization. ~~Optional[str]~~                                               |
 | `settings`     | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |

 <Accordion title="Ents data structure">

-| Dictionary Key | Description                                         |
-| -------------- | --------------------------------------------------- |
-| `start`        | The index of the first token of the entity. ~~int~~ |
-| `end`          | The index of the last token of the entity. ~~int~~  |
-| `label`        | Label attached to the entity. ~~str~~               |
-| _Optional_     |                                                     |
-| `kb_id`        | `KnowledgeBase` ID. ~~str~~                         |
-| `kb_url`       | `KnowledgeBase` URL. ~~str~~                        |
+| Dictionary Key | Description                                                            |
+| -------------- | ---------------------------------------------------------------------- |
+| `start`        | The index of the first character of the entity. ~~int~~                |
+| `end`          | The index after the last character of the entity (exclusive). ~~int~~  |
+| `label`        | Label attached to the entity. ~~str~~                                  |
+| _Optional_     |                                                                        |
+| `kb_id`        | `KnowledgeBase` ID. ~~str~~                                            |
+| `kb_url`       | `KnowledgeBase` URL. ~~str~~                                           |

 </Accordion>

@ -449,17 +448,15 @@ displaCy's different data formats below.
 | -------------- | ----------------------------------------------------------------------------------------- |
 | `text`         | String representation of the document text. ~~str~~                                       |
 | `spans`        | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~     |
-| `title`        | Title of the visualization. ~~str~~                                                       |
 | `tokens`       | List of word tokens. ~~List[str]~~                                                        |
 | _Optional_     |                                                                                           |
+| `title`        | Title of the visualization. ~~Optional[str]~~                                             |
 | `settings`     | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |

 <Accordion title="Spans data structure">

 | Dictionary Key | Description                                                   |
 | -------------- | ------------------------------------------------------------- |
-| `start`        | The index of the first token of the span. ~~int~~             |
-| `end`          | The index of the last token of the span. ~~int~~              |
 | `start_token`  | The index of the first token of the span in `tokens`. ~~int~~ |
 | `end_token`    | The index of the last token of the span in `tokens`. ~~int~~  |
 | `label`        | Label attached to the span. ~~str~~                           |