diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py index 1570f8d09..773089677 100644 --- a/spacy/tests/test_displacy.py +++ b/spacy/tests/test_displacy.py @@ -350,6 +350,77 @@ def test_displacy_render_wrapper(en_vocab): displacy.set_render_wrapper(lambda html: html) +def test_displacy_render_manual_dep(): + """Test displacy.render with manual data for dep style""" + parsed_dep = { + "words": [ + {"text": "This", "tag": "DT"}, + {"text": "is", "tag": "VBZ"}, + {"text": "a", "tag": "DT"}, + {"text": "sentence", "tag": "NN"}, + ], + "arcs": [ + {"start": 0, "end": 1, "label": "nsubj", "dir": "left"}, + {"start": 2, "end": 3, "label": "det", "dir": "left"}, + {"start": 1, "end": 3, "label": "attr", "dir": "right"}, + ], + } + html = displacy.render([parsed_dep], style="dep", manual=True) + for word in parsed_dep["words"]: + assert word["text"] in html + assert word["tag"] in html + + +def test_displacy_render_manual_ent(): + """Test displacy.render with manual data for ent style""" + parsed_ents = [ + { + "text": "But Google is starting from behind.", + "ents": [{"start": 4, "end": 10, "label": "ORG"}], + }, + { + "text": "But Google is starting from behind.", + "ents": [{"start": -100, "end": 100, "label": "COMPANY"}], + "title": "Title", + }, + ] + + html = displacy.render(parsed_ents, style="ent", manual=True) + for parsed_ent in parsed_ents: + assert parsed_ent["ents"][0]["label"] in html + if "title" in parsed_ent: + assert parsed_ent["title"] in html + + +def test_displacy_render_manual_span(): + """Test displacy.render with manual data for span style""" + parsed_spans = [ + { + "text": "Welcome to the Bank of China.", + "spans": [ + {"start_token": 3, "end_token": 6, "label": "ORG"}, + {"start_token": 5, "end_token": 6, "label": "GPE"}, + ], + "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."], + }, + { + "text": "Welcome to the Bank of China.", + "spans": [ + {"start_token": 3, "end_token": 6, "label": "ORG"}, + {"start_token": 
5, "end_token": 6, "label": "GPE"}, + ], + "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."], + "title": "Title", + }, + ] + + html = displacy.render(parsed_spans, style="span", manual=True) + for parsed_span in parsed_spans: + assert parsed_span["spans"][0]["label"] in html + if "title" in parsed_span: + assert parsed_span["title"] in html + + def test_displacy_options_case(): ents = ["foo", "BAR"] colors = {"FOO": "red", "bar": "green"} diff --git a/website/docs/api/top-level.mdx b/website/docs/api/top-level.mdx index dac8b93e0..c29ae93a3 100644 --- a/website/docs/api/top-level.mdx +++ b/website/docs/api/top-level.mdx @@ -400,11 +400,10 @@ displaCy's different data formats below. > #### ENT example data structure > > ```json -> { -> "text": "But Google is starting from behind.", -> "ents": [{"start": 4, "end": 10, "label": "ORG"}], -> "title": None -> } +> { +> "text": "But Google is starting from behind.", +> "ents": [{ "start": 4, "end": 10, "label": "ORG" }] +> } > ``` #### Named Entity Recognition data structure {id="structure-ent"} @@ -413,20 +412,20 @@ displaCy's different data formats below. | -------------- | ------------------------------------------------------------------------------------------- | | `text` | String representation of the document text. ~~str~~ | | `ents` | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~ | -| `title` | Title of the visualization. ~~str~~ | | _Optional_ | | +| `title` | Title of the visualization. ~~Optional[str]~~ | | `settings` | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ | -| Dictionary Key | Description | -| -------------- | --------------------------------------------------- | -| `start` | The index of the first token of the entity. ~~int~~ | -| `end` | The index of the last token of the entity. ~~int~~ | -| `label` | Label attached to the entity. ~~str~~ | -| _Optional_ | | -| `kb_id` | `KnowledgeBase` ID. 
~~str~~ | -| `kb_url` | `KnowledgeBase` URL. ~~str~~ | +| Dictionary Key | Description | +| -------------- | ---------------------------------------------------------------------- | +| `start` | The index of the first character of the entity. ~~int~~ | +| `end` | The index after the last character of the entity (exclusive). ~~int~~ | +| `label` | Label attached to the entity. ~~str~~ | +| _Optional_ | | +| `kb_id` | `KnowledgeBase` ID. ~~str~~ | +| `kb_url` | `KnowledgeBase` URL. ~~str~~ | @@ -449,17 +448,15 @@ displaCy's different data formats below. | -------------- | ----------------------------------------------------------------------------------------- | | `text` | String representation of the document text. ~~str~~ | | `spans` | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~ | -| `title` | Title of the visualization. ~~str~~ | | `tokens` | List of word tokens. ~~List[str]~~ | | _Optional_ | | +| `title` | Title of the visualization. ~~Optional[str]~~ | | `settings` | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ | | Dictionary Key | Description | | -------------- | ------------------------------------------------------------- | -| `start` | The index of the first token of the span. ~~int~~ | -| `end` | The index of the last token of the span. ~~int~~ | | `start_token` | The index of the first token of the span in `tokens`. ~~int~~ | | `end_token` | The index of the last token of the span in `tokens`. ~~int~~ | | `label` | Label attached to the span. ~~str~~ |