Add tests and adjust optional title key in doc

This commit is contained in:
thomashacker 2023-02-14 13:37:41 +01:00
parent 091bb43055
commit ee0be6e484
2 changed files with 85 additions and 17 deletions

View File

@ -336,6 +336,77 @@ def test_displacy_render_wrapper(en_vocab):
displacy.set_render_wrapper(lambda html: html)
def test_displacy_render_manual_dep():
    """Render a hand-written dependency parse in manual mode.

    The text and the tag of every word handed to ``displacy.render`` must
    appear in the rendered output.
    """
    # (text, tag) pairs for the words of the example sentence.
    tagged_words = [
        ("This", "DT"),
        ("is", "VBZ"),
        ("a", "DT"),
        ("sentence", "NN"),
    ]
    # (start, end, label, dir) tuples for the arcs between those words.
    edges = [
        (0, 1, "nsubj", "left"),
        (2, 3, "det", "left"),
        (1, 3, "attr", "right"),
    ]
    manual_parse = {
        "words": [{"text": text, "tag": tag} for text, tag in tagged_words],
        "arcs": [
            {"start": start, "end": end, "label": label, "dir": direction}
            for start, end, label, direction in edges
        ],
    }

    markup = displacy.render([manual_parse], style="dep", manual=True)

    for entry in manual_parse["words"]:
        for field in ("text", "tag"):
            assert entry[field] in markup
def test_displacy_render_manual_ent():
    """Render hand-written entity data in manual mode.

    Two documents are rendered together: one without a ``title`` key and one
    with it (plus entity offsets far outside the text). The first entity
    label of each document, and the title where one is given, must appear in
    the rendered output.
    """
    sentence = "But Google is starting from behind."
    untitled = {
        "text": sentence,
        "ents": [{"start": 4, "end": 10, "label": "ORG"}],
    }
    titled = {
        "text": sentence,
        "ents": [{"start": -100, "end": 100, "label": "COMPANY"}],
        "title": "Title",
    }
    documents = [untitled, titled]

    markup = displacy.render(documents, style="ent", manual=True)

    for document in documents:
        first_label = document["ents"][0]["label"]
        assert first_label in markup
        if "title" not in document:
            # The title key is optional; nothing more to check here.
            continue
        assert document["title"] in markup
def test_displacy_render_manual_span():
    """Render hand-written span data in manual mode.

    The same document is rendered twice, once without and once with a
    ``title`` key. The first span label of each document, and the title
    where one is given, must appear in the rendered output.
    """

    def build_document(**extra):
        # Build fresh lists on every call so the two documents share no
        # mutable state, exactly like two separate literals would.
        document = {
            "text": "Welcome to the Bank of China.",
            "spans": [
                {"start_token": 3, "end_token": 6, "label": "ORG"},
                {"start_token": 5, "end_token": 6, "label": "GPE"},
            ],
            "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
        }
        document.update(extra)
        return document

    documents = [build_document(), build_document(title="Title")]

    markup = displacy.render(documents, style="span", manual=True)

    for document in documents:
        first_label = document["spans"][0]["label"]
        assert first_label in markup
        if "title" not in document:
            # The title key is optional; nothing more to check here.
            continue
        assert document["title"] in markup
def test_displacy_options_case():
ents = ["foo", "BAR"]
colors = {"FOO": "red", "bar": "green"}

View File

@ -397,11 +397,10 @@ displaCy's different data formats below.
> #### ENT example data structure
>
> ```json
> {
> "text": "But Google is starting from behind.",
> "ents": [{"start": 4, "end": 10, "label": "ORG"}],
> "title": null
> }
> {
> "text": "But Google is starting from behind.",
> "ents": [{ "start": 4, "end": 10, "label": "ORG" }]
> }
> ```
#### Named Entity Recognition data structure {id="structure-ent"}
@ -410,20 +409,20 @@ displaCy's different data formats below.
| -------------- | ------------------------------------------------------------------------------------------- |
| `text` | String representation of the document text. ~~str~~ |
| `ents` | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~ |
| `title` | Title of the visualization. ~~str~~ |
| _Optional_ | |
| `title` | Title of the visualization. ~~Optional[str]~~ |
| `settings` | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
<Accordion title="Ents data structure">
| Dictionary Key | Description |
| -------------- | --------------------------------------------------- |
| `start` | The index of the first token of the entity. ~~int~~ |
| `end` | The index of the last token of the entity. ~~int~~ |
| `label` | Label attached to the entity. ~~str~~ |
| _Optional_ | |
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
| Dictionary Key | Description |
| -------------- | ---------------------------------------------------------------------- |
| `start` | The index of the first character of the entity. ~~int~~ |
| `end` | The index one past the last character of the entity (exclusive end offset). ~~int~~ |
| `label` | Label attached to the entity. ~~str~~ |
| _Optional_ | |
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
</Accordion>
@ -446,17 +445,15 @@ displaCy's different data formats below.
| -------------- | ----------------------------------------------------------------------------------------- |
| `text` | String representation of the document text. ~~str~~ |
| `spans` | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~ |
| `title` | Title of the visualization. ~~str~~ |
| `tokens` | List of word tokens. ~~List[str]~~ |
| _Optional_ | |
| `title` | Title of the visualization. ~~Optional[str]~~ |
| `settings` | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
<Accordion title="Spans data structure">
| Dictionary Key | Description |
| -------------- | ------------------------------------------------------------- |
| `start` | The index of the first token of the span. ~~int~~ |
| `end` | The index of the last token of the span. ~~int~~ |
| `start_token` | The index of the first token of the span in `tokens`. ~~int~~ |
| `end_token` | The index one past the last token of the span in `tokens` (exclusive). ~~int~~ |
| `label` | Label attached to the span. ~~str~~ |