mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-21 17:41:59 +03:00
Add tests and adjust optional title key in doc
This commit is contained in:
parent
c8a0968346
commit
0ca01fa5d2
|
@ -350,6 +350,77 @@ def test_displacy_render_wrapper(en_vocab):
|
|||
displacy.set_render_wrapper(lambda html: html)
|
||||
|
||||
|
||||
def test_displacy_render_manual_dep():
|
||||
"""Test displacy.render with manual data for dep style"""
|
||||
parsed_dep = {
|
||||
"words": [
|
||||
{"text": "This", "tag": "DT"},
|
||||
{"text": "is", "tag": "VBZ"},
|
||||
{"text": "a", "tag": "DT"},
|
||||
{"text": "sentence", "tag": "NN"},
|
||||
],
|
||||
"arcs": [
|
||||
{"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
|
||||
{"start": 2, "end": 3, "label": "det", "dir": "left"},
|
||||
{"start": 1, "end": 3, "label": "attr", "dir": "right"},
|
||||
],
|
||||
}
|
||||
html = displacy.render([parsed_dep], style="dep", manual=True)
|
||||
for word in parsed_dep["words"]:
|
||||
assert word["text"] in html
|
||||
assert word["tag"] in html
|
||||
|
||||
|
||||
def test_displacy_render_manual_ent():
|
||||
"""Test displacy.render with manual data for ent style"""
|
||||
parsed_ents = [
|
||||
{
|
||||
"text": "But Google is starting from behind.",
|
||||
"ents": [{"start": 4, "end": 10, "label": "ORG"}],
|
||||
},
|
||||
{
|
||||
"text": "But Google is starting from behind.",
|
||||
"ents": [{"start": -100, "end": 100, "label": "COMPANY"}],
|
||||
"title": "Title",
|
||||
},
|
||||
]
|
||||
|
||||
html = displacy.render(parsed_ents, style="ent", manual=True)
|
||||
for parsed_ent in parsed_ents:
|
||||
assert parsed_ent["ents"][0]["label"] in html
|
||||
if "title" in parsed_ent:
|
||||
assert parsed_ent["title"] in html
|
||||
|
||||
|
||||
def test_displacy_render_manual_span():
|
||||
"""Test displacy.render with manual data for span style"""
|
||||
parsed_spans = [
|
||||
{
|
||||
"text": "Welcome to the Bank of China.",
|
||||
"spans": [
|
||||
{"start_token": 3, "end_token": 6, "label": "ORG"},
|
||||
{"start_token": 5, "end_token": 6, "label": "GPE"},
|
||||
],
|
||||
"tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
|
||||
},
|
||||
{
|
||||
"text": "Welcome to the Bank of China.",
|
||||
"spans": [
|
||||
{"start_token": 3, "end_token": 6, "label": "ORG"},
|
||||
{"start_token": 5, "end_token": 6, "label": "GPE"},
|
||||
],
|
||||
"tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
|
||||
"title": "Title",
|
||||
},
|
||||
]
|
||||
|
||||
html = displacy.render(parsed_spans, style="span", manual=True)
|
||||
for parsed_span in parsed_spans:
|
||||
assert parsed_span["spans"][0]["label"] in html
|
||||
if "title" in parsed_span:
|
||||
assert parsed_span["title"] in html
|
||||
|
||||
|
||||
def test_displacy_options_case():
|
||||
ents = ["foo", "BAR"]
|
||||
colors = {"FOO": "red", "bar": "green"}
|
||||
|
|
|
@ -400,11 +400,10 @@ displaCy's different data formats below.
|
|||
> #### ENT example data structure
|
||||
>
|
||||
> ```json
|
||||
> {
|
||||
> "text": "But Google is starting from behind.",
|
||||
> "ents": [{"start": 4, "end": 10, "label": "ORG"}],
|
||||
> "title": None
|
||||
> }
|
||||
> {
|
||||
> "text": "But Google is starting from behind.",
|
||||
> "ents": [{ "start": 4, "end": 10, "label": "ORG" }]
|
||||
> }
|
||||
> ```
|
||||
|
||||
#### Named Entity Recognition data structure {id="structure-ent"}
|
||||
|
@ -413,20 +412,20 @@ displaCy's different data formats below.
|
|||
| -------------- | ------------------------------------------------------------------------------------------- |
|
||||
| `text` | String representation of the document text. ~~str~~ |
|
||||
| `ents` | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||
| `title` | Title of the visualization. ~~str~~ |
|
||||
| _Optional_ | |
|
||||
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||
| `settings` | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||
|
||||
<Accordion title="Ents data structure">
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| `start` | The index of the first token of the entity. ~~int~~ |
|
||||
| `end` | The index of the last token of the entity. ~~int~~ |
|
||||
| `label` | Label attached to the entity. ~~str~~ |
|
||||
| _Optional_ | |
|
||||
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
|
||||
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ---------------------------------------------------------------------- |
|
||||
| `start` | The index of the first character of the entity. ~~int~~ |
|
||||
| `end` | The index one past the last character of the entity, i.e. the end offset is exclusive. ~~int~~ |
|
||||
| `label` | Label attached to the entity. ~~str~~ |
|
||||
| _Optional_ | |
|
||||
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
|
||||
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
|
||||
|
||||
</Accordion>
|
||||
|
||||
|
@ -449,17 +448,15 @@ displaCy's different data formats below.
|
|||
| -------------- | ----------------------------------------------------------------------------------------- |
|
||||
| `text` | String representation of the document text. ~~str~~ |
|
||||
| `spans` | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||
| `title` | Title of the visualization. ~~str~~ |
|
||||
| `tokens` | List of word tokens. ~~List[str]~~ |
|
||||
| _Optional_ | |
|
||||
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||
| `settings` | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||
|
||||
<Accordion title="Spans data structure">
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ------------------------------------------------------------- |
|
||||
| `start` | The index of the first token of the span. ~~int~~ |
|
||||
| `end` | The index of the last token of the span. ~~int~~ |
|
||||
| `start_token` | The index of the first token of the span in `tokens`. ~~int~~ |
|
||||
| `end_token` | The index one past the last token of the span in `tokens`, i.e. the end index is exclusive. ~~int~~ |
|
||||
| `label` | Label attached to the span. ~~str~~ |
|
||||
|
|
Loading…
Reference in New Issue
Block a user