mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 01:04:34 +03:00
Add displaCy data structures to docs (2) (#12875)
* Add data structures to docs * Adjusted descriptions for more consistency * Add _optional_ flag to parameters * Add tests and adjust optional title key in doc * Add title to dep visualizations * fix typo --------- Co-authored-by: thomashacker <EdwardSchmuhl@web.de>
This commit is contained in:
parent
49055ed7c8
commit
c9e9dccf79
|
@ -313,6 +313,8 @@ class DependencyRenderer:
|
||||||
self.lang = settings.get("lang", DEFAULT_LANG)
|
self.lang = settings.get("lang", DEFAULT_LANG)
|
||||||
render_id = f"{id_prefix}-{i}"
|
render_id = f"{id_prefix}-{i}"
|
||||||
svg = self.render_svg(render_id, p["words"], p["arcs"])
|
svg = self.render_svg(render_id, p["words"], p["arcs"])
|
||||||
|
if p.get("title"):
|
||||||
|
svg = TPL_TITLE.format(title=p.get("title")) + svg
|
||||||
rendered.append(svg)
|
rendered.append(svg)
|
||||||
if page:
|
if page:
|
||||||
content = "".join([TPL_FIGURE.format(content=svg) for svg in rendered])
|
content = "".join([TPL_FIGURE.format(content=svg) for svg in rendered])
|
||||||
|
|
|
@ -350,6 +350,78 @@ def test_displacy_render_wrapper(en_vocab):
|
||||||
displacy.set_render_wrapper(lambda html: html)
|
displacy.set_render_wrapper(lambda html: html)
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_render_manual_dep():
    """Test displacy.render with manual data for dep style.

    Renders one hand-written parse (words, arcs and a title) with
    ``manual=True`` and checks that every word text, tag, arc label and the
    title show up in the generated markup.
    """
    parsed_dep = {
        "words": [
            {"text": "This", "tag": "DT"},
            {"text": "is", "tag": "VBZ"},
            {"text": "a", "tag": "DT"},
            {"text": "sentence", "tag": "NN"},
        ],
        "arcs": [
            {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
            {"start": 2, "end": 3, "label": "det", "dir": "left"},
            {"start": 1, "end": 3, "label": "attr", "dir": "right"},
        ],
        "title": "Title",
    }
    html = displacy.render([parsed_dep], style="dep", manual=True)
    for word in parsed_dep["words"]:
        assert word["text"] in html
        assert word["tag"] in html
    for arc in parsed_dep["arcs"]:
        assert arc["label"] in html
    # The title is part of the input, so it has to be rendered as well; this
    # mirrors the title checks in the ent and span manual-render tests.
    assert parsed_dep["title"] in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_render_manual_ent():
    """Manual ent-style input must render its labels and any supplied title."""
    untitled_doc = {
        "text": "But Google is starting from behind.",
        "ents": [{"start": 4, "end": 10, "label": "ORG"}],
    }
    titled_doc = {
        "text": "But Google is starting from behind.",
        # These offsets fall outside the bounds of the text.
        "ents": [{"start": -100, "end": 100, "label": "COMPANY"}],
        "title": "Title",
    }
    parsed_ents = [untitled_doc, titled_doc]

    html = displacy.render(parsed_ents, style="ent", manual=True)

    for doc_data in parsed_ents:
        first_label = doc_data["ents"][0]["label"]
        assert first_label in html
        # Only documents that carry a title are expected to show one.
        if "title" not in doc_data:
            continue
        assert doc_data["title"] in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_displacy_render_manual_span():
    """Manual span-style input must render its labels and any supplied title."""

    def build_doc(**extra):
        # Build fresh containers on every call so the two inputs do not
        # share any list or dict objects.
        return {
            "text": "Welcome to the Bank of China.",
            "spans": [
                {"start_token": 3, "end_token": 6, "label": "ORG"},
                {"start_token": 5, "end_token": 6, "label": "GPE"},
            ],
            "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
            **extra,
        }

    parsed_spans = [build_doc(), build_doc(title="Title")]

    html = displacy.render(parsed_spans, style="span", manual=True)

    for doc_data in parsed_spans:
        first_label = doc_data["spans"][0]["label"]
        assert first_label in html
        # Only documents that carry a title are expected to show one.
        if "title" not in doc_data:
            continue
        assert doc_data["title"] in html
|
||||||
|
|
||||||
|
|
||||||
def test_displacy_options_case():
|
def test_displacy_options_case():
|
||||||
ents = ["foo", "BAR"]
|
ents = ["foo", "BAR"]
|
||||||
colors = {"FOO": "red", "bar": "green"}
|
colors = {"FOO": "red", "bar": "green"}
|
||||||
|
|
|
@ -343,6 +343,130 @@ use with the `manual=True` argument in `displacy.render`.
|
||||||
| `options` | Span-specific visualisation options. ~~Dict[str, Any]~~ |
|
| `options` | Span-specific visualisation options. ~~Dict[str, Any]~~ |
|
||||||
| **RETURNS** | Generated entities keyed by text (original text) and ents. ~~dict~~ |
|
| **RETURNS** | Generated entities keyed by text (original text) and ents. ~~dict~~ |
|
||||||
|
|
||||||
|
### Visualizer data structures {id="displacy_structures"}
|
||||||
|
|
||||||
|
You can use displaCy's data format to manually render data. This can be useful
|
||||||
|
if you want to visualize output from other libraries. You can find examples of
|
||||||
|
displaCy's different data formats below.
|
||||||
|
|
||||||
|
> #### DEP example data structure
|
||||||
|
>
|
||||||
|
> ```json
|
||||||
|
> {
|
||||||
|
> "words": [
|
||||||
|
> { "text": "This", "tag": "DT" },
|
||||||
|
> { "text": "is", "tag": "VBZ" },
|
||||||
|
> { "text": "a", "tag": "DT" },
|
||||||
|
> { "text": "sentence", "tag": "NN" }
|
||||||
|
> ],
|
||||||
|
> "arcs": [
|
||||||
|
> { "start": 0, "end": 1, "label": "nsubj", "dir": "left" },
|
||||||
|
> { "start": 2, "end": 3, "label": "det", "dir": "left" },
|
||||||
|
> { "start": 1, "end": 3, "label": "attr", "dir": "right" }
|
||||||
|
> ]
|
||||||
|
> }
|
||||||
|
> ```
|
||||||
|
|
||||||
|
#### Dependency Visualizer data structure {id="structure-dep"}
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ----------------------------------------------------------------------------------------------------------- |
|
||||||
|
| `words` | List of dictionaries describing a word token (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||||
|
| `arcs` | List of dictionaries describing the relations between words (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||||
|
| _Optional_ | |
|
||||||
|
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||||
|
| `settings` | Dependency Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||||
|
|
||||||
|
<Accordion title="Words data structure">
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ---------------------------------------- |
|
||||||
|
| `text` | Text content of the word. ~~str~~ |
|
||||||
|
| `tag` | Fine-grained part-of-speech. ~~str~~ |
|
||||||
|
| `lemma` | Base form of the word. ~~Optional[str]~~ |
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
<Accordion title="Arcs data structure">
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ---------------------------------------------------- |
|
||||||
|
| `start` | The index of the starting token. ~~int~~ |
|
||||||
|
| `end` | The index of the ending token. ~~int~~ |
|
||||||
|
| `label` | The type of dependency relation. ~~str~~ |
|
||||||
|
| `dir` | Direction of the relation (`left`, `right`). ~~str~~ |
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
> #### ENT example data structure
|
||||||
|
>
|
||||||
|
> ```json
|
||||||
|
> {
|
||||||
|
> "text": "But Google is starting from behind.",
|
||||||
|
> "ents": [{ "start": 4, "end": 10, "label": "ORG" }]
|
||||||
|
> }
|
||||||
|
> ```
|
||||||
|
|
||||||
|
#### Named Entity Recognition data structure {id="structure-ent"}
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ------------------------------------------------------------------------------------------- |
|
||||||
|
| `text` | String representation of the document text. ~~str~~ |
|
||||||
|
| `ents` | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||||
|
| _Optional_ | |
|
||||||
|
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||||
|
| `settings` | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||||
|
|
||||||
|
<Accordion title="Ents data structure">
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ---------------------------------------------------------------------- |
|
||||||
|
| `start` | The index of the first character of the entity. ~~int~~ |
|
||||||
|
| `end` | The index of the last character of the entity (not inclusive). ~~int~~ |
|
||||||
|
| `label` | Label attached to the entity. ~~str~~ |
|
||||||
|
| _Optional_ | |
|
||||||
|
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
|
||||||
|
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
> #### SPAN example data structure
|
||||||
|
>
|
||||||
|
> ```json
|
||||||
|
> {
|
||||||
|
> "text": "Welcome to the Bank of China.",
|
||||||
|
> "spans": [
|
||||||
|
> { "start_token": 3, "end_token": 6, "label": "ORG" },
|
||||||
|
> { "start_token": 5, "end_token": 6, "label": "GPE" }
|
||||||
|
> ],
|
||||||
|
> "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."]
|
||||||
|
> }
|
||||||
|
> ```
|
||||||
|
|
||||||
|
#### Span Classification data structure {id="structure-span"}
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ----------------------------------------------------------------------------------------- |
|
||||||
|
| `text` | String representation of the document text. ~~str~~ |
|
||||||
|
| `spans` | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||||
|
| `tokens` | List of word tokens. ~~List[str]~~ |
|
||||||
|
| _Optional_ | |
|
||||||
|
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||||
|
| `settings` | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||||
|
|
||||||
|
<Accordion title="Spans data structure">
|
||||||
|
|
||||||
|
| Dictionary Key | Description |
|
||||||
|
| -------------- | ------------------------------------------------------------- |
|
||||||
|
| `start_token` | The index of the first token of the span in `tokens`. ~~int~~ |
|
||||||
|
| `end_token` | The index of the end of the span in `tokens` (not inclusive, i.e. one past the last token). ~~int~~ |
|
||||||
|
| `label` | Label attached to the span. ~~str~~ |
|
||||||
|
| _Optional_ | |
|
||||||
|
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
|
||||||
|
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
|
||||||
|
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
### Visualizer options {id="displacy_options"}
|
### Visualizer options {id="displacy_options"}
|
||||||
|
|
||||||
The `options` argument lets you specify additional settings for each visualizer.
|
The `options` argument lets you specify additional settings for each visualizer.
|
||||||
|
|
|
@ -349,7 +349,8 @@ or
|
||||||
[SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
|
[SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
|
||||||
If you set `manual=True` on either `render()` or `serve()`, you can pass in data
|
If you set `manual=True` on either `render()` or `serve()`, you can pass in data
|
||||||
in displaCy's format as a dictionary (instead of `Doc` objects). There are
|
in displaCy's format as a dictionary (instead of `Doc` objects). There are
|
||||||
helper functions for converting `Doc` objects to displaCy's format for use with
|
helper functions for converting `Doc` objects to
|
||||||
|
[displaCy's format](/api/top-level#displacy_structures) for use with
|
||||||
`manual=True`: [`displacy.parse_deps`](/api/top-level#displacy.parse_deps),
|
`manual=True`: [`displacy.parse_deps`](/api/top-level#displacy.parse_deps),
|
||||||
[`displacy.parse_ents`](/api/top-level#displacy.parse_ents), and
|
[`displacy.parse_ents`](/api/top-level#displacy.parse_ents), and
|
||||||
[`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
|
[`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
|
||||||
|
|
Loading…
Reference in New Issue
Block a user