mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Add displaCy data structures to docs (2) (#12875)
* Add data structures to docs * Adjusted descriptions for more consistency * Add _optional_ flag to parameters * Add tests and adjust optional title key in doc * Add title to dep visualizations * fix typo --------- Co-authored-by: thomashacker <EdwardSchmuhl@web.de>
This commit is contained in:
parent
49055ed7c8
commit
c9e9dccf79
|
@ -313,6 +313,8 @@ class DependencyRenderer:
|
|||
self.lang = settings.get("lang", DEFAULT_LANG)
|
||||
render_id = f"{id_prefix}-{i}"
|
||||
svg = self.render_svg(render_id, p["words"], p["arcs"])
|
||||
if p.get("title"):
|
||||
svg = TPL_TITLE.format(title=p.get("title")) + svg
|
||||
rendered.append(svg)
|
||||
if page:
|
||||
content = "".join([TPL_FIGURE.format(content=svg) for svg in rendered])
|
||||
|
|
|
@ -350,6 +350,78 @@ def test_displacy_render_wrapper(en_vocab):
|
|||
displacy.set_render_wrapper(lambda html: html)
|
||||
|
||||
|
||||
def test_displacy_render_manual_dep():
    """Test displacy.render with manual data for dep style.

    Every word's text and tag must appear in the rendered markup, and so
    must the optional ``title`` supplied alongside the parse.
    """
    parsed_dep = {
        "words": [
            {"text": "This", "tag": "DT"},
            {"text": "is", "tag": "VBZ"},
            {"text": "a", "tag": "DT"},
            {"text": "sentence", "tag": "NN"},
        ],
        "arcs": [
            {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
            {"start": 2, "end": 3, "label": "det", "dir": "left"},
            {"start": 1, "end": 3, "label": "attr", "dir": "right"},
        ],
        "title": "Title",
    }
    html = displacy.render([parsed_dep], style="dep", manual=True)
    for word in parsed_dep["words"]:
        assert word["text"] in html
        assert word["tag"] in html
    # The fixture sets a title, so check it is rendered, mirroring the
    # title assertions in the ent and span manual-render tests.
    assert parsed_dep["title"] in html
|
||||
|
||||
|
||||
def test_displacy_render_manual_ent():
    """Render ent-style manual data and check labels and titles appear."""
    # First document: a single in-range entity, no title.
    untitled_doc = {
        "text": "But Google is starting from behind.",
        "ents": [{"start": 4, "end": 10, "label": "ORG"}],
    }
    # Second document: entity offsets lie outside the text bounds, plus a title.
    titled_doc = {
        "text": "But Google is starting from behind.",
        "ents": [{"start": -100, "end": 100, "label": "COMPANY"}],
        "title": "Title",
    }
    parsed_ents = [untitled_doc, titled_doc]

    html = displacy.render(parsed_ents, style="ent", manual=True)

    for doc_data in parsed_ents:
        first_label = doc_data["ents"][0]["label"]
        assert first_label in html
        # Only documents that carry a title are checked for it.
        if "title" not in doc_data:
            continue
        assert doc_data["title"] in html
|
||||
|
||||
|
||||
def test_displacy_render_manual_span():
    """Render span-style manual data and check labels and titles appear."""

    def build_doc(**extra_keys):
        # Build a fresh dict (and fresh nested lists) on every call so the
        # two documents never share mutable state.
        doc_data = {
            "text": "Welcome to the Bank of China.",
            "spans": [
                {"start_token": 3, "end_token": 6, "label": "ORG"},
                {"start_token": 5, "end_token": 6, "label": "GPE"},
            ],
            "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."],
        }
        doc_data.update(extra_keys)
        return doc_data

    # Same content twice: once without and once with the optional title.
    parsed_spans = [build_doc(), build_doc(title="Title")]

    html = displacy.render(parsed_spans, style="span", manual=True)

    for doc_data in parsed_spans:
        first_label = doc_data["spans"][0]["label"]
        assert first_label in html
        # Only documents that carry a title are checked for it.
        if "title" not in doc_data:
            continue
        assert doc_data["title"] in html
|
||||
|
||||
|
||||
def test_displacy_options_case():
|
||||
ents = ["foo", "BAR"]
|
||||
colors = {"FOO": "red", "bar": "green"}
|
||||
|
|
|
@ -343,6 +343,130 @@ use with the `manual=True` argument in `displacy.render`.
|
|||
| `options` | Span-specific visualisation options. ~~Dict[str, Any]~~ |
|
||||
| **RETURNS** | Generated spans keyed by text (original text), spans and tokens. ~~dict~~ |
|
||||
|
||||
### Visualizer data structures {id="displacy_structures"}
|
||||
|
||||
You can use displaCy's data format to manually render data. This can be useful
|
||||
if you want to visualize output from other libraries. You can find examples of
|
||||
displaCy's different data formats below.
|
||||
|
||||
> #### DEP example data structure
|
||||
>
|
||||
> ```json
|
||||
> {
|
||||
> "words": [
|
||||
> { "text": "This", "tag": "DT" },
|
||||
> { "text": "is", "tag": "VBZ" },
|
||||
> { "text": "a", "tag": "DT" },
|
||||
> { "text": "sentence", "tag": "NN" }
|
||||
> ],
|
||||
> "arcs": [
|
||||
> { "start": 0, "end": 1, "label": "nsubj", "dir": "left" },
|
||||
> { "start": 2, "end": 3, "label": "det", "dir": "left" },
|
||||
> { "start": 1, "end": 3, "label": "attr", "dir": "right" }
|
||||
> ]
|
||||
> }
|
||||
> ```
|
||||
|
||||
#### Dependency Visualizer data structure {id="structure-dep"}
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ----------------------------------------------------------------------------------------------------------- |
|
||||
| `words` | List of dictionaries describing a word token (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||
| `arcs` | List of dictionaries describing the relations between words (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||
| _Optional_ | |
|
||||
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||
| `settings` | Dependency Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||
|
||||
<Accordion title="Words data structure">
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ---------------------------------------- |
|
||||
| `text` | Text content of the word. ~~str~~ |
|
||||
| `tag` | Fine-grained part-of-speech. ~~str~~ |
|
||||
| `lemma` | Base form of the word. ~~Optional[str]~~ |
|
||||
|
||||
</Accordion>
|
||||
|
||||
<Accordion title="Arcs data structure">
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ---------------------------------------------------- |
|
||||
| `start` | The index of the starting token. ~~int~~ |
|
||||
| `end` | The index of the ending token. ~~int~~ |
|
||||
| `label` | The type of dependency relation. ~~str~~ |
|
||||
| `dir` | Direction of the relation (`left`, `right`). ~~str~~ |
|
||||
|
||||
</Accordion>
|
||||
|
||||
> #### ENT example data structure
|
||||
>
|
||||
> ```json
|
||||
> {
|
||||
> "text": "But Google is starting from behind.",
|
||||
> "ents": [{ "start": 4, "end": 10, "label": "ORG" }]
|
||||
> }
|
||||
> ```
|
||||
|
||||
#### Named Entity Recognition data structure {id="structure-ent"}
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ------------------------------------------------------------------------------------------- |
|
||||
| `text` | String representation of the document text. ~~str~~ |
|
||||
| `ents` | List of dictionaries describing entities (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||
| _Optional_ | |
|
||||
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||
| `settings` | Entity Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||
|
||||
<Accordion title="Ents data structure">
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ---------------------------------------------------------------------- |
|
||||
| `start` | The index of the first character of the entity. ~~int~~ |
|
||||
| `end` | The index of the last character of the entity (not inclusive). ~~int~~ |
|
||||
| `label` | Label attached to the entity. ~~str~~ |
|
||||
| _Optional_ | |
|
||||
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
|
||||
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
|
||||
|
||||
</Accordion>
|
||||
|
||||
> #### SPAN example data structure
|
||||
>
|
||||
> ```json
|
||||
> {
|
||||
> "text": "Welcome to the Bank of China.",
|
||||
> "spans": [
|
||||
> { "start_token": 3, "end_token": 6, "label": "ORG" },
|
||||
> { "start_token": 5, "end_token": 6, "label": "GPE" }
|
||||
> ],
|
||||
> "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."]
|
||||
> }
|
||||
> ```
|
||||
|
||||
#### Span Classification data structure {id="structure-span"}
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ----------------------------------------------------------------------------------------- |
|
||||
| `text` | String representation of the document text. ~~str~~ |
|
||||
| `spans` | List of dictionaries describing spans (see structure below). ~~List[Dict[str, Any]]~~ |
|
||||
| `tokens` | List of word tokens. ~~List[str]~~ |
|
||||
| _Optional_ | |
|
||||
| `title` | Title of the visualization. ~~Optional[str]~~ |
|
||||
| `settings` | Span Visualizer options (see [here](/api/top-level#displacy_options)). ~~Dict[str, Any]~~ |
|
||||
|
||||
<Accordion title="Spans data structure">
|
||||
|
||||
| Dictionary Key | Description |
|
||||
| -------------- | ------------------------------------------------------------- |
|
||||
| `start_token` | The index of the first token of the span in `tokens`. ~~int~~ |
|
||||
| `end_token` | The index of the last token of the span in `tokens` (not inclusive). ~~int~~ |
|
||||
| `label` | Label attached to the span. ~~str~~ |
|
||||
| _Optional_ | |
|
||||
| `kb_id` | `KnowledgeBase` ID. ~~str~~ |
|
||||
| `kb_url` | `KnowledgeBase` URL. ~~str~~ |
|
||||
|
||||
</Accordion>
|
||||
|
||||
### Visualizer options {id="displacy_options"}
|
||||
|
||||
The `options` argument lets you specify additional settings for each visualizer.
|
||||
|
|
|
@ -349,7 +349,8 @@ or
|
|||
[SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
|
||||
If you set `manual=True` on either `render()` or `serve()`, you can pass in data
|
||||
in displaCy's format as a dictionary (instead of `Doc` objects). There are
|
||||
helper functions for converting `Doc` objects to displaCy's format for use with
|
||||
helper functions for converting `Doc` objects to
|
||||
[displaCy's format](/api/top-level#displacy_structures) for use with
|
||||
`manual=True`: [`displacy.parse_deps`](/api/top-level#displacy.parse_deps),
|
||||
[`displacy.parse_ents`](/api/top-level#displacy.parse_ents), and
|
||||
[`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
|
||||
|
|
Loading…
Reference in New Issue
Block a user