From db7b9938a40830f95f3674c00f122f90805b4f5a Mon Sep 17 00:00:00 2001 From: Peter Baumgartner <5107405+pmbaumgartner@users.noreply.github.com> Date: Tue, 16 Aug 2022 11:23:34 -0400 Subject: [PATCH] Docs: displaCy documentation - data types, `parse_{deps,ents,spans}`, spans example (#10950) * add in spans example and parse references * rm autoformatter * rm extra ents copy * TypedDict draft * type fixes * restore non-documentation files * docs update * fix spans example * fix hyperlinks * add parse example * example fix + argument fix * fix api arg in docs * fix bad variable replacement * fix spacing in style Co-authored-by: Sofie Van Landeghem * fix spacing on table * fix spacing on table * rm temp files Co-authored-by: Sofie Van Landeghem --- spacy/displacy/__init__.py | 5 ++- website/docs/api/top-level.md | 71 ++++++++++++++++++++++++++++++- website/docs/usage/visualizers.md | 39 ++++++++++++++--- 3 files changed, 104 insertions(+), 11 deletions(-) diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 5d49b6eb7..7bb300afa 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -123,7 +123,8 @@ def app(environ, start_response): def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]: """Generate dependency parse in {'words': [], 'arcs': []} format. - doc (Doc): Document do parse. + orig_doc (Doc): Document to parse. + options (Dict[str, Any]): Dependency parse specific visualisation options. RETURNS (dict): Generated dependency parse keyed by words and arcs. """ doc = Doc(orig_doc.vocab).from_bytes( @@ -209,7 +210,7 @@ def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]: def parse_spans(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]: - """Generate spans in [{start: i, end: i, label: 'label'}] format. + """Generate spans in [{start_token: i, end_token: i, label: 'label'}] format. doc (Doc): Document to parse. options (Dict[str, any]): Span-specific visualisation options. 
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index c96c571e9..1e1925442 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -240,7 +240,7 @@ browser. Will run a simple web server. | Name | Description | | --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ | -| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` 3.3. Defaults to `"dep"`. ~~str~~ | +| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` 3.3. Defaults to `"dep"`. ~~str~~ | | `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ | | `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ | | `options` | [Visualizer-specific options](#displacy_options), e.g. colors. ~~Dict[str, Any]~~ | @@ -265,7 +265,7 @@ Render a dependency parse tree or named entity visualization. | Name | Description | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span, dict]], Doc, Span, dict]~~ | -| `style` | Visualization style,`"dep"`, `"ent"` or `"span"` 3.3. Defaults to `"dep"`. ~~str~~ | +| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` 3.3. Defaults to `"dep"`. ~~str~~ | | `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ | | `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ | | `options` | [Visualizer-specific options](#displacy_options), e.g. colors. ~~Dict[str, Any]~~ | @@ -273,6 +273,73 @@ Render a dependency parse tree or named entity visualization. 
| `jupyter` | Explicitly enable or disable "[Jupyter](http://jupyter.org/) mode" to return markup ready to be rendered in a notebook. Detected automatically if `None` (default). ~~Optional[bool]~~ | | **RETURNS** | The rendered HTML markup. ~~str~~ | +### displacy.parse_deps {#displacy.parse_deps tag="method" new="2"} + +Generate dependency parse in `{'words': [], 'arcs': []}` format. +For use with the `manual=True` argument in `displacy.render`. + +> #### Example +> +> ```python +> import spacy +> from spacy import displacy +> nlp = spacy.load("en_core_web_sm") +> doc = nlp("This is a sentence.") +> deps_parse = displacy.parse_deps(doc) +> html = displacy.render(deps_parse, style="dep", manual=True) +> ``` + +| Name | Description | +| ----------- | ------------------------------------------------------------------- | +| `orig_doc` | Doc to parse dependencies. ~~Doc~~ | +| `options` | Dependency parse specific visualisation options. ~~Dict[str, Any]~~ | +| **RETURNS** | Generated dependency parse keyed by words and arcs. ~~dict~~ | + +### displacy.parse_ents {#displacy.parse_ents tag="method" new="2"} + +Generate named entities in `[{start: i, end: i, label: 'label'}]` format. +For use with the `manual=True` argument in `displacy.render`. + +> #### Example +> +> ```python +> import spacy +> from spacy import displacy +> nlp = spacy.load("en_core_web_sm") +> doc = nlp("But Google is starting from behind.") +> ents_parse = displacy.parse_ents(doc) +> html = displacy.render(ents_parse, style="ent", manual=True) +> ``` + +| Name | Description | +| ----------- | ------------------------------------------------------------------- | +| `doc` | Doc to parse entities. ~~Doc~~ | +| `options` | NER-specific visualisation options. ~~Dict[str, Any]~~ | +| **RETURNS** | Generated entities keyed by text (original text) and ents. 
~~dict~~ | + +### displacy.parse_spans {#displacy.parse_spans tag="method" new="2"} + +Generate spans in `[{start_token: i, end_token: i, label: 'label'}]` format. +For use with the `manual=True` argument in `displacy.render`. + +> #### Example +> +> ```python +> import spacy +> from spacy import displacy +> nlp = spacy.load("en_core_web_sm") +> doc = nlp("But Google is starting from behind.") +> doc.spans['orgs'] = [doc[1:2]] +> spans_parse = displacy.parse_spans(doc, options={"spans_key": "orgs"}) +> html = displacy.render(spans_parse, style="span", manual=True) +> ``` + +| Name | Description | +| ----------- | ------------------------------------------------------------------- | +| `doc` | Doc to parse spans. ~~Doc~~ | +| `options` | Span-specific visualisation options. ~~Dict[str, Any]~~ | +| **RETURNS** | Generated spans keyed by text (original text), spans and tokens. ~~dict~~ | + ### Visualizer options {#displacy_options} The `options` argument lets you specify additional settings for each visualizer. diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md index d2892b863..da847d939 100644 --- a/website/docs/usage/visualizers.md +++ b/website/docs/usage/visualizers.md @@ -198,12 +198,12 @@ import DisplacySpanHtml from 'images/displacy-span.html' The span visualizer lets you customize the following `options`: -| Argument | Description | -|-----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------| -| `spans_key` | Which spans key to render spans from. Default is `"sc"`. ~~str~~ | +| Argument | Description | +| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `spans_key` | Which spans key to render spans from. Default is `"sc"`.
~~str~~ | | `templates` | Dictionary containing the keys `"span"`, `"slice"`, and `"start"`. These dictate how the overall span, a span slice, and the starting token will be rendered. ~~Optional[Dict[str, str]]~~ | -| `kb_url_template` | Optional template to construct the KB url for the entity to link to. Expects a python f-string format with single field to fill in ~~Optional[str]~~ | -| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ | +| `kb_url_template` | Optional template to construct the KB url for the entity to link to. Expects a Python f-string format with a single field to fill in. ~~Optional[str]~~ | +| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ | Because spans can be stored across different keys in `doc.spans`, you need to specify which one displaCy should use with `spans_key` (`sc` is the default). @@ -343,9 +343,21 @@ want to visualize output from other libraries, like [NLTK](http://www.nltk.org) or [SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet). If you set `manual=True` on either `render()` or `serve()`, you can pass in data -in displaCy's format as a dictionary (instead of `Doc` objects). +in displaCy's format as a dictionary (instead of `Doc` objects). There are helper +functions for converting `Doc` objects to displaCy's format for use with `manual=True`: +[`displacy.parse_deps`](/api/top-level#displacy.parse_deps), +[`displacy.parse_ents`](/api/top-level#displacy.parse_ents), +and [`displacy.parse_spans`](/api/top-level#displacy.parse_spans).
-> #### Example +> #### Example with parse function +> +> ```python +> doc = nlp("But Google is starting from behind.") +> ex = displacy.parse_ents(doc) +> html = displacy.render(ex, style="ent", manual=True) +> ``` + +> #### Example with raw data > > ```python > ex = [{"text": "But Google is starting from behind.", @@ -354,6 +366,7 @@ in displaCy's format as a dictionary (instead of `Doc` objects). > html = displacy.render(ex, style="ent", manual=True) > ``` + ```python ### DEP input { @@ -389,6 +402,18 @@ in displaCy's format as a dictionary (instead of `Doc` objects). } ``` +```python +### SPANS input +{ + "text": "Welcome to the Bank of China.", + "spans": [ + {"start_token": 3, "end_token": 6, "label": "ORG"}, + {"start_token": 5, "end_token": 6, "label": "GPE"}, + ], + "tokens": ["Welcome", "to", "the", "Bank", "of", "China", "."], +} +``` + ## Using displaCy in a web application {#webapp} If you want to use the visualizers as part of a web application, for example to