Tidy up displaCy and add "manual" option

Also don't require title in EntityRenderer
This commit is contained in:
ines 2017-05-22 18:48:20 +02:00
parent aa9c3bd464
commit a23f487b06
4 changed files with 43 additions and 30 deletions

View File

@ -10,27 +10,28 @@ _html = {}
IS_JUPYTER = is_in_jupyter() IS_JUPYTER = is_in_jupyter()
def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, options={}): def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER,
options={}, manual=False):
"""Render displaCy visualisation. """Render displaCy visualisation.
docs (list or Doc): Document(s) to visualise. docs (list or Doc): Document(s) to visualise.
style (unicode): Visualisation style, 'dep' or 'ent'. style (unicode): Visualisation style, 'dep' or 'ent'.
page (bool): Render markup as full HTML page. page (bool): Render markup as full HTML page.
minify (bool): Minify HTML markup. minify (bool): Minify HTML markup.
jupyter (bool): Experimental, use Jupyter's display() to output markup. jupyter (bool): Experimental, use Jupyter's `display()` to output markup.
options (dict): Visualiser-specific options, e.g. colors. options (dict): Visualiser-specific options, e.g. colors.
manual (bool): Don't parse `Doc` and instead, expect a dict or list of dicts.
RETURNS (unicode): Rendered HTML markup. RETURNS (unicode): Rendered HTML markup.
""" """
if isinstance(docs, Doc): factories = {'dep': (DependencyRenderer, parse_deps),
docs = [docs] 'ent': (EntityRenderer, parse_ents)}
if style == 'dep': if style not in factories:
renderer = DependencyRenderer(options=options)
parsed = [parse_deps(doc, options) for doc in docs]
elif style == 'ent':
renderer = EntityRenderer(options=options)
parsed = [parse_ents(doc, options) for doc in docs]
else:
raise ValueError("Unknown style: %s" % style) raise ValueError("Unknown style: %s" % style)
if isinstance(docs, Doc) or isinstance(docs, dict):
docs = [docs]
renderer, converter = factories[style]
renderer = renderer(options=options)
parsed = [converter(doc, options) for doc in docs] if not manual else docs
_html['parsed'] = renderer.render(parsed, page=page, minify=minify).strip() _html['parsed'] = renderer.render(parsed, page=page, minify=minify).strip()
html = _html['parsed'] html = _html['parsed']
if jupyter: # return HTML rendered by IPython display() if jupyter: # return HTML rendered by IPython display()
@ -39,7 +40,8 @@ def render(docs, style='dep', page=False, minify=False, jupyter=IS_JUPYTER, opti
return html return html
def serve(docs, style='dep', page=True, minify=False, options={}, port=5000): def serve(docs, style='dep', page=True, minify=False, options={}, manual=False,
port=5000):
"""Serve displaCy visualisation. """Serve displaCy visualisation.
docs (list or Doc): Document(s) to visualise. docs (list or Doc): Document(s) to visualise.
@ -47,10 +49,11 @@ def serve(docs, style='dep', page=True, minify=False, options={}, port=5000):
page (bool): Render markup as full HTML page. page (bool): Render markup as full HTML page.
minify (bool): Minify HTML markup. minify (bool): Minify HTML markup.
options (dict): Visualiser-specific options, e.g. colors. options (dict): Visualiser-specific options, e.g. colors.
manual (bool): Don't parse `Doc` and instead, expect a dict or list of dicts.
port (int): Port to serve visualisation. port (int): Port to serve visualisation.
""" """
from wsgiref import simple_server from wsgiref import simple_server
render(docs, style=style, page=page, minify=minify, options=options) render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
httpd = simple_server.make_server('0.0.0.0', port, app) httpd = simple_server.make_server('0.0.0.0', port, app)
prints("Using the '%s' visualizer" % style, title="Serving on port %d..." % port) prints("Using the '%s' visualizer" % style, title="Serving on port %d..." % port)
httpd.serve_forever() httpd.serve_forever()

View File

@ -175,7 +175,7 @@ class EntityRenderer(object):
minify (bool): Minify HTML markup. minify (bool): Minify HTML markup.
RETURNS (unicode): Rendered HTML markup. RETURNS (unicode): Rendered HTML markup.
""" """
rendered = [self.render_ents(p['text'], p['ents'], p['title']) for p in parsed] rendered = [self.render_ents(p['text'], p['ents'], p.get('title', None)) for p in parsed]
if page: if page:
docs = ''.join([TPL_FIGURE.format(content=doc) for doc in rendered]) docs = ''.join([TPL_FIGURE.format(content=doc) for doc in rendered])
markup = TPL_PAGE.format(content=docs) markup = TPL_PAGE.format(content=docs)

View File

@ -54,6 +54,15 @@ p
+cell #[+a("#options") Visualizer-specific options], e.g. colors. +cell #[+a("#options") Visualizer-specific options], e.g. colors.
+cell #[code {}] +cell #[code {}]
+row
+cell #[code manual]
+cell bool
+cell
| Don't parse #[code Doc] and instead, expect a dict or list of
| dicts. #[+a("/docs/usage/visualizers#manual-usage") See here]
| for formats and examples.
+cell #[code False]
+row +row
+cell #[code port] +cell #[code port]
+cell int +cell int
@ -111,6 +120,15 @@ p Render a dependency parse tree or named entity visualization.
+cell #[+a("#options") Visualizer-specific options], e.g. colors. +cell #[+a("#options") Visualizer-specific options], e.g. colors.
+cell #[code {}] +cell #[code {}]
+row
+cell #[code manual]
+cell bool
+cell
| Don't parse #[code Doc] and instead, expect a dict or list of
| dicts. #[+a("/docs/usage/visualizers#manual-usage") See here]
| for formats and examples.
+cell #[code False]
+footrow +footrow
+cell returns +cell returns
+cell unicode +cell unicode

View File

@ -287,24 +287,17 @@ p
| #[+a("http://www.nltk.org") NLTK] or | #[+a("http://www.nltk.org") NLTK] or
| #[+a("https://github.com/tensorflow/models/tree/master/syntaxnet") SyntaxNet]. | #[+a("https://github.com/tensorflow/models/tree/master/syntaxnet") SyntaxNet].
| Simply convert the dependency parse or recognised entities to displaCy's | Simply convert the dependency parse or recognised entities to displaCy's
| format and import #[code DependencyRenderer] or #[code EntityRenderer] | format and set #[code manual=True] on either #[code render()] or
| from #[code spacy.displacy.render]. A renderer class can be initialised | #[code serve()].
| with a dictionary of options. To generate the visualization markup, call
| the renderer's #[code render()] method on a list of dictionaries (one
| per visualization).
+aside-code("Example"). +aside-code("Example").
from spacy.displacy.render import EntityRenderer
ex = [{'text': 'But Google is starting from behind.', ex = [{'text': 'But Google is starting from behind.',
'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}], 'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}],
'title': None}] 'title': None}]
renderer = EntityRenderer() html = displacy.render(ex, style='ent', manual=True)
html = renderer.render(ex)
+code("DependencyRenderer input"). +code("DEP input").
[{ {
'words': [ 'words': [
{'text': 'This', 'tag': 'DT'}, {'text': 'This', 'tag': 'DT'},
{'text': 'is', 'tag': 'VBZ'}, {'text': 'is', 'tag': 'VBZ'},
@ -314,11 +307,10 @@ p
{'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'}, {'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'},
{'start': 2, 'end': 3, 'label': 'det', 'dir': 'left'}, {'start': 2, 'end': 3, 'label': 'det', 'dir': 'left'},
{'start': 1, 'end': 3, 'label': 'attr', 'dir': 'right'}] {'start': 1, 'end': 3, 'label': 'attr', 'dir': 'right'}]
}] }
+code("EntityRenderer input"). +code("ENT input").
[{ {
'text': 'But Google is starting from behind.', 'text': 'But Google is starting from behind.',
'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}], 'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}],
'title': None 'title': None
}]