Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2018-12-20 18:45:44 +01:00
commit f6ac00fab3
5 changed files with 72 additions and 9 deletions

View File

@ -10,6 +10,7 @@ from ..util import is_in_jupyter
_html = {}
IS_JUPYTER = is_in_jupyter()
RENDER_WRAPPER = None
def render(
@ -48,6 +49,8 @@ def render(
parsed = [converter(doc, options) for doc in docs] if not manual else docs
_html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()
html = _html["parsed"]
if RENDER_WRAPPER is not None:
html = RENDER_WRAPPER(html)
if jupyter: # return HTML rendered by IPython display()
from IPython.core.display import display, HTML
@ -56,7 +59,14 @@ def render(
def serve(
docs, style="dep", page=True, minify=False, options={}, manual=False, port=5000
docs,
style="dep",
page=True,
minify=False,
options={},
manual=False,
port=5000,
host="0.0.0.0",
):
"""Serve displaCy visualisation.
@ -67,13 +77,17 @@ def serve(
options (dict): Visualiser-specific options, e.g. colors.
manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
port (int): Port to serve visualisation.
host (unicode): Host to serve visualisation.
"""
from wsgiref import simple_server
if IS_JUPYTER:
user_warning(Warnings.W011)
render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
httpd = simple_server.make_server("0.0.0.0", port, app)
httpd = simple_server.make_server(host, port, app)
print("\nUsing the '{}' visualizer".format(style))
print("Serving on port {}...\n".format(port))
print("Serving on http://{}:{} ...\n".format(host, port))
try:
httpd.serve_forever()
except KeyboardInterrupt:
@ -153,3 +167,20 @@ def parse_ents(doc, options={}):
user_warning(Warnings.W006)
title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
return {"text": doc.text, "ents": ents, "title": title}
def set_render_wrapper(func):
    """Set an optional wrapper function that is called around the generated
    HTML markup on displacy.render. This can be used to allow integration into
    other platforms, similar to Jupyter Notebooks that require functions to be
    called around the HTML. It can also be used to implement custom callbacks
    on render, or to embed the visualization in a custom page.

    func (callable): Function to call around markup before rendering it. Needs
        to take one argument, the HTML markup, and should return the desired
        output of displacy.render.
    RAISES (ValueError): If `func` is not callable.
    """
    global RENDER_WRAPPER
    # Use the builtin callable() rather than hasattr(func, "__call__"): the
    # hasattr check also passes for an object that merely carries an instance
    # attribute named `__call__`, which cannot actually be called and would
    # only fail later, when the wrapper is applied to the markup.
    if not callable(func):
        raise ValueError(Errors.E110.format(obj=type(func)))
    RENDER_WRAPPER = func

View File

@ -54,6 +54,12 @@ class Warnings(object):
"package overwrites built-in factory.")
W010 = ("As of v2.1.0, the PhraseMatcher doesn't have a phrase length "
"limit anymore, so the max_length argument is now deprecated.")
W011 = ("It looks like you're calling displacy.serve from within a "
"Jupyter notebook or a similar environment. This likely means "
"you're already running a local web server, so there's no need to "
"make displaCy start another one. Instead, you should be able to "
"replace displacy.serve with displacy.render to show the "
"visualization.")
@add_codes
@ -289,6 +295,7 @@ class Errors(object):
"thing. For example, use `nlp.create_pipeline('sentencizer')`")
E109 = ("Model for component '{name}' not initialized. Did you forget to load "
"a model, or forget to call begin_training()?")
E110 = ("Invalid displaCy render wrapper. Expected callable, got: {obj}")
@add_codes
@ -358,8 +365,12 @@ def _warn(message, warn_type="user"):
message (unicode): The message to display.
category (Warning): The Warning to show.
"""
w_id = message.split("[", 1)[1].split("]", 1)[0] # get ID from string
if warn_type in SPACY_WARNING_TYPES and w_id not in SPACY_WARNING_IGNORE:
if message.startswith("["):
w_id = message.split("[", 1)[1].split("]", 1)[0] # get ID from string
else:
w_id = None
ignore_warning = w_id and w_id in SPACY_WARNING_IGNORE
if warn_type in SPACY_WARNING_TYPES and not ignore_warning:
category = WARNINGS[warn_type]
stack = inspect.stack()[-1]
with warnings.catch_warnings():

View File

@ -72,6 +72,20 @@ def test_displacy_spans(en_vocab):
assert html.startswith("<div")
def test_displacy_render_wrapper(en_vocab):
    """Test that displaCy accepts custom rendering wrapper."""

    def wrapper(html):
        return "TEST" + html + "TEST"

    displacy.set_render_wrapper(wrapper)
    try:
        doc = get_doc(
            en_vocab, words=["But", "Google", "is", "starting", "from", "behind"]
        )
        doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
        html = displacy.render(doc, style="ent")
        assert html.startswith("TEST<div")
        assert html.endswith("/div>TEST")
    finally:
        # The wrapper is module-level state in displacy, so it would otherwise
        # stay installed for every test that runs afterwards. The setter only
        # accepts callables, so restore pass-through behaviour with an
        # identity function.
        displacy.set_render_wrapper(lambda html: html)
def test_displacy_raises_for_wrong_type(en_vocab):
    """Test that displacy.render raises a ValueError for a plain string."""
    not_a_doc = "hello world"
    with pytest.raises(ValueError):
        displacy.render(not_a_doc)

View File

@ -236,12 +236,13 @@ def is_in_jupyter():
RETURNS (bool): True if in Jupyter, False if not.
"""
# https://stackoverflow.com/a/39662359/6400719
try:
cfg = get_ipython().config
if cfg["IPKernelApp"]["parent_appname"] == "ipython-notebook":
return True
shell = get_ipython().__class__.__name__
if shell == "ZMQInteractiveShell":
return True # Jupyter notebook or qtconsole
except NameError:
return False
return False # Probably standard Python interpreter
return False

View File

@ -68,6 +68,12 @@ p
+cell Port to serve visualization.
+cell #[code 5000]
+row
+cell #[code host]
+cell unicode
+cell Host to serve visualization.
+cell #[code '0.0.0.0']
+h(3, "displacy.render") displacy.render
+tag method
+tag-new(2)