mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						f6ac00fab3
					
				| 
						 | 
					@ -10,6 +10,7 @@ from ..util import is_in_jupyter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_html = {}
 | 
					_html = {}
 | 
				
			||||||
IS_JUPYTER = is_in_jupyter()
 | 
					IS_JUPYTER = is_in_jupyter()
 | 
				
			||||||
 | 
					RENDER_WRAPPER = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def render(
 | 
					def render(
 | 
				
			||||||
| 
						 | 
					@ -48,6 +49,8 @@ def render(
 | 
				
			||||||
    parsed = [converter(doc, options) for doc in docs] if not manual else docs
 | 
					    parsed = [converter(doc, options) for doc in docs] if not manual else docs
 | 
				
			||||||
    _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()
 | 
					    _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip()
 | 
				
			||||||
    html = _html["parsed"]
 | 
					    html = _html["parsed"]
 | 
				
			||||||
 | 
					    if RENDER_WRAPPER is not None:
 | 
				
			||||||
 | 
					        html = RENDER_WRAPPER(html)
 | 
				
			||||||
    if jupyter:  # return HTML rendered by IPython display()
 | 
					    if jupyter:  # return HTML rendered by IPython display()
 | 
				
			||||||
        from IPython.core.display import display, HTML
 | 
					        from IPython.core.display import display, HTML
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -56,7 +59,14 @@ def render(
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def serve(
 | 
					def serve(
 | 
				
			||||||
    docs, style="dep", page=True, minify=False, options={}, manual=False, port=5000
 | 
					    docs,
 | 
				
			||||||
 | 
					    style="dep",
 | 
				
			||||||
 | 
					    page=True,
 | 
				
			||||||
 | 
					    minify=False,
 | 
				
			||||||
 | 
					    options={},
 | 
				
			||||||
 | 
					    manual=False,
 | 
				
			||||||
 | 
					    port=5000,
 | 
				
			||||||
 | 
					    host="0.0.0.0",
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    """Serve displaCy visualisation.
 | 
					    """Serve displaCy visualisation.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -67,13 +77,17 @@ def serve(
 | 
				
			||||||
    options (dict): Visualiser-specific options, e.g. colors.
 | 
					    options (dict): Visualiser-specific options, e.g. colors.
 | 
				
			||||||
    manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
 | 
					    manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts.
 | 
				
			||||||
    port (int): Port to serve visualisation.
 | 
					    port (int): Port to serve visualisation.
 | 
				
			||||||
 | 
					    host (unicode): Host to serve visualisation.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    from wsgiref import simple_server
 | 
					    from wsgiref import simple_server
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if IS_JUPYTER:
 | 
				
			||||||
 | 
					        user_warning(Warnings.W011)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
 | 
					    render(docs, style=style, page=page, minify=minify, options=options, manual=manual)
 | 
				
			||||||
    httpd = simple_server.make_server("0.0.0.0", port, app)
 | 
					    httpd = simple_server.make_server(host, port, app)
 | 
				
			||||||
    print("\nUsing the '{}' visualizer".format(style))
 | 
					    print("\nUsing the '{}' visualizer".format(style))
 | 
				
			||||||
    print("Serving on port {}...\n".format(port))
 | 
					    print("Serving on http://{}:{} ...\n".format(host, port))
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        httpd.serve_forever()
 | 
					        httpd.serve_forever()
 | 
				
			||||||
    except KeyboardInterrupt:
 | 
					    except KeyboardInterrupt:
 | 
				
			||||||
| 
						 | 
					@ -153,3 +167,20 @@ def parse_ents(doc, options={}):
 | 
				
			||||||
        user_warning(Warnings.W006)
 | 
					        user_warning(Warnings.W006)
 | 
				
			||||||
    title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
 | 
					    title = doc.user_data.get("title", None) if hasattr(doc, "user_data") else None
 | 
				
			||||||
    return {"text": doc.text, "ents": ents, "title": title}
 | 
					    return {"text": doc.text, "ents": ents, "title": title}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def set_render_wrapper(func):
 | 
				
			||||||
 | 
					    """Set an optional wrapper function that is called around the generated
 | 
				
			||||||
 | 
					    HTML markup on displacy.render. This can be used to allow integration into
 | 
				
			||||||
 | 
					    other platforms, similar to Jupyter Notebooks that require functions to be
 | 
				
			||||||
 | 
					    called around the HTML. It can also be used to implement custom callbacks
 | 
				
			||||||
 | 
					    on render, or to embed the visualization in a custom page.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    func (callable): Function to call around markup before rendering it. Needs
 | 
				
			||||||
 | 
					        to take one argument, the HTML markup, and should return the desired
 | 
				
			||||||
 | 
					        output of displacy.render.
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    global RENDER_WRAPPER
 | 
				
			||||||
 | 
					    if not hasattr(func, "__call__"):
 | 
				
			||||||
 | 
					        raise ValueError(Errors.E110.format(obj=type(func)))
 | 
				
			||||||
 | 
					    RENDER_WRAPPER = func
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -54,6 +54,12 @@ class Warnings(object):
 | 
				
			||||||
            "package overwrites built-in factory.")
 | 
					            "package overwrites built-in factory.")
 | 
				
			||||||
    W010 = ("As of v2.1.0, the PhraseMatcher doesn't have a phrase length "
 | 
					    W010 = ("As of v2.1.0, the PhraseMatcher doesn't have a phrase length "
 | 
				
			||||||
            "limit anymore, so the max_length argument is now deprecated.")
 | 
					            "limit anymore, so the max_length argument is now deprecated.")
 | 
				
			||||||
 | 
					    W011 = ("It looks like you're calling displacy.serve from within a "
 | 
				
			||||||
 | 
					            "Jupyter notebook or a similar environment. This likely means "
 | 
				
			||||||
 | 
					            "you're already running a local web server, so there's no need to "
 | 
				
			||||||
 | 
					            "make displaCy start another one. Instead, you should be able to "
 | 
				
			||||||
 | 
					            "replace displacy.serve with displacy.render to show the "
 | 
				
			||||||
 | 
					            "visualization.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@add_codes
 | 
					@add_codes
 | 
				
			||||||
| 
						 | 
					@ -289,6 +295,7 @@ class Errors(object):
 | 
				
			||||||
            "thing. For example, use `nlp.create_pipeline('sentencizer')`")
 | 
					            "thing. For example, use `nlp.create_pipeline('sentencizer')`")
 | 
				
			||||||
    E109 = ("Model for component '{name}' not initialized. Did you forget to load "
 | 
					    E109 = ("Model for component '{name}' not initialized. Did you forget to load "
 | 
				
			||||||
            "a model, or forget to call begin_training()?")
 | 
					            "a model, or forget to call begin_training()?")
 | 
				
			||||||
 | 
					    E110 = ("Invalid displaCy render wrapper. Expected callable, got: {obj}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@add_codes
 | 
					@add_codes
 | 
				
			||||||
| 
						 | 
					@ -358,8 +365,12 @@ def _warn(message, warn_type="user"):
 | 
				
			||||||
    message (unicode): The message to display.
 | 
					    message (unicode): The message to display.
 | 
				
			||||||
    category (Warning): The Warning to show.
 | 
					    category (Warning): The Warning to show.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    w_id = message.split("[", 1)[1].split("]", 1)[0]  # get ID from string
 | 
					    if message.startswith("["):
 | 
				
			||||||
    if warn_type in SPACY_WARNING_TYPES and w_id not in SPACY_WARNING_IGNORE:
 | 
					        w_id = message.split("[", 1)[1].split("]", 1)[0]  # get ID from string
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        w_id = None
 | 
				
			||||||
 | 
					    ignore_warning = w_id and w_id in SPACY_WARNING_IGNORE
 | 
				
			||||||
 | 
					    if warn_type in SPACY_WARNING_TYPES and not ignore_warning:
 | 
				
			||||||
        category = WARNINGS[warn_type]
 | 
					        category = WARNINGS[warn_type]
 | 
				
			||||||
        stack = inspect.stack()[-1]
 | 
					        stack = inspect.stack()[-1]
 | 
				
			||||||
        with warnings.catch_warnings():
 | 
					        with warnings.catch_warnings():
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -72,6 +72,20 @@ def test_displacy_spans(en_vocab):
 | 
				
			||||||
    assert html.startswith("<div")
 | 
					    assert html.startswith("<div")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test_displacy_render_wrapper(en_vocab):
 | 
				
			||||||
 | 
					    """Test that displaCy accepts custom rendering wrapper."""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def wrapper(html):
 | 
				
			||||||
 | 
					        return "TEST" + html + "TEST"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    displacy.set_render_wrapper(wrapper)
 | 
				
			||||||
 | 
					    doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
 | 
				
			||||||
 | 
					    doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
 | 
				
			||||||
 | 
					    html = displacy.render(doc, style="ent")
 | 
				
			||||||
 | 
					    assert html.startswith("TEST<div")
 | 
				
			||||||
 | 
					    assert html.endswith("/div>TEST")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_displacy_raises_for_wrong_type(en_vocab):
 | 
					def test_displacy_raises_for_wrong_type(en_vocab):
 | 
				
			||||||
    with pytest.raises(ValueError):
 | 
					    with pytest.raises(ValueError):
 | 
				
			||||||
        displacy.render("hello world")
 | 
					        displacy.render("hello world")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -236,12 +236,13 @@ def is_in_jupyter():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    RETURNS (bool): True if in Jupyter, False if not.
 | 
					    RETURNS (bool): True if in Jupyter, False if not.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					    # https://stackoverflow.com/a/39662359/6400719
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        cfg = get_ipython().config
 | 
					        shell = get_ipython().__class__.__name__
 | 
				
			||||||
        if cfg["IPKernelApp"]["parent_appname"] == "ipython-notebook":
 | 
					        if shell == "ZMQInteractiveShell":
 | 
				
			||||||
            return True
 | 
					            return True  # Jupyter notebook or qtconsole
 | 
				
			||||||
    except NameError:
 | 
					    except NameError:
 | 
				
			||||||
        return False
 | 
					        return False  # Probably standard Python interpreter
 | 
				
			||||||
    return False
 | 
					    return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,6 +68,12 @@ p
 | 
				
			||||||
        +cell Port to serve visualization.
 | 
					        +cell Port to serve visualization.
 | 
				
			||||||
        +cell #[code 5000]
 | 
					        +cell #[code 5000]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    +row
 | 
				
			||||||
 | 
					        +cell #[code host]
 | 
				
			||||||
 | 
					        +cell unicode
 | 
				
			||||||
 | 
					        +cell Host to serve visualization.
 | 
				
			||||||
 | 
					        +cell #[code '0.0.0.0']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
+h(3, "displacy.render") displacy.render
 | 
					+h(3, "displacy.render") displacy.render
 | 
				
			||||||
    +tag method
 | 
					    +tag method
 | 
				
			||||||
    +tag-new(2)
 | 
					    +tag-new(2)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user