From eb8bb35c13a5f59826761065e4eeccee69d4c5a7 Mon Sep 17 00:00:00 2001 From: Zhangrp Date: Tue, 10 Jan 2023 14:52:57 +0800 Subject: [PATCH] improve ux for displacy when the serve port is in use (#11948) * check port in use and add itself * check port in use and add itself * Auto switch to nearest available port. * Use bind to check port instead of connect_ex. * Reformat. * Add auto_select_port argument. * update docs for displacy.serve * Update spacy/errors.py Co-authored-by: Paul O'Leary McCann * Update website/docs/api/top-level.md Co-authored-by: Paul O'Leary McCann * Update spacy/errors.py Co-authored-by: Paul O'Leary McCann * Add test using multiprocessing * fix argument name * Increase sleep times Want to rule this out as a cause of test failure * Don't terminate a process that isn't alive * Refactor port finding logic This moves all the port logic into its own util function, which can be tested without having to background a server directly. * Use with for the server This ensures the server is closed correctly. * Pass in the host when checking port availability * Shorten argument name * Update error codes following merge * Add types for arguments, specify docstrings. * Add typing for arguments with default value. * Update docstring to match spaCy format. * Update docstring to match spaCy format. * Fix docs Arg name changed from `auto_select_port` to just `auto_select`. * Revert "Fix docs" This reverts commit 356966fe849660c0c08b670c6aee1aa2af05c1c1. Co-authored-by: zhiiw <1302593554@qq.com> Co-authored-by: Paul O'Leary McCann Co-authored-by: Raphael Mitsch --- spacy/displacy/__init__.py | 9 ++++++- spacy/errors.py | 5 ++++ spacy/tests/test_misc.py | 15 ++++++++++- spacy/util.py | 48 +++++++++++++++++++++++++++++++++++ website/docs/api/top-level.md | 21 +++++++-------- 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index 2f2058b8e..a3cfd96dd 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -11,6 +11,7 @@ from .render import DependencyRenderer, EntityRenderer, SpanRenderer from ..tokens import Doc, Span from ..errors import Errors, Warnings from ..util import is_in_jupyter +from ..util import find_available_port _html = {} @@ -82,6 +83,7 @@ def serve( manual: bool = False, port: int = 5000, host: str = "0.0.0.0", + auto_select_port: bool = False, ) -> None: """Serve displaCy visualisation. @@ -93,15 +95,20 @@ def serve( manual (bool): Don't parse `Doc` and instead expect a dict/list of dicts. port (int): Port to serve visualisation. host (str): Host to serve visualisation. + auto_select_port (bool): Automatically select a port if the specified port is in use. DOCS: https://spacy.io/api/top-level#displacy.serve USAGE: https://spacy.io/usage/visualizers """ from wsgiref import simple_server + port = find_available_port(port, host, auto_select_port) + if is_in_jupyter(): warnings.warn(Warnings.W011) - render(docs, style=style, page=page, minify=minify, options=options, manual=manual) + render( + docs, style=style, page=page, minify=minify, options=options, manual=manual + ) httpd = simple_server.make_server(host, port, app) print(f"\nUsing the '{style}' visualizer") print(f"Serving on http://{host}:{port} ...\n") diff --git a/spacy/errors.py b/spacy/errors.py index cd9281e91..498df0320 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -214,6 +214,7 @@ class Warnings(metaclass=ErrorsWithCodes): "is a Cython extension type.") W123 = ("Argument `enable` with value {enable} does not contain all values specified in the config option " "`enabled` ({enabled}). Be aware that this might affect other components in your pipeline.") + W124 = ("{host}:{port} is already in use, using the nearest available port {serve_port} as an alternative.") class Errors(metaclass=ErrorsWithCodes): @@ -963,6 +964,10 @@ class Errors(metaclass=ErrorsWithCodes): "knowledge base, use `InMemoryLookupKB`.") E1047 = ("`find_threshold()` only supports components with a `scorer` attribute.") E1048 = ("Got '{unexpected}' as console progress bar type, but expected one of the following: {expected}") + E1049 = ("No available port found for displaCy on host {host}. Please specify an available port " + "with `displacy.serve(doc, port)`") + E1050 = ("Port {port} is already in use. Please specify an available port with `displacy.serve(doc, port)` " + "or use `auto_switch_port=True` to pick an available port automatically.") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py index 1c9b045ac..618f17334 100644 --- a/spacy/tests/test_misc.py +++ b/spacy/tests/test_misc.py @@ -8,7 +8,7 @@ from spacy import prefer_gpu, require_gpu, require_cpu from spacy.ml._precomputable_affine import PrecomputableAffine from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding from spacy.util import dot_to_object, SimpleFrozenList, import_file -from spacy.util import to_ternary_int +from spacy.util import to_ternary_int, find_available_port from thinc.api import Config, Optimizer, ConfigValidationError from thinc.api import get_current_ops, set_current_ops, NumpyOps, CupyOps, MPSOps from thinc.compat import has_cupy_gpu, has_torch_mps_gpu @@ -434,3 +434,16 @@ def test_to_ternary_int(): assert to_ternary_int(-10) == -1 assert to_ternary_int("string") == -1 assert to_ternary_int([0, "string"]) == -1 + + +def test_find_available_port(): + host = "0.0.0.0" + port = 5000 + assert find_available_port(port, host) == port, "Port 5000 isn't free" + + from wsgiref.simple_server import make_server, demo_app + + with make_server(host, port, demo_app) as httpd: + with pytest.warns(UserWarning, match="already in use"): + found_port = find_available_port(port, host, auto_select=True) + assert found_port == port + 1, "Didn't find next port" diff --git a/spacy/util.py b/spacy/util.py index 8d211a9a5..8bf8fb1b0 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -31,6 +31,7 @@ import shlex import inspect import pkgutil import logging +import socket try: import cupy.random @@ -1736,3 +1737,50 @@ def all_equal(iterable): (or if the input is an empty sequence), False otherwise.""" g = itertools.groupby(iterable) return next(g, True) and not next(g, False) + + +def _is_port_in_use(port: int, host: str = "localhost") -> bool: + """Check if 'host:port' is in use. Return True if it is, False otherwise. + + port (int): the port to check + host (str): the host to check (default "localhost") + RETURNS (bool): Whether 'host:port' is in use. + """ + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + s.bind((host, port)) + return False + except socket.error: + return True + finally: + s.close() + + +def find_available_port(start: int, host: str, auto_select: bool = False) -> int: + """Given a starting port and a host, handle finding a port. + + If `auto_select` is False, a busy port will raise an error. + + If `auto_select` is True, the next free higher port will be used. + + start (int): the port to start looking from + host (str): the host to find a port on + auto_select (bool): whether to automatically select a new port if the given port is busy (default False) + RETURNS (int): The port to use. + """ + if not _is_port_in_use(start, host): + return start + + port = start + if not auto_select: + raise ValueError(Errors.E1050.format(port=port)) + + while _is_port_in_use(port, host) and port < 65535: + port += 1 + + if port == 65535 and _is_port_in_use(port, host): + raise ValueError(Errors.E1049.format(host=host)) + + # if we get here, the port changed + warnings.warn(Warnings.W124.format(host=host, port=start, serve_port=port)) + return port diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index 6a63e07da..9d3e463d8 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -237,16 +237,17 @@ browser. Will run a simple web server. > displacy.serve([doc1, doc2], style="dep") > ``` -| Name | Description | -| --------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ | -| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` 3.3. Defaults to `"dep"`. ~~str~~ | -| `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ | -| `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ | -| `options` | [Visualizer-specific options](#displacy_options), e.g. colors. ~~Dict[str, Any]~~ | -| `manual` | Don't parse `Doc` and instead expect a dict or list of dicts. [See here](/usage/visualizers#manual-usage) for formats and examples. Defaults to `False`. ~~bool~~ | -| `port` | Port to serve visualization. Defaults to `5000`. ~~int~~ | -| `host` | Host to serve visualization. Defaults to `"0.0.0.0"`. ~~str~~ | +| Name | Description | +| ------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ | +| `style` | Visualization style, `"dep"`, `"ent"` or `"span"` 3.3. Defaults to `"dep"`. ~~str~~ | +| `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ | +| `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ | +| `options` | [Visualizer-specific options](#displacy_options), e.g. colors. ~~Dict[str, Any]~~ | +| `manual` | Don't parse `Doc` and instead expect a dict or list of dicts. [See here](/usage/visualizers#manual-usage) for formats and examples. Defaults to `False`. ~~bool~~ | +| `port` | Port to serve visualization. Defaults to `5000`. ~~int~~ | +| `host` | Host to serve visualization. Defaults to `"0.0.0.0"`. ~~str~~ | +| `auto_select_port` | If `True`, automatically switch to a different port if the specified port is already in use. Defaults to `False`. ~~bool~~ | ### displacy.render {#displacy.render tag="method" new="2"}