From ce258670b7188f253f81c51129317ae3430f27bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ye=20Lei=20=28=E5=8F=B6=E7=A3=8A=29?= Date: Fri, 31 Mar 2023 15:44:01 +0800 Subject: [PATCH] Allow passing a Span to displacy.parse_deps (#12477) * Allow passing a Span to displacy.parse_deps * Update docstring Co-authored-by: Adriane Boyd * Update API docs --------- Co-authored-by: Adriane Boyd --- spacy/displacy/__init__.py | 8 ++++++-- spacy/tests/test_displacy.py | 14 ++++++++++++++ website/docs/api/top-level.mdx | 2 +- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index ea6bba2c9..f42dad0c9 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -125,13 +125,17 @@ def app(environ, start_response): return [res] -def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]: +def parse_deps( + orig_doc: Union[Doc, Span], options: Dict[str, Any] = {} +) -> Dict[str, Any]: """Generate dependency parse in {'words': [], 'arcs': []} format. - orig_doc (Doc): Document to parse. + orig_doc (Union[Doc, Span]): Document to parse. options (Dict[str, Any]): Dependency parse specific visualisation options. RETURNS (dict): Generated dependency parse keyed by words and arcs. """ + if isinstance(orig_doc, Span): + orig_doc = orig_doc.as_doc() doc = Doc(orig_doc.vocab).from_bytes( orig_doc.to_bytes(exclude=["user_data", "user_hooks"]) ) diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py index f298b38e0..837a92e02 100644 --- a/spacy/tests/test_displacy.py +++ b/spacy/tests/test_displacy.py @@ -275,6 +275,20 @@ def test_displacy_parse_deps(en_vocab): {"start": 2, "end": 3, "label": "det", "dir": "left"}, {"start": 1, "end": 3, "label": "attr", "dir": "right"}, ] + # Test that displacy.parse_deps converts Span to Doc + deps = displacy.parse_deps(doc[:]) + assert isinstance(deps, dict) + assert deps["words"] == [ + {"lemma": None, "text": words[0], "tag": pos[0]}, + {"lemma": None, "text": words[1], "tag": pos[1]}, + {"lemma": None, "text": words[2], "tag": pos[2]}, + {"lemma": None, "text": words[3], "tag": pos[3]}, + ] + assert deps["arcs"] == [ + {"start": 0, "end": 1, "label": "nsubj", "dir": "left"}, + {"start": 2, "end": 3, "label": "det", "dir": "left"}, + {"start": 1, "end": 3, "label": "attr", "dir": "right"}, + ] def test_displacy_invalid_arcs(): diff --git a/website/docs/api/top-level.mdx b/website/docs/api/top-level.mdx index d0851a59f..9193b2a7b 100644 --- a/website/docs/api/top-level.mdx +++ b/website/docs/api/top-level.mdx @@ -291,7 +291,7 @@ the `manual=True` argument in `displacy.render`. | Name | Description | | ----------- | ------------------------------------------------------------------- | -| `orig_doc` | Doc to parse dependencies. ~~Doc~~ | +| `orig_doc` | Doc or span to parse dependencies. ~~Union[Doc, Span]~~ | | `options` | Dependency parse specific visualisation options. ~~Dict[str, Any]~~ | | **RETURNS** | Generated dependency parse keyed by words and arcs. ~~dict~~ |