Allow passing a Span to displacy.parse_deps

This commit is contained in:
BLKSerene 2023-03-28 13:56:23 +08:00
parent dba4e7bece
commit 57ef7ea361
2 changed files with 20 additions and 2 deletions

View File

@ -125,13 +125,17 @@ def app(environ, start_response):
return [res]
def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
def parse_deps(
orig_doc: Union[Doc, Span], options: Dict[str, Any] = {}
) -> Dict[str, Any]:
"""Generate dependency parse in {'words': [], 'arcs': []} format.
orig_doc (Doc): Document to parse.
orig_doc Union[Doc, Span]: Document to parse.
options (Dict[str, Any]): Dependency parse specific visualisation options.
RETURNS (dict): Generated dependency parse keyed by words and arcs.
"""
if isinstance(orig_doc, Span):
orig_doc = orig_doc.as_doc()
doc = Doc(orig_doc.vocab).from_bytes(
orig_doc.to_bytes(exclude=["user_data", "user_hooks"])
)

View File

@ -275,6 +275,20 @@ def test_displacy_parse_deps(en_vocab):
{"start": 2, "end": 3, "label": "det", "dir": "left"},
{"start": 1, "end": 3, "label": "attr", "dir": "right"},
]
# Test that displacy.parse_deps converts Span to Doc
deps = displacy.parse_deps(doc[:])
assert isinstance(deps, dict)
assert deps["words"] == [
{"lemma": None, "text": words[0], "tag": pos[0]},
{"lemma": None, "text": words[1], "tag": pos[1]},
{"lemma": None, "text": words[2], "tag": pos[2]},
{"lemma": None, "text": words[3], "tag": pos[3]},
]
assert deps["arcs"] == [
{"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
{"start": 2, "end": 3, "label": "det", "dir": "left"},
{"start": 1, "end": 3, "label": "attr", "dir": "right"},
]
def test_displacy_invalid_arcs():