displaCy: Avoid increasing levels for identical arcs (#10639)

* Test for arc levels for identical arcs

Also moves the test so that it sits in numerical order with the other numbered tests.

* displaCy: filter identical arcs

Avoid increased levels due to identical arcs by first
filtering any identical arcs.

* Sort keys before filtering

Manually entered arcs whose keys are in a different order would previously
become different tuples and therefore would not be filtered correctly.

Co-authored-by: Joachim Fainberg <joachimfainberg@Joachims-MBP.lan>
This commit is contained in:
Joachim Fainberg 2022-04-14 16:48:00 +02:00 committed by GitHub
parent e63a5d4888
commit 4e1716223c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 20 deletions

View File

@ -397,6 +397,7 @@ class DependencyRenderer:
args (list): Individual arcs and their start, end, direction and label.
RETURNS (dict): Arc levels keyed by (start, end, label).
"""
arcs = [dict(t) for t in {tuple(sorted(arc.items())) for arc in arcs}]
length = max([arc["end"] for arc in arcs], default=0)
max_level = [0] * length
levels = {}

View File

@ -8,26 +8,6 @@ from spacy.lang.fa import Persian
from spacy.tokens import Span, Doc
@pytest.mark.issue(5447)
def test_issue5447():
    """Check that arcs covering the same span are placed on separate levels."""
    tagged = (("This", "DT"), ("is", "VBZ"), ("a", "DT"), ("sentence.", "NN"))
    tokens = [{"text": text, "tag": tag} for text, tag in tagged]
    # (start, end, label) triples; every arc points left. Two arcs share the
    # 2-3 span but carry different labels.
    spans = ((0, 1, "nsubj"), (2, 3, "det"), (2, 3, "overlap"), (1, 3, "attr"))
    edges = [
        {"start": start, "end": end, "label": label, "dir": "left"}
        for start, end, label in spans
    ]
    parsed = {"words": tokens, "arcs": edges}
    dep_renderer = DependencyRenderer()
    # Only the level bookkeeping is inspected, so the returned markup is dropped.
    dep_renderer.render([parsed])
    assert dep_renderer.highest_level == 3
@pytest.mark.issue(2361)
def test_issue2361(de_vocab):
"""Test if < is escaped when rendering"""
@ -103,6 +83,27 @@ def test_issue3882(en_vocab):
displacy.parse_deps(doc)
@pytest.mark.issue(5447)
def test_issue5447():
    """Check that overlapping arcs get their own levels, except exact duplicates."""

    def left_arc(start, end, label):
        # Build one left-pointing arc entry with keys in the usual order.
        return {"start": start, "end": end, "label": label, "dir": "left"}

    tagged = (("This", "DT"), ("is", "VBZ"), ("a", "DT"), ("sentence.", "NN"))
    tokens = [{"text": text, "tag": tag} for text, tag in tagged]
    edges = [
        left_arc(0, 1, "nsubj"),
        left_arc(2, 3, "det"),
        left_arc(2, 3, "overlap"),
        # Same arc as the previous entry, deliberately written with its keys
        # in a different order; the expected highest level below stays at 3.
        {"end": 3, "label": "overlap", "start": 2, "dir": "left"},
        left_arc(1, 3, "attr"),
    ]
    parsed = {"words": tokens, "arcs": edges}
    dep_renderer = DependencyRenderer()
    dep_renderer.render([parsed])
    assert dep_renderer.highest_level == 3
@pytest.mark.issue(5838)
def test_issue5838():
# Displacy's EntityRenderer break line