From 4e1716223c99402efc97698116905c04d39ef6be Mon Sep 17 00:00:00 2001 From: Joachim Fainberg Date: Thu, 14 Apr 2022 16:48:00 +0200 Subject: [PATCH] displaCy: Avoid increasing levels for identical arcs (#10639) * Test for arc levels for identical arcs Also moves the test in order with the other numbered tests. * displaCy: filter identical arcs Avoid increased levels due to identical arcs by first filtering any identical arcs. * Sort keys before filtering Manual entry with keys out of order would previously become different tuples and therefore not filtered correctly. Co-authored-by: Joachim Fainberg --- spacy/displacy/render.py | 1 + spacy/tests/test_displacy.py | 41 ++++++++++++++++++------------------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index 8d39e62f5..247ad996b 100644 --- a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -397,6 +397,7 @@ class DependencyRenderer: args (list): Individual arcs and their start, end, direction and label. RETURNS (dict): Arc levels keyed by (start, end, label). """ + arcs = [dict(t) for t in {tuple(sorted(arc.items())) for arc in arcs}] length = max([arc["end"] for arc in arcs], default=0) max_level = [0] * length levels = {} diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py index 95dc47a19..f52c36889 100644 --- a/spacy/tests/test_displacy.py +++ b/spacy/tests/test_displacy.py @@ -8,26 +8,6 @@ from spacy.lang.fa import Persian from spacy.tokens import Span, Doc -@pytest.mark.issue(5447) -def test_issue5447(): - """Test that overlapping arcs get separate levels.""" - renderer = DependencyRenderer() - words = [ - {"text": "This", "tag": "DT"}, - {"text": "is", "tag": "VBZ"}, - {"text": "a", "tag": "DT"}, - {"text": "sentence.", "tag": "NN"}, - ] - arcs = [ - {"start": 0, "end": 1, "label": "nsubj", "dir": "left"}, - {"start": 2, "end": 3, "label": "det", "dir": "left"}, - {"start": 2, "end": 3, "label": "overlap", "dir": "left"}, - {"start": 1, "end": 3, "label": "attr", "dir": "left"}, - ] - html = renderer.render([{"words": words, "arcs": arcs}]) - assert renderer.highest_level == 3 - - @pytest.mark.issue(2361) def test_issue2361(de_vocab): """Test if < is escaped when rendering""" @@ -103,6 +83,27 @@ def test_issue3882(en_vocab): displacy.parse_deps(doc) +@pytest.mark.issue(5447) +def test_issue5447(): + """Test that overlapping arcs get separate levels, unless they're identical.""" + renderer = DependencyRenderer() + words = [ + {"text": "This", "tag": "DT"}, + {"text": "is", "tag": "VBZ"}, + {"text": "a", "tag": "DT"}, + {"text": "sentence.", "tag": "NN"}, + ] + arcs = [ + {"start": 0, "end": 1, "label": "nsubj", "dir": "left"}, + {"start": 2, "end": 3, "label": "det", "dir": "left"}, + {"start": 2, "end": 3, "label": "overlap", "dir": "left"}, + {"end": 3, "label": "overlap", "start": 2, "dir": "left"}, + {"start": 1, "end": 3, "label": "attr", "dir": "left"}, + ] + renderer.render([{"words": words, "arcs": arcs}]) + assert renderer.highest_level == 3 + + @pytest.mark.issue(5838) def test_issue5838(): # Displacy's EntityRenderer break line