add lemma option to displacy 'dep' visualiser (#5041)

* add lemma option to displacy 'dep' visualiser
* more compact list comprehension
* add option to doc
* fix test and add lemmas to util.get_doc
* fix capital
* remove lemma from get_doc
* cleanup
2025-08-31 09:25:01 +03:00 · 2020-02-22 14:11:51 +01:00 · 2020-02-22 14:11:51 +01:00 · 479bd8d09f
commit 479bd8d09f
parent 2164e71ea8
5 changed files with 25 additions and 11 deletions
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -144,10 +144,12 @@ def parse_deps(orig_doc, options={}):
            for span, tag, lemma, ent_type in spans:
                attrs = {"tag": tag, "lemma": lemma, "ent_type": ent_type}
                retokenizer.merge(span, attrs=attrs)
-    if options.get("fine_grained"):
-        words = [{"text": w.text, "tag": w.tag_} for w in doc]
-    else:
-        words = [{"text": w.text, "tag": w.pos_} for w in doc]
+    fine_grained = options.get("fine_grained")
+    add_lemma = options.get("add_lemma")
+    words = [{"text": w.text,
+              "tag": w.tag_ if fine_grained else w.pos_,
+              "lemma": w.lemma_ if add_lemma else None} for w in doc]
+
    arcs = []
    for word in doc:
        if word.i < word.head.i:
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals

 import uuid

-from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_ARCS, TPL_ENTS
+from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_WORDS_LEMMA, TPL_DEP_ARCS, TPL_ENTS
 from .templates import TPL_ENT, TPL_ENT_RTL, TPL_FIGURE, TPL_TITLE, TPL_PAGE
 from ..util import minify_html, escape_html, registry
 from ..errors import Errors
@@ -83,7 +83,7 @@ class DependencyRenderer(object):
        self.width = self.offset_x + len(words) * self.distance
        self.height = self.offset_y + 3 * self.word_spacing
        self.id = render_id
-        words = [self.render_word(w["text"], w["tag"], i) for i, w in enumerate(words)]
+        words = [self.render_word(w["text"], w["tag"],  w.get("lemma", None), i) for i, w in enumerate(words)]
        arcs = [
            self.render_arrow(a["label"], a["start"], a["end"], a["dir"], i)
            for i, a in enumerate(arcs)
@@ -101,7 +101,7 @@ class DependencyRenderer(object):
            lang=self.lang,
        )

-    def render_word(self, text, tag, i):
+    def render_word(self, text, tag, lemma, i,):
        """Render individual word.

        text (unicode): Word text.
@@ -114,6 +114,8 @@ class DependencyRenderer(object):
        if self.direction == "rtl":
            x = self.width - x
        html_text = escape_html(text)
+        if lemma is not None:
+            return TPL_DEP_WORDS_LEMMA.format(text=html_text, tag=tag, lemma=lemma, x=x, y=y)
        return TPL_DEP_WORDS.format(text=html_text, tag=tag, x=x, y=y)

    def render_arrow(self, label, start, end, direction, i):
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -18,6 +18,15 @@ TPL_DEP_WORDS = """
 """


+TPL_DEP_WORDS_LEMMA = """
+<text class="displacy-token" fill="currentColor" text-anchor="middle" y="{y}">
+    <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
+    <tspan class="displacy-lemma" dy="2em" fill="currentColor" x="{x}">{lemma}</tspan>
+    <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
+</text>
+"""
+
+
 TPL_DEP_ARCS = """
 <g class="displacy-arrow">
    <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="currentColor"/>
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -31,10 +31,10 @@ def test_displacy_parse_deps(en_vocab):
    deps = displacy.parse_deps(doc)
    assert isinstance(deps, dict)
    assert deps["words"] == [
-        {"text": "This", "tag": "DET"},
-        {"text": "is", "tag": "AUX"},
-        {"text": "a", "tag": "DET"},
-        {"text": "sentence", "tag": "NOUN"},
+        {"lemma": None, "text": "This", "tag": "DET"},
+        {"lemma": None, "text": "is", "tag": "AUX"},
+        {"lemma": None, "text": "a", "tag": "DET"},
+        {"lemma": None, "text": "sentence", "tag": "NOUN"},
    ]
    assert deps["arcs"] == [
        {"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -239,6 +239,7 @@ If a setting is not present in the options, the default value will be used.
 | Name               | Type    | Description                                                                                                     | Default                 |
 | ------------------ | ------- | --------------------------------------------------------------------------------------------------------------- | ----------------------- |
 | `fine_grained`     | bool    | Use fine-grained part-of-speech tags (`Token.tag_`) instead of coarse-grained tags (`Token.pos_`).              | `False`                 |
+| `add_lemma`        | bool    | Print the lemmas in a separate row below the token texts in the `dep` visualisation.                            | `False`                 |
 | `collapse_punct`   | bool    | Attach punctuation to tokens. Can make the parse more readable, as it prevents long arcs to attach punctuation. | `True`                  |
 | `collapse_phrases` | bool    | Merge noun phrases into one token.                                                                              | `False`                 |
 | `compact`          | bool    | "Compact mode" with square arrows that takes up less space.                                                     | `False`                 |