diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index c17b80aef..e13b0403b 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -144,10 +144,12 @@ def parse_deps(orig_doc, options={}):
for span, tag, lemma, ent_type in spans:
attrs = {"tag": tag, "lemma": lemma, "ent_type": ent_type}
retokenizer.merge(span, attrs=attrs)
- if options.get("fine_grained"):
- words = [{"text": w.text, "tag": w.tag_} for w in doc]
- else:
- words = [{"text": w.text, "tag": w.pos_} for w in doc]
+ fine_grained = options.get("fine_grained")
+ add_lemma = options.get("add_lemma")
+ words = [{"text": w.text,
+ "tag": w.tag_ if fine_grained else w.pos_,
+ "lemma": w.lemma_ if add_lemma else None} for w in doc]
+
arcs = []
for word in doc:
if word.i < word.head.i:
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index d6e33437b..68df324d6 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -3,7 +3,7 @@ from __future__ import unicode_literals
import uuid
-from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_ARCS, TPL_ENTS
+from .templates import TPL_DEP_SVG, TPL_DEP_WORDS, TPL_DEP_WORDS_LEMMA, TPL_DEP_ARCS, TPL_ENTS
from .templates import TPL_ENT, TPL_ENT_RTL, TPL_FIGURE, TPL_TITLE, TPL_PAGE
from ..util import minify_html, escape_html, registry
from ..errors import Errors
@@ -83,7 +83,7 @@ class DependencyRenderer(object):
self.width = self.offset_x + len(words) * self.distance
self.height = self.offset_y + 3 * self.word_spacing
self.id = render_id
- words = [self.render_word(w["text"], w["tag"], i) for i, w in enumerate(words)]
+ words = [self.render_word(w["text"], w["tag"], w.get("lemma", None), i) for i, w in enumerate(words)]
arcs = [
self.render_arrow(a["label"], a["start"], a["end"], a["dir"], i)
for i, a in enumerate(arcs)
@@ -101,7 +101,7 @@ class DependencyRenderer(object):
lang=self.lang,
)
- def render_word(self, text, tag, i):
+ def render_word(self, text, tag, lemma, i,):
"""Render individual word.
text (unicode): Word text.
@@ -114,6 +114,8 @@ class DependencyRenderer(object):
if self.direction == "rtl":
x = self.width - x
html_text = escape_html(text)
+ if lemma is not None:
+ return TPL_DEP_WORDS_LEMMA.format(text=html_text, tag=tag, lemma=lemma, x=x, y=y)
return TPL_DEP_WORDS.format(text=html_text, tag=tag, x=x, y=y)
def render_arrow(self, label, start, end, direction, i):
diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index ade75d1d6..f29eab86f 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -18,6 +18,15 @@ TPL_DEP_WORDS = """
"""
+TPL_DEP_WORDS_LEMMA = """
+
+ {text}
+ {lemma}
+ {tag}
+
+"""
+
+
TPL_DEP_ARCS = """
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index 2d1f1bd8f..d04c0506f 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -31,10 +31,10 @@ def test_displacy_parse_deps(en_vocab):
deps = displacy.parse_deps(doc)
assert isinstance(deps, dict)
assert deps["words"] == [
- {"text": "This", "tag": "DET"},
- {"text": "is", "tag": "AUX"},
- {"text": "a", "tag": "DET"},
- {"text": "sentence", "tag": "NOUN"},
+ {"lemma": None, "text": "This", "tag": "DET"},
+ {"lemma": None, "text": "is", "tag": "AUX"},
+ {"lemma": None, "text": "a", "tag": "DET"},
+ {"lemma": None, "text": "sentence", "tag": "NOUN"},
]
assert deps["arcs"] == [
{"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 50ba0e3d9..266df87f0 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -239,6 +239,7 @@ If a setting is not present in the options, the default value will be used.
| Name | Type | Description | Default |
| ------------------ | ------- | --------------------------------------------------------------------------------------------------------------- | ----------------------- |
| `fine_grained` | bool | Use fine-grained part-of-speech tags (`Token.tag_`) instead of coarse-grained tags (`Token.pos_`). | `False` |
+| `add_lemma`        | bool    | Print the lemmas in a separate row below the token texts in the `dep` visualisation.                            | `False`                 |
| `collapse_punct` | bool | Attach punctuation to tokens. Can make the parse more readable, as it prevents long arcs to attach punctuation. | `True` |
| `collapse_phrases` | bool | Merge noun phrases into one token. | `False` |
| `compact` | bool | "Compact mode" with square arrows that takes up less space. | `False` |