Fix escaping of HTML in displacy ENT (closes #2728)

This commit is contained in:
Ines Montani 2019-02-21 14:30:39 +01:00
parent 250e88ef55
commit 80bdcb99c5
2 changed files with 19 additions and 3 deletions

View File

@ -253,10 +253,10 @@ class EntityRenderer(object):
label = span["label"]
start = span["start"]
end = span["end"]
entity = text[start:end]
entity = escape_html(text[start:end])
fragments = text[offset:start].split("\n")
for i, fragment in enumerate(fragments):
markup += fragment
markup += escape_html(fragment)
if len(fragments) > 1 and i != len(fragments) - 1:
markup += "</br>"
if self.ents is None or label.upper() in self.ents:
@ -265,7 +265,7 @@ class EntityRenderer(object):
else:
markup += entity
offset = end
markup += text[offset:]
markup += escape_html(text[offset:])
markup = TPL_ENTS.format(content=markup, colors=self.colors)
if title:
markup = TPL_TITLE.format(title=title) + markup

View File

@ -0,0 +1,16 @@
# coding: utf8
from __future__ import unicode_literals
from spacy import displacy
from spacy.tokens import Doc, Span
def test_issue2728(en_vocab):
    """Regression test for #2728: displaCy ENT output must escape HTML.

    The doc contains the markup-like token "<RELEASE>". It is rendered twice
    with a single "TEST" entity: first over token 0, then over token 1
    (presumably leaving "<RELEASE>" outside the entity in the first render
    and inside it in the second -- assumes Span takes token indices with an
    exclusive end). Each render must contain the escaped form of the token.
    """
    escaped_token = "&lt;RELEASE&gt;"
    doc = Doc(en_vocab, words=["test", "<RELEASE>", "test"])
    # Same check for both entity placements, in the original order.
    for ent_start, ent_end in ((0, 1), (1, 2)):
        doc.ents = [Span(doc, ent_start, ent_end, label="TEST")]
        rendered = displacy.render(doc, style="ent")
        assert escaped_token in rendered