mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
🐛 Escape annotated HTML tags in span renderer (#12817)
These changes add a missing call to `escape_html` in the displaCy span renderer. Previously span-annotated tokens would be inserted into the page markup without being escaped, resulting in potentially incorrect rendering. When I encountered this issue, it resulted in some docs and span underlines being superimposed on top of properly rendered docs and span underlines near the beginning of the visualization (due to an unescaped `<span>` tag).
This commit is contained in:
parent
ddffd09602
commit
0566c3a166
|
@ -1,4 +1,3 @@
|
|||
import itertools
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
|
@ -218,7 +217,7 @@ class SpanRenderer:
|
|||
+ (self.offset_step * (len(entities) - 1))
|
||||
)
|
||||
markup += self.span_template.format(
|
||||
text=token["text"],
|
||||
text=escape_html(token["text"]),
|
||||
span_slices=slices,
|
||||
span_starts=starts,
|
||||
total_height=total_height,
|
||||
|
|
|
@ -377,3 +377,22 @@ def test_displacy_manual_sorted_entities():
|
|||
|
||||
html = displacy.render(doc, style="ent", manual=True)
|
||||
assert html.find("FIRST") < html.find("SECOND")
|
||||
|
||||
|
||||
@pytest.mark.issue(12816)
|
||||
def test_issue12816(en_vocab) -> None:
|
||||
"""Test that displaCy's span visualizer escapes annotated HTML tags correctly."""
|
||||
# Create a doc containing an annotated word and an unannotated HTML tag
|
||||
doc = Doc(en_vocab, words=["test", "<TEST>"])
|
||||
doc.spans["sc"] = [Span(doc, 0, 1, label="test")]
|
||||
|
||||
# Verify that the HTML tag is escaped when unannotated
|
||||
html = displacy.render(doc, style="span")
|
||||
assert "<TEST>" in html
|
||||
|
||||
# Annotate the HTML tag
|
||||
doc.spans["sc"].append(Span(doc, 1, 2, label="test"))
|
||||
|
||||
# Verify that the HTML tag is still escaped
|
||||
html = displacy.render(doc, style="span")
|
||||
assert "<TEST>" in html
|
||||
|
|
Loading…
Reference in New Issue
Block a user