Add support for unparsing nested entities into HTML (#1209)

This commit is contained in:
Tulir Asokan 2019-06-24 13:28:14 +03:00 committed by Lonami
parent 962949008f
commit 8b28f4ffbf

View File

@ -5,13 +5,15 @@ import struct
from collections import deque
from html import escape, unescape
from html.parser import HTMLParser
from typing import Iterable, Optional, Tuple, List
from .. import helpers
from ..tl.types import (
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
MessageEntityTextUrl, MessageEntityMentionName,
MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote
MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote,
TypeMessageEntity
)
@ -121,7 +123,7 @@ class HTMLToTelegramParser(HTMLParser):
self.entities.append(entity)
def parse(html):
def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]:
"""
Parses the given HTML message and returns its stripped representation
plus a list of the MessageEntity's that were found.
@ -138,7 +140,8 @@ def parse(html):
return _del_surrogate(text), parser.entities
def unparse(text, entities):
def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
_length: Optional[int] = None) -> str:
"""
Performs the reverse operation to .parse(), effectively returning HTML
given a normal text and its MessageEntity's.
@ -147,20 +150,29 @@ def unparse(text, entities):
:param entities: the MessageEntity's applied to the text.
:return: a HTML representation of the combination of both inputs.
"""
if not text or not entities:
if not text:
return text
elif not entities:
return escape(text)
text = _add_surrogate(text)
if _length is None:
_length = len(text)
html = []
last_offset = 0
for entity in entities:
if entity.offset > last_offset:
html.append(escape(text[last_offset:entity.offset]))
elif entity.offset < last_offset:
for i, entity in enumerate(entities):
if entity.offset > _offset + _length:
break
relative_offset = entity.offset - _offset
if relative_offset > last_offset:
html.append(escape(text[last_offset:relative_offset]))
elif relative_offset < last_offset:
continue
skip_entity = False
entity_text = escape(text[entity.offset:entity.offset + entity.length])
entity_text = unparse(text=text[relative_offset:relative_offset + entity.length],
entities=entities[i + 1:],
_offset=entity.offset, _length=entity.length)
entity_type = type(entity)
if entity_type == MessageEntityBold:
@ -198,6 +210,6 @@ def unparse(text, entities):
.format(entity.user_id, entity_text))
else:
skip_entity = True
last_offset = entity.offset + (0 if skip_entity else entity.length)
html.append(text[last_offset:])
last_offset = relative_offset + (0 if skip_entity else entity.length)
html.append(escape(text[last_offset:]))
return _del_surrogate(''.join(html))