Tuple[str, List[TypeMessageEntity]]:
return html, []
parser = HTMLToTelegramParser()
- parser.feed(_add_surrogate(html))
- text = helpers.strip_text(parser.text, parser.entities)
- return _del_surrogate(text), parser.entities
+ parser.feed(add_surrogate(html))
+ text = strip_text(parser.text, parser.entities)
+ return del_surrogate(text), parser.entities
-def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
- _length: Optional[int] = None) -> str:
+ENTITY_TO_FORMATTER = {
+ MessageEntityBold: ('', ''),
+ MessageEntityItalic: ('', ''),
+ MessageEntityCode: ('', '
'),
+ MessageEntityUnderline: ('', ''),
+ MessageEntityStrike: ('', ''),
+ MessageEntityBlockquote: ('', '
'),
+ MessageEntityPre: lambda e, _: (
+ "\n"
+ " \n"
+ " ".format(e.language), "{}\n"
+ "
\n"
+ "
"
+ ),
+ MessageEntityEmail: lambda _, t: (''.format(t), ''),
+ MessageEntityUrl: lambda _, t: (''.format(t), ''),
+ MessageEntityTextUrl: lambda e, _: (''.format(escape(e.url)), ''),
+ MessageEntityMentionName: lambda e, _: (''.format(e.user_id), ''),
+}
+
+
+def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str:
"""
Performs the reverse operation to .parse(), effectively returning HTML
given a normal text and its MessageEntity's.
@@ -153,77 +162,32 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
elif not entities:
return escape(text)
- text = _add_surrogate(text)
- if _length is None:
- _length = len(text)
- html = []
- last_offset = 0
- for i, entity in enumerate(entities):
- if entity.offset >= _offset + _length:
- break
- relative_offset = entity.offset - _offset
- if relative_offset > last_offset:
- html.append(escape(text[last_offset:relative_offset]))
- elif relative_offset < last_offset:
- continue
+ if isinstance(entities, TLObject):
+ entities = (entities,)
- skip_entity = False
- length = entity.length
+ text = add_surrogate(text)
+ insert_at = []
+ for entity in entities:
+ s = entity.offset
+ e = entity.offset + entity.length
+ delimiter = ENTITY_TO_FORMATTER.get(type(entity), None)
+ if delimiter:
+ if callable(delimiter):
+ delimiter = delimiter(entity, text[s:e])
+ insert_at.append((s, delimiter[0]))
+ insert_at.append((e, delimiter[1]))
- # If we are in the middle of a surrogate nudge the position by +1.
- # Otherwise we would end up with malformed text and fail to encode.
- # For example of bad input: "Hi \ud83d\ude1c"
- # https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
- while helpers.within_surrogate(text, relative_offset, length=_length):
- relative_offset += 1
+ insert_at.sort(key=lambda t: t[0])
+ next_escape_bound = len(text)
+ while insert_at:
+ # Same logic as markdown.py: apply insertions from the highest offset down so the earlier offsets stay valid
+ at, what = insert_at.pop()
+ while within_surrogate(text, at):
+ at += 1
- while helpers.within_surrogate(text, relative_offset + length, length=_length):
- length += 1
+ text = text[:at] + what + escape(text[at:next_escape_bound]) + text[next_escape_bound:]
+ next_escape_bound = at
- entity_text = unparse(text=text[relative_offset:relative_offset + length],
- entities=entities[i + 1:],
- _offset=entity.offset, _length=length)
- entity_type = type(entity)
+ text = escape(text[:next_escape_bound]) + text[next_escape_bound:]
- if entity_type == MessageEntityBold:
- html.append('{}'.format(entity_text))
- elif entity_type == MessageEntityItalic:
- html.append('{}'.format(entity_text))
- elif entity_type == MessageEntityCode:
- html.append('{}
'.format(entity_text))
- elif entity_type == MessageEntityUnderline:
- html.append('{}'.format(entity_text))
- elif entity_type == MessageEntityStrike:
- html.append('{}'.format(entity_text))
- elif entity_type == MessageEntityBlockquote:
- html.append('{}
'.format(entity_text))
- elif entity_type == MessageEntityPre:
- if entity.language:
- html.append(
- "\n"
- " \n"
- " {}\n"
- "
\n"
- "
".format(entity.language, entity_text))
- else:
- html.append('{}
'
- .format(entity_text))
- elif entity_type == MessageEntityEmail:
- html.append('{0}'.format(entity_text))
- elif entity_type == MessageEntityUrl:
- html.append('{0}'.format(entity_text))
- elif entity_type == MessageEntityTextUrl:
- html.append('{}'
- .format(escape(entity.url), entity_text))
- elif entity_type == MessageEntityMentionName:
- html.append('{}'
- .format(entity.user_id, entity_text))
- else:
- skip_entity = True
- last_offset = relative_offset + (0 if skip_entity else length)
-
- while helpers.within_surrogate(text, last_offset, length=_length):
- last_offset += 1
-
- html.append(escape(text[last_offset:]))
- return _del_surrogate(''.join(html))
+ return del_surrogate(text)