From ad19987cd6375cbcd7b7de6c6f3ce7d35180e913 Mon Sep 17 00:00:00 2001 From: Alexander Goryushkin <43167451+alexgoryushkin@users.noreply.github.com> Date: Thu, 14 Sep 2023 21:52:04 +0500 Subject: [PATCH] Fixed sorting of markup entities with the same offsets (#4201) --- telethon/extensions/html.py | 10 +++++----- telethon/extensions/markdown.py | 14 +++++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/telethon/extensions/html.py b/telethon/extensions/html.py index 564dcf13..201312ac 100644 --- a/telethon/extensions/html.py +++ b/telethon/extensions/html.py @@ -167,21 +167,21 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str: text = add_surrogate(text) insert_at = [] - for entity in entities: + for i, entity in enumerate(entities): s = entity.offset e = entity.offset + entity.length delimiter = ENTITY_TO_FORMATTER.get(type(entity), None) if delimiter: if callable(delimiter): delimiter = delimiter(entity, text[s:e]) - insert_at.append((s, delimiter[0])) - insert_at.append((e, delimiter[1])) + insert_at.append((s, i, delimiter[0])) + insert_at.append((e, len(entities) - i, delimiter[1])) - insert_at.sort(key=lambda t: t[0]) + insert_at.sort(key=lambda t: (t[0], t[1])) next_escape_bound = len(text) while insert_at: # Same logic as markdown.py - at, what = insert_at.pop() + at, _, what = insert_at.pop() while within_surrogate(text, at): at += 1 diff --git a/telethon/extensions/markdown.py b/telethon/extensions/markdown.py index d52fc347..78f28385 100644 --- a/telethon/extensions/markdown.py +++ b/telethon/extensions/markdown.py @@ -164,13 +164,13 @@ def unparse(text, entities, delimiters=None, url_fmt=None): text = add_surrogate(text) delimiters = {v: k for k, v in delimiters.items()} insert_at = [] - for entity in entities: + for i, entity in enumerate(entities): s = entity.offset e = entity.offset + entity.length delimiter = delimiters.get(type(entity), None) if delimiter: - insert_at.append((s, delimiter)) - insert_at.append((e, delimiter)) + insert_at.append((s, i, delimiter)) + insert_at.append((e, len(entities) - i, delimiter)) else: url = None if isinstance(entity, MessageEntityTextUrl): @@ -178,12 +178,12 @@ def unparse(text, entities, delimiters=None, url_fmt=None): elif isinstance(entity, MessageEntityMentionName): url = 'tg://user?id={}'.format(entity.user_id) if url: - insert_at.append((s, '[')) - insert_at.append((e, ']({})'.format(url))) + insert_at.append((s, i, '[')) + insert_at.append((e, len(entities) - i, ']({})'.format(url))) - insert_at.sort(key=lambda t: t[0]) + insert_at.sort(key=lambda t: (t[0], t[1])) while insert_at: - at, what = insert_at.pop() + at, _, what = insert_at.pop() # If we are in the middle of a surrogate nudge the position by -1. # Otherwise we would end up with malformed text and fail to encode.