Fixed sorting of markup entities with the same offsets (#4201)

This commit is contained in:
Alexander Goryushkin 2023-09-14 21:52:04 +05:00 committed by GitHub
parent 7325718f0e
commit ad19987cd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 12 deletions

View File

@@ -167,21 +167,21 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str:
text = add_surrogate(text) text = add_surrogate(text)
insert_at = [] insert_at = []
for entity in entities: for i, entity in enumerate(entities):
s = entity.offset s = entity.offset
e = entity.offset + entity.length e = entity.offset + entity.length
delimiter = ENTITY_TO_FORMATTER.get(type(entity), None) delimiter = ENTITY_TO_FORMATTER.get(type(entity), None)
if delimiter: if delimiter:
if callable(delimiter): if callable(delimiter):
delimiter = delimiter(entity, text[s:e]) delimiter = delimiter(entity, text[s:e])
insert_at.append((s, delimiter[0])) insert_at.append((s, i, delimiter[0]))
insert_at.append((e, delimiter[1])) insert_at.append((e, len(entities) - i, delimiter[1]))
insert_at.sort(key=lambda t: t[0]) insert_at.sort(key=lambda t: (t[0], t[1]))
next_escape_bound = len(text) next_escape_bound = len(text)
while insert_at: while insert_at:
# Same logic as markdown.py # Same logic as markdown.py
at, what = insert_at.pop() at, _, what = insert_at.pop()
while within_surrogate(text, at): while within_surrogate(text, at):
at += 1 at += 1

View File

@@ -164,13 +164,13 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
text = add_surrogate(text) text = add_surrogate(text)
delimiters = {v: k for k, v in delimiters.items()} delimiters = {v: k for k, v in delimiters.items()}
insert_at = [] insert_at = []
for entity in entities: for i, entity in enumerate(entities):
s = entity.offset s = entity.offset
e = entity.offset + entity.length e = entity.offset + entity.length
delimiter = delimiters.get(type(entity), None) delimiter = delimiters.get(type(entity), None)
if delimiter: if delimiter:
insert_at.append((s, delimiter)) insert_at.append((s, i, delimiter))
insert_at.append((e, delimiter)) insert_at.append((e, len(entities) - i, delimiter))
else: else:
url = None url = None
if isinstance(entity, MessageEntityTextUrl): if isinstance(entity, MessageEntityTextUrl):
@@ -178,12 +178,12 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
elif isinstance(entity, MessageEntityMentionName): elif isinstance(entity, MessageEntityMentionName):
url = 'tg://user?id={}'.format(entity.user_id) url = 'tg://user?id={}'.format(entity.user_id)
if url: if url:
insert_at.append((s, '[')) insert_at.append((s, i, '['))
insert_at.append((e, ']({})'.format(url))) insert_at.append((e, len(entities) - i, ']({})'.format(url)))
insert_at.sort(key=lambda t: t[0]) insert_at.sort(key=lambda t: (t[0], t[1]))
while insert_at: while insert_at:
at, what = insert_at.pop() at, _, what = insert_at.pop()
# If we are in the middle of a surrogate nudge the position by -1. # If we are in the middle of a surrogate nudge the position by -1.
# Otherwise we would end up with malformed text and fail to encode. # Otherwise we would end up with malformed text and fail to encode.