Fix ordering of closing tags of sequential entities (#4268)

This commit is contained in:
Lonami 2023-12-08 08:12:02 +01:00 committed by GitHub
commit 3d58dc355e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 36 additions and 3 deletions

View File

@ -124,6 +124,8 @@ def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]:
parser = HTMLToTelegramParser()
parser.feed(add_surrogate(html))
text = strip_text(parser.text, parser.entities)
parser.entities.reverse()
parser.entities.sort(key=lambda entity: entity.offset)
return del_surrogate(text), parser.entities
@ -175,7 +177,7 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str:
if callable(delimiter):
delimiter = delimiter(entity, text[s:e])
insert_at.append((s, i, delimiter[0]))
insert_at.append((e, len(entities) - i, delimiter[1]))
insert_at.append((e, -i, delimiter[1]))
insert_at.sort(key=lambda t: (t[0], t[1]))
next_escape_bound = len(text)

View File

@ -170,7 +170,7 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
delimiter = delimiters.get(type(entity), None)
if delimiter:
insert_at.append((s, i, delimiter))
insert_at.append((e, len(entities) - i, delimiter))
insert_at.append((e, -i, delimiter))
else:
url = None
if isinstance(entity, MessageEntityTextUrl):
@ -179,7 +179,7 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
url = 'tg://user?id={}'.format(entity.user_id)
if url:
insert_at.append((s, i, '['))
insert_at.append((e, len(entities) - i, ']({})'.format(url)))
insert_at.append((e, -i, ']({})'.format(url)))
insert_at.sort(key=lambda t: (t[0], t[1]))
while insert_at:

View File

@ -53,6 +53,22 @@ def test_entities_together():
assert text == original
def test_nested_entities():
"""
Test that an entity nested inside another one behaves well.
"""
original = '<a href="https://example.com"><strong>Example</strong></a>'
original_entities = [MessageEntityTextUrl(0, 7, url='https://example.com'), MessageEntityBold(0, 7)]
stripped = 'Example'
text, entities = html.parse(original)
assert text == stripped
assert entities == original_entities
text = html.unparse(text, entities)
assert text == original
def test_offset_at_emoji():
"""
Tests that an entity starting at a emoji preserves the emoji.

View File

@ -53,6 +53,21 @@ def test_entities_together():
assert text == original
def test_nested_entities():
"""
Test that an entity nested inside another one behaves well.
"""
original = '**[Example](https://example.com)**'
stripped = 'Example'
text, entities = markdown.parse(original)
assert text == stripped
assert entities == [MessageEntityBold(0, 7), MessageEntityTextUrl(0, 7, url='https://example.com')]
text = markdown.unparse(text, entities)
assert text == original
def test_offset_at_emoji():
"""
Tests that an entity starting at a emoji preserves the emoji.