Fix unparsing of entities that are together

This commit is contained in:
Lonami Exo 2020-02-20 09:43:37 +01:00
parent 7c6fe5c4e9
commit 9f73c35621
3 changed files with 33 additions and 3 deletions

View File

@ -159,7 +159,7 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
html = []
last_offset = 0
for i, entity in enumerate(entities):
if entity.offset > _offset + _length:
if entity.offset >= _offset + _length:
break
relative_offset = entity.offset - _offset
if relative_offset > last_offset:

View File

@ -2,7 +2,7 @@
Tests for `telethon.extensions.html`.
"""
from telethon.extensions import html
from telethon.tl.types import MessageEntityBold, MessageEntityTextUrl
from telethon.tl.types import MessageEntityBold, MessageEntityItalic, MessageEntityTextUrl
def test_entity_edges():
@ -36,3 +36,18 @@ def test_trailing_malformed_entities():
entities = [MessageEntityTextUrl(offset=2, length=43, url='https://example.com')]
result = html.unparse(text, entities)
assert result == '🏆<a href="https://example.com">Telegram Official Android Challenge is over🏆</a>'
def test_entities_together():
    """
    Test that an entity followed immediately by a different one behaves well.

    The bold entity ends exactly where the italic one begins, so parsing the
    HTML and unparsing the result must reproduce the original markup.
    """
    original = '<strong>⚙️</strong><em>Settings</em>'
    # The gear emoji has to remain in the stripped text: it is the two units
    # covered by the bold entity, and the italic entity starts right after it
    # (offset 2, length 8), which requires a text of ten units in total.
    stripped = '⚙️Settings'

    text, entities = html.parse(original)
    assert text == stripped
    assert entities == [MessageEntityBold(0, 2), MessageEntityItalic(2, 8)]

    # Round trip: adjacent entities must not swallow or duplicate each other.
    unparsed = html.unparse(text, entities)
    assert unparsed == original

View File

@ -2,7 +2,7 @@
Tests for `telethon.extensions.markdown`.
"""
from telethon.extensions import markdown
from telethon.tl.types import MessageEntityBold, MessageEntityTextUrl
from telethon.tl.types import MessageEntityBold, MessageEntityItalic, MessageEntityTextUrl
def test_entity_edges():
@ -36,3 +36,18 @@ def test_trailing_malformed_entities():
entities = [MessageEntityTextUrl(offset=2, length=43, url='https://example.com')]
result = markdown.unparse(text, entities)
assert result == "🏆[Telegram Official Android Challenge is over🏆](https://example.com)"
def test_entities_together():
    """
    Test that an entity followed immediately by a different one behaves well.

    The bold entity ends exactly where the italic one begins, so parsing the
    markdown and unparsing the result must reproduce the original markup.
    """
    original = '**⚙️**__Settings__'
    # The gear emoji has to remain in the stripped text: it is the two units
    # covered by the bold entity, and the italic entity starts right after it
    # (offset 2, length 8), which requires a text of ten units in total.
    stripped = '⚙️Settings'

    text, entities = markdown.parse(original)
    assert text == stripped
    assert entities == [MessageEntityBold(0, 2), MessageEntityItalic(2, 8)]

    # Round trip: adjacent entities must not swallow or duplicate each other.
    unparsed = markdown.unparse(text, entities)
    assert unparsed == original