Fix unparsing malformed entities, bump v1.10.10

This commit is contained in:
Lonami Exo 2019-12-30 10:57:03 +01:00
parent be8838b5f8
commit d196c89825
6 changed files with 94 additions and 11 deletions

View File

@ -174,10 +174,12 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
# Otherwise we would end up with malformed text and fail to encode.
# Example of bad input: "Hi \ud83d\ude1c"
# https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
if '\ud800' <= text[relative_offset] <= '\udfff':
while (relative_offset < _length
and '\ud800' <= text[relative_offset] <= '\udfff'):
relative_offset += 1
if '\ud800' <= text[relative_offset + length] <= '\udfff':
while (relative_offset + length < _length
and '\ud800' <= text[relative_offset + length] <= '\udfff'):
length += 1
entity_text = unparse(text=text[relative_offset:relative_offset + length],
@ -222,7 +224,7 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity], _offset: int = 0,
skip_entity = True
last_offset = relative_offset + (0 if skip_entity else length)
if last_offset < len(text) and '\ud800' <= text[last_offset] <= '\udfff':
while last_offset < _length and '\ud800' <= text[last_offset] <= '\udfff':
last_offset += 1
html.append(escape(text[last_offset:]))

View File

@ -189,7 +189,7 @@ def unparse(text, entities, delimiters=None, url_fmt=None):
# Otherwise we would end up with malformed text and fail to encode.
# Example of bad input: "Hi \ud83d\ude1c"
# https://en.wikipedia.org/wiki/UTF-16#U+010000_to_U+10FFFF
if '\ud800' <= text[at] <= '\udfff':
while at < len(text) and '\ud800' <= text[at] <= '\udfff':
at += 1
text = text[:at] + what + text[at:]

View File

@ -1,3 +1,3 @@
# Versions should comply with PEP440.
# This line is parsed in setup.py:
__version__ = '1.10.9'
__version__ = '1.10.10'

View File

@ -1,13 +1,14 @@
"""
tests for telethon.crypto.rsa
Tests for `telethon.crypto.rsa`.
"""
import pytest
from telethon.crypto import rsa
@pytest.fixture
def server_key_fp():
"""factory to return a key, old if so chosen"""
"""Factory to return a key, old if so chosen."""
def _server_key_fp(old: bool):
for fp, data in rsa._server_keys.items():
_, old_key = data
@ -16,22 +17,26 @@ def server_key_fp():
return _server_key_fp
def test_encryption_inv_key():
"""test for #1324"""
"""Test for #1324."""
assert rsa.encrypt("invalid", b"testdata") is None
def test_encryption_old_key(server_key_fp):
"""test for #1324"""
"""Test for #1324."""
assert rsa.encrypt(server_key_fp(old=True), b"testdata") is None
def test_encryption_allowed_old_key(server_key_fp):
data = rsa.encrypt(server_key_fp(old=True), b"testdata", use_old=True)
# we can't verify the data is actually valid because we don't have
# We can't verify the data is actually valid because we don't have
# the decryption keys
assert data is not None and len(data) == 256
def test_encryption_current_key(server_key_fp):
data = rsa.encrypt(server_key_fp(old=False), b"testdata")
# we can't verify the data is actually valid because we don't have
# We can't verify the data is actually valid because we don't have
# the decryption keys
assert data is not None and len(data) == 256

View File

@ -0,0 +1,38 @@
"""
Tests for `telethon.extensions.html`.
"""
from telethon.extensions import html
from telethon.tl.types import MessageEntityBold, MessageEntityTextUrl
def test_entity_edges():
    """
    Entities touching the very start and the very end of the text
    must be unparsed without crashing.
    """
    message = 'Hello, world'
    # One bold span flush with the start, one flush with the end.
    leading_bold = MessageEntityBold(0, 5)
    trailing_bold = MessageEntityBold(7, 5)

    expected = '<strong>Hello</strong>, <strong>world</strong>'
    assert html.unparse(message, [leading_bold, trailing_bold]) == expected
def test_malformed_entities():
    """
    Malformed entity offsets sent by bad clients must not crash
    the unparser, and the output must match the expected markup.
    """
    message = '🏆Telegram Official Android Challenge is over🏆.'
    link = MessageEntityTextUrl(offset=2, length=43, url='https://example.com')

    expected = '🏆<a href="https://example.com">Telegram Official Android Challenge is over🏆</a>.'
    assert html.unparse(message, [link]) == expected
def test_trailing_malformed_entities():
    """
    Variant of `test_malformed_entities` covering the edge case in
    which the malformed entity reaches the very end of the text
    (the text string here has no trailing dot).
    """
    message = '🏆Telegram Official Android Challenge is over🏆'
    link = MessageEntityTextUrl(offset=2, length=43, url='https://example.com')

    expected = '🏆<a href="https://example.com">Telegram Official Android Challenge is over🏆</a>'
    assert html.unparse(message, [link]) == expected

View File

@ -0,0 +1,38 @@
"""
Tests for `telethon.extensions.markdown`.
"""
from telethon.extensions import markdown
from telethon.tl.types import MessageEntityBold, MessageEntityTextUrl
def test_entity_edges():
    """
    Entities touching the very start and the very end of the text
    must be unparsed without crashing.
    """
    message = 'Hello, world'
    # One bold span flush with the start, one flush with the end.
    leading_bold = MessageEntityBold(0, 5)
    trailing_bold = MessageEntityBold(7, 5)

    expected = '**Hello**, **world**'
    assert markdown.unparse(message, [leading_bold, trailing_bold]) == expected
def test_malformed_entities():
    """
    Malformed entity offsets sent by bad clients must not crash
    the unparser, and the output must match the expected markup.
    """
    message = '🏆Telegram Official Android Challenge is over🏆.'
    link = MessageEntityTextUrl(offset=2, length=43, url='https://example.com')

    expected = "🏆[Telegram Official Android Challenge is over🏆](https://example.com)."
    assert markdown.unparse(message, [link]) == expected
def test_trailing_malformed_entities():
    """
    Variant of `test_malformed_entities` covering the edge case in
    which the malformed entity reaches the very end of the text
    (the text string here has no trailing dot).
    """
    message = '🏆Telegram Official Android Challenge is over🏆'
    link = MessageEntityTextUrl(offset=2, length=43, url='https://example.com')

    expected = "🏆[Telegram Official Android Challenge is over🏆](https://example.com)"
    assert markdown.unparse(message, [link]) == expected