""" Simple markdown parser which does not support nesting. Intended primarily for use within the library, which attempts to handle emojies correctly, since they seem to count as two characters and it's a bit strange. """ import re import struct from ..tl import TLObject from ..tl.types import ( MessageEntityBold, MessageEntityItalic, MessageEntityCode, MessageEntityPre, MessageEntityTextUrl ) DEFAULT_DELIMITERS = { '**': MessageEntityBold, '__': MessageEntityItalic, '`': MessageEntityCode, '```': MessageEntityPre } DEFAULT_URL_RE = re.compile(r'\[([^\]]+)\]\((.+?)\)') DEFAULT_URL_FORMAT = '[{0}]({1})' def _add_surrogate(text): return ''.join( # SMP -> Surrogate Pairs (Telegram offsets are calculated with these). # See https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview for more. ''.join(chr(y) for y in struct.unpack(' 'y!'. :param text: the original text. :param entity: the entity or entities that must be matched. :return: a single result or a list of the text surrounded by the entities. """ if isinstance(entity, TLObject): entity = (entity,) multiple = True else: multiple = False text = _add_surrogate(text) result = [] for e in entity: start = e.offset end = e.offset + e.length result.append(_del_surrogate(text[start:end])) return result if multiple else result[0]