Add ability to parse inline URLs

Lonami Exo 2017-10-29 16:33:10 +01:00
parent 9600a9ea0b
commit 368269cb11


@@ -7,7 +7,8 @@ import re
 from enum import Enum

 from ..tl.types import (
-    MessageEntityBold, MessageEntityItalic, MessageEntityCode, MessageEntityPre
+    MessageEntityBold, MessageEntityItalic, MessageEntityCode,
+    MessageEntityPre, MessageEntityTextUrl
 )

@@ -18,6 +19,7 @@ class Mode(Enum):
     ITALIC = 2
     CODE = 3
     PRE = 4
+    URL = 5


 EMOJI_PATTERN = re.compile(
@@ -48,12 +50,19 @@ def emojiness(char):
         return 3


-def parse(message, delimiters=None):
+def parse(message, delimiters=None, url_re=r'\[(.+?)\]\((.+?)\)'):
     """
     Parses the given message and returns the stripped message and a list
     of tuples containing (start, end, mode) using the specified delimiters
     dictionary (or default if None).
+
+    The url_re(gex) must contain two matching groups: the text to be
+    clickable and the URL itself.
     """
+    if url_re:
+        if isinstance(url_re, str):
+            url_re = re.compile(url_re)
+
     if not delimiters:
         if delimiters is not None:
             return message, []
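
The default pattern handles the usual [text](URL) inline-link syntax. A minimal sketch of what its two capture groups yield (the sample string below is made up for illustration):

    import re

    # Same default pattern as the new url_re parameter: group 1 is the text
    # that should become clickable, group 2 is the URL it points to.
    url_re = re.compile(r'\[(.+?)\]\((.+?)\)')

    m = url_re.match('[Telethon](https://github.com/LonamiWebs/Telethon)')
    print(m.group(1))  # Telethon
    print(m.group(2))  # https://github.com/LonamiWebs/Telethon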
@@ -70,19 +79,35 @@ def parse(message, delimiters=None):
     offset = 0
     i = 0
     while i < len(message):
-        for d, m in delimiters.items():
-            if message[i:i + len(d)] == d and current in (Mode.NONE, m):
-                if message[i + len(d):i + 2 * len(d)] == d:
-                    continue  # ignore two consecutive delimiters
+        if current == Mode.NONE:
+            url_match = url_re.match(message, pos=i)
+            if url_match:
+                message = ''.join((
+                    message[:url_match.start()],
+                    url_match.group(1),
+                    message[url_match.end():]
+                ))
+                emoji_len = sum(emojiness(c) for c in url_match.group(1))
+                result.append((
+                    offset,
+                    i + emoji_len,
+                    (Mode.URL, url_match.group(2))
+                ))
+                i += len(url_match.group(1))
+        else:
+            for d, m in delimiters.items():
+                if message[i:i + len(d)] == d and current in (Mode.NONE, m):
+                    if message[i + len(d):i + 2 * len(d)] == d:
+                        continue  # ignore two consecutive delimiters

-                message = message[:i] + message[i + len(d):]
-                if current == Mode.NONE:
-                    result.append(offset)
-                    current = m
-                else:
-                    result[-1] = (result[-1], offset, current)
-                    current = Mode.NONE
-                break
+                    message = message[:i] + message[i + len(d):]
+                    if current == Mode.NONE:
+                        result.append(offset)
+                        current = m
+                    else:
+                        result[-1] = (result[-1], offset, current)
+                        current = Mode.NONE
+                    break

         if i < len(message):
             offset += emojiness(message[i])
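
Because the search uses url_re.match(message, pos=i), only a link that starts exactly at the current scan position is rewritten: the [text](URL) span is collapsed to just the text, and the URL travels along inside the (Mode.URL, url) tuple. A small sketch of that pos-anchored matching, with a made-up message:

    import re

    url_re = re.compile(r'\[(.+?)\]\((.+?)\)')
    message = 'see [docs](https://example.com) here'

    print(url_re.match(message, pos=0))           # None: index 0 is 's', not '['
    print(url_re.match(message, pos=4).groups())  # ('docs', 'https://example.com')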
@@ -98,6 +123,10 @@ def parse_tg(message, delimiters=None):
     message, tuples = parse(message, delimiters=delimiters)
     result = []
     for start, end, mode in tuples:
+        extra = None
+        if isinstance(mode, tuple):
+            mode, extra = mode
+
         if mode == Mode.BOLD:
             result.append(MessageEntityBold(start, end - start))
         elif mode == Mode.ITALIC:
@@ -106,4 +135,6 @@ def parse_tg(message, delimiters=None):
             result.append(MessageEntityCode(start, end - start))
         elif mode == Mode.PRE:
             result.append(MessageEntityPre(start, end - start, ''))
+        elif mode == Mode.URL:
+            result.append(MessageEntityTextUrl(start, end - start, extra))
     return message, result
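
Taken together, parse_tg now unpacks a (Mode.URL, url) tuple into a MessageEntityTextUrl. A hypothetical end-to-end call (the sample message is made up, and the exact offset and length depend on the emojiness weighting, so they are only indicated in a comment):

    # Hypothetical usage of the updated parser; assumes the default url_re
    # and delimiters from the diff above.
    text, entities = parse_tg('Visit [the repo](https://github.com/LonamiWebs/Telethon)!')

    print(text)  # 'Visit the repo!'
    # entities should contain one MessageEntityTextUrl covering 'the repo',
    # with url='https://github.com/LonamiWebs/Telethon'.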