From a654ee497231bcf795e5c947f3033a30fadd043d Mon Sep 17 00:00:00 2001 From: HBcao233 Date: Thu, 3 Oct 2024 23:33:51 +0800 Subject: [PATCH] feat: Support
info: https://core.telegram.org/bots/api#html-style --- telethon/extensions/html.py | 96 +++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/telethon/extensions/html.py b/telethon/extensions/html.py index 0e2e8f33..9ca8e5f5 100644 --- a/telethon/extensions/html.py +++ b/telethon/extensions/html.py @@ -1,6 +1,7 @@ """ Simple HTML -> Telegram entity parser. """ + from collections import deque from html import escape from html.parser import HTMLParser @@ -9,18 +10,25 @@ from typing import Iterable, Tuple, List from ..helpers import add_surrogate, del_surrogate, within_surrogate, strip_text from ..tl import TLObject from ..tl.types import ( - MessageEntityBold, MessageEntityItalic, MessageEntityCode, - MessageEntityPre, MessageEntityEmail, MessageEntityUrl, - MessageEntityTextUrl, MessageEntityMentionName, - MessageEntityUnderline, MessageEntityStrike, MessageEntityBlockquote, - TypeMessageEntity + MessageEntityBold, + MessageEntityItalic, + MessageEntityCode, + MessageEntityPre, + MessageEntityEmail, + MessageEntityUrl, + MessageEntityTextUrl, + MessageEntityMentionName, + MessageEntityUnderline, + MessageEntityStrike, + MessageEntityBlockquote, + TypeMessageEntity, ) class HTMLToTelegramParser(HTMLParser): def __init__(self): super().__init__() - self.text = '' + self.text = "" self.entities = [] self._building_entities = {} self._open_tags = deque() @@ -33,17 +41,19 @@ class HTMLToTelegramParser(HTMLParser): attrs = dict(attrs) EntityType = None args = {} - if tag == 'strong' or tag == 'b': + if tag == "strong" or tag == "b": EntityType = MessageEntityBold - elif tag == 'em' or tag == 'i': + elif tag == "em" or tag == "i": EntityType = MessageEntityItalic - elif tag == 'u': + elif tag == "u": EntityType = MessageEntityUnderline - elif tag == 'del' or tag == 's': + elif tag == "del" or tag == "s": EntityType = MessageEntityStrike - elif tag == 'blockquote': + elif tag == "blockquote": EntityType = MessageEntityBlockquote - elif tag == 'code': + if "expandable" in attrs.keys(): + args["collapsed"] = True + elif tag == "code": try: # If we're in the middle of a
 tag, this  tag is
                 # probably intended for syntax highlighting.
@@ -51,30 +61,30 @@ class HTMLToTelegramParser(HTMLParser):
                 # Syntax highlighting is set with
                 #     codeblock
                 # inside 
 tags
-                pre = self._building_entities['pre']
+                pre = self._building_entities["pre"]
                 try:
-                    pre.language = attrs['class'][len('language-'):]
+                    pre.language = attrs["class"][len("language-") :]
                 except KeyError:
                     pass
             except KeyError:
                 EntityType = MessageEntityCode
-        elif tag == 'pre':
+        elif tag == "pre":
             EntityType = MessageEntityPre
-            args['language'] = ''
-        elif tag == 'a':
+            args["language"] = ""
+        elif tag == "a":
             try:
-                url = attrs['href']
+                url = attrs["href"]
             except KeyError:
                 return
-            if url.startswith('mailto:'):
-                url = url[len('mailto:'):]
+            if url.startswith("mailto:"):
+                url = url[len("mailto:") :]
                 EntityType = MessageEntityEmail
             else:
                 if self.get_starttag_text() == url:
                     EntityType = MessageEntityUrl
                 else:
                     EntityType = MessageEntityTextUrl
-                    args['url'] = del_surrogate(url)
+                    args["url"] = del_surrogate(url)
                     url = None
             self._open_tags_meta.popleft()
             self._open_tags_meta.appendleft(url)
@@ -84,11 +94,12 @@ class HTMLToTelegramParser(HTMLParser):
                 offset=len(self.text),
                 # The length will be determined when closing the tag.
                 length=0,
-                **args)
+                **args,
+            )
 
     def handle_data(self, text):
-        previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else ''
-        if previous_tag == 'a':
+        previous_tag = self._open_tags[0] if len(self._open_tags) > 0 else ""
+        if previous_tag == "a":
             url = self._open_tags_meta[0]
             if url:
                 text = url
@@ -129,23 +140,23 @@ def parse(html: str) -> Tuple[str, List[TypeMessageEntity]]:
 
 
 ENTITY_TO_FORMATTER = {
-    MessageEntityBold: ('', ''),
-    MessageEntityItalic: ('', ''),
-    MessageEntityCode: ('', ''),
-    MessageEntityUnderline: ('', ''),
-    MessageEntityStrike: ('', ''),
-    MessageEntityBlockquote: ('
', '
'), + MessageEntityBold: ("", ""), + MessageEntityItalic: ("", ""), + MessageEntityCode: ("", ""), + MessageEntityUnderline: ("", ""), + MessageEntityStrike: ("", ""), + MessageEntityBlockquote: ("
", "
"), MessageEntityPre: lambda e, _: ( - "
\n"
-        "    \n"
-        "        ".format(e.language), "{}\n"
-        "    \n"
-        "
" + "
\n" "    \n" "        ".format(e.language),
+        "{}\n" "    \n" "
", + ), + MessageEntityEmail: lambda _, t: (''.format(t), ""), + MessageEntityUrl: lambda _, t: (''.format(t), ""), + MessageEntityTextUrl: lambda e, _: (''.format(escape(e.url)), ""), + MessageEntityMentionName: lambda e, _: ( + ''.format(e.user_id), + "", ), - MessageEntityEmail: lambda _, t: (''.format(t), ''), - MessageEntityUrl: lambda _, t: (''.format(t), ''), - MessageEntityTextUrl: lambda e, _: (''.format(escape(e.url)), ''), - MessageEntityMentionName: lambda e, _: (''.format(e.user_id), ''), } @@ -186,7 +197,12 @@ def unparse(text: str, entities: Iterable[TypeMessageEntity]) -> str: while within_surrogate(text, at): at += 1 - text = text[:at] + what + escape(text[at:next_escape_bound]) + text[next_escape_bound:] + text = ( + text[:at] + + what + + escape(text[at:next_escape_bound]) + + text[next_escape_bound:] + ) next_escape_bound = at text = escape(text[:next_escape_bound]) + text[next_escape_bound:]