mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2025-03-28 05:44:14 +03:00
Locally strip outgoing message text respecting entities
This commit is contained in:
parent
d854babf22
commit
aaee092a46
|
@ -6,11 +6,12 @@ from collections import deque
|
||||||
from html import escape, unescape
|
from html import escape, unescape
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
|
from .. import helpers
|
||||||
from ..tl.types import (
|
from ..tl.types import (
|
||||||
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
||||||
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
|
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
|
||||||
MessageEntityTextUrl, MessageEntityMentionName
|
MessageEntityTextUrl, MessageEntityMentionName
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Helpers from markdown.py
|
# Helpers from markdown.py
|
||||||
|
@ -126,7 +127,8 @@ def parse(html):
|
||||||
|
|
||||||
parser = HTMLToTelegramParser()
|
parser = HTMLToTelegramParser()
|
||||||
parser.feed(_add_surrogate(html))
|
parser.feed(_add_surrogate(html))
|
||||||
return _del_surrogate(parser.text), parser.entities
|
text = helpers.strip_text(parser.text, parser.entities)
|
||||||
|
return _del_surrogate(text), parser.entities
|
||||||
|
|
||||||
|
|
||||||
def unparse(text, entities):
|
def unparse(text, entities):
|
||||||
|
|
|
@ -5,12 +5,12 @@ since they seem to count as two characters and it's a bit strange.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ..helpers import add_surrogate, del_surrogate
|
from ..helpers import add_surrogate, del_surrogate, strip_text
|
||||||
from ..tl import TLObject
|
from ..tl import TLObject
|
||||||
from ..tl.types import (
|
from ..tl.types import (
|
||||||
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
||||||
MessageEntityPre, MessageEntityTextUrl, MessageEntityMentionName
|
MessageEntityPre, MessageEntityTextUrl, MessageEntityMentionName
|
||||||
)
|
)
|
||||||
|
|
||||||
DEFAULT_DELIMITERS = {
|
DEFAULT_DELIMITERS = {
|
||||||
'**': MessageEntityBold,
|
'**': MessageEntityBold,
|
||||||
|
@ -125,6 +125,7 @@ def parse(message, delimiters=None, url_re=None):
|
||||||
+ message[current.offset:]
|
+ message[current.offset:]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
message = strip_text(message, result)
|
||||||
return del_surrogate(message), result
|
return del_surrogate(message), result
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,41 @@ def del_surrogate(text):
|
||||||
return text.encode('utf-16', 'surrogatepass').decode('utf-16')
|
return text.encode('utf-16', 'surrogatepass').decode('utf-16')
|
||||||
|
|
||||||
|
|
||||||
|
def strip_text(text, entities):
    """
    Strips whitespace from the given text modifying the provided entities.

    The ``entities`` list is mutated in place: entities are shrunk or
    removed when they cover stripped whitespace, and their offsets are
    shifted left by the amount of leading whitespace that was removed,
    so they keep pointing at the same characters of the returned text.

    This assumes that there are no overlapping entities, that their length
    is greater or equal to one, that their length is not out of bounds,
    and that they are sorted by offset.

    :param text: the text to strip.
    :param entities: a list of objects with mutable integer ``offset``
        and ``length`` attributes (may be empty or ``None``).
    :return: the stripped text.
    """
    if not entities:
        return text.strip()

    # Trailing whitespace: removing it never moves any offset, only the
    # last entity may need to shrink (or disappear) if it reaches the end.
    while text and text[-1].isspace():
        e = entities[-1]
        if e.offset + e.length == len(text):
            if e.length == 1:
                del entities[-1]
                if not entities:
                    return text.strip()
            else:
                e.length -= 1
        text = text[:-1]

    # Leading whitespace: every removed character shifts all the entities
    # that start after it one position to the left, while an entity that
    # starts at offset 0 loses one character instead.
    while text and text[0].isspace():
        # Walk the indices in reverse so deleting an entry is safe.
        for i in reversed(range(len(entities))):
            e = entities[i]
            if e.offset != 0:
                e.offset -= 1
                continue

            if e.length == 1:
                del entities[i]
                if not entities:
                    # Trailing whitespace is already gone at this point.
                    return text.lstrip()
            else:
                e.length -= 1

        text = text[1:]

    return text
|
||||||
|
|
||||||
|
|
||||||
# endregion
|
# endregion
|
||||||
|
|
||||||
# region Cryptographic related utils
|
# region Cryptographic related utils
|
||||||
|
|
Loading…
Reference in New Issue
Block a user