mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2024-11-10 19:46:36 +03:00
Locally strip outgoing message text respecting entities
This commit is contained in:
parent
d854babf22
commit
aaee092a46
|
@ -6,6 +6,7 @@ from collections import deque
|
|||
from html import escape, unescape
|
||||
from html.parser import HTMLParser
|
||||
|
||||
from .. import helpers
|
||||
from ..tl.types import (
|
||||
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
||||
MessageEntityPre, MessageEntityEmail, MessageEntityUrl,
|
||||
|
@ -126,7 +127,8 @@ def parse(html):
|
|||
|
||||
parser = HTMLToTelegramParser()
|
||||
parser.feed(_add_surrogate(html))
|
||||
return _del_surrogate(parser.text), parser.entities
|
||||
text = helpers.strip_text(parser.text, parser.entities)
|
||||
return _del_surrogate(text), parser.entities
|
||||
|
||||
|
||||
def unparse(text, entities):
|
||||
|
|
|
@ -5,7 +5,7 @@ since they seem to count as two characters and it's a bit strange.
|
|||
"""
|
||||
import re
|
||||
|
||||
from ..helpers import add_surrogate, del_surrogate
|
||||
from ..helpers import add_surrogate, del_surrogate, strip_text
|
||||
from ..tl import TLObject
|
||||
from ..tl.types import (
|
||||
MessageEntityBold, MessageEntityItalic, MessageEntityCode,
|
||||
|
@ -125,6 +125,7 @@ def parse(message, delimiters=None, url_re=None):
|
|||
+ message[current.offset:]
|
||||
)
|
||||
|
||||
message = strip_text(message, result)
|
||||
return del_surrogate(message), result
|
||||
|
||||
|
||||
|
|
|
@ -33,6 +33,41 @@ def del_surrogate(text):
|
|||
return text.encode('utf-16', 'surrogatepass').decode('utf-16')
|
||||
|
||||
|
||||
def strip_text(text, entities):
    """
    Strips whitespace from the given text modifying the provided entities.

    The leading and trailing whitespace of ``text`` is removed and every
    entity in ``entities`` is adjusted *in place* so that it keeps pointing
    at the same characters of the stripped text:

    * all offsets are shifted left by the amount of leading whitespace
      that was removed (not only the entity that starts at offset 0);
    * entities reaching into the removed leading or trailing whitespace
      are shortened so they stay within the bounds of the new text;
    * entities left without any character (for instance, those covering
      only stripped whitespace) are removed from the list.

    Unlike a character-by-character approach, this does not rely on the
    entities being sorted or non-overlapping, because every entity is
    clamped independently.

    :param text: the text to strip.
    :param entities: a mutable list of objects with integer ``offset``
        and ``length`` attributes, or a falsy value if there are none.
        The list and the objects it contains are modified in place.
    :return: the stripped text.
    """
    if not entities:
        return text.strip()

    # Measure how much is trimmed from the left before trimming the right,
    # since that amount is how far every remaining offset has to move.
    without_leading = text.lstrip()
    shift = len(text) - len(without_leading)
    stripped = without_leading.rstrip()
    final_len = len(stripped)

    kept = []
    for entity in entities:
        # Translate the entity span into the stripped text's coordinates
        # and clamp it to the [0, final_len] range.
        start = max(entity.offset - shift, 0)
        end = min(entity.offset + entity.length - shift, final_len)
        if end > start:
            entity.offset = start
            entity.length = end - start
            kept.append(entity)

    # Slice assignment keeps the caller's list object, because callers
    # hold on to the very same list after this function returns.
    entities[:] = kept
    return stripped
|
||||
|
||||
|
||||
# endregion
|
||||
|
||||
# region Cryptographic related utils
|
||||
|
|
Loading…
Reference in New Issue
Block a user