mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2025-07-10 16:12:22 +03:00
Remove text stripping
This commit is contained in:
parent
033b56f1d3
commit
0e48a01ef4
|
@ -30,7 +30,7 @@ from ...tl.types import (
|
||||||
MessageEntityUnderline,
|
MessageEntityUnderline,
|
||||||
MessageEntityUrl,
|
MessageEntityUrl,
|
||||||
)
|
)
|
||||||
from .strings import add_surrogate, del_surrogate, strip_text, within_surrogate
|
from .strings import add_surrogate, del_surrogate, within_surrogate
|
||||||
|
|
||||||
|
|
||||||
class HTMLToTelegramParser(HTMLParser):
|
class HTMLToTelegramParser(HTMLParser):
|
||||||
|
@ -141,8 +141,7 @@ def parse(html: str) -> Tuple[str, List[MessageEntity]]:
|
||||||
|
|
||||||
parser = HTMLToTelegramParser()
|
parser = HTMLToTelegramParser()
|
||||||
parser.feed(add_surrogate(html))
|
parser.feed(add_surrogate(html))
|
||||||
text = strip_text(parser.text, parser.entities)
|
return del_surrogate(parser.text), parser.entities
|
||||||
return del_surrogate(text), parser.entities
|
|
||||||
|
|
||||||
|
|
||||||
ENTITY_TO_FORMATTER: Dict[
|
ENTITY_TO_FORMATTER: Dict[
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
import struct
|
import struct
|
||||||
from typing import List, Optional
|
from typing import Optional
|
||||||
|
|
||||||
from ...tl.abcs import MessageEntity
|
|
||||||
|
|
||||||
|
|
||||||
def add_surrogate(text: str) -> str:
|
def add_surrogate(text: str) -> str:
|
||||||
|
@ -33,51 +31,3 @@ def within_surrogate(text: str, index: int, *, length: Optional[int] = None) ->
|
||||||
and "\ud800" <= text[index - 1] <= "\udfff" # previous is
|
and "\ud800" <= text[index - 1] <= "\udfff" # previous is
|
||||||
and "\ud800" <= text[index] <= "\udfff" # current is
|
and "\ud800" <= text[index] <= "\udfff" # current is
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def strip_text(text: str, entities: "List[MessageEntity]") -> str:
    """
    Strip leading and trailing whitespace from ``text``, fixing up ``entities``.

    The entity list is modified in place: entities that end in stripped
    trailing whitespace are shortened, entities that start in stripped
    leading whitespace are shortened, every other entity is shifted left by
    the amount of leading whitespace removed, and entities that end up empty
    are deleted from the list.

    This assumes that there are no overlapping entities, that their length
    is greater or equal to one, and that their length is not out of bounds.
    The trailing pass only inspects ``entities[-1]``, so it additionally
    relies on the last list item being the entity closest to the end of the
    text.

    :param text: the text to strip.
    :param entities: objects exposing integer ``offset`` and ``length``
        attributes; mutated in place.
    :return: the stripped text.
    """
    if not entities:
        return text.strip()

    assert all(isinstance(getattr(e, "offset"), int) for e in entities)

    # Trailing whitespace: drop one character at a time so the entity that
    # touches the end of the text can be shrunk in lockstep.
    while text and text[-1].isspace():
        e = entities[-1]
        offset, length = getattr(e, "offset", None), getattr(e, "length", None)
        assert isinstance(offset, int) and isinstance(length, int)

        if offset + length == len(text):
            if length == 1:
                # The entity covered only the character being removed.
                del entities[-1]
                if not entities:
                    return text.strip()
            else:
                # Write the new length back to the entity; decrementing only
                # the local copy would leave the entity out of bounds.
                setattr(e, "length", length - 1)
        text = text[:-1]

    # Leading whitespace: every removed character shifts all entities left.
    while text and text[0].isspace():
        # Walk backwards so deleting index ``i`` never disturbs the indices
        # that are still to be visited.
        for i in reversed(range(len(entities))):
            e = entities[i]
            offset, length = getattr(e, "offset", None), getattr(e, "length", None)
            assert isinstance(offset, int) and isinstance(length, int)

            if offset != 0:
                setattr(e, "offset", offset - 1)
                continue

            if length == 1:
                # Remove the entity being inspected, not whichever one
                # happens to sit at index 0.
                del entities[i]
                if not entities:
                    return text.lstrip()
            else:
                setattr(e, "length", length - 1)

        text = text[1:]

    return text
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user