mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2024-11-10 19:46:36 +03:00
Remove text stripping
This commit is contained in:
parent
033b56f1d3
commit
0e48a01ef4
|
@ -30,7 +30,7 @@ from ...tl.types import (
|
|||
MessageEntityUnderline,
|
||||
MessageEntityUrl,
|
||||
)
|
||||
from .strings import add_surrogate, del_surrogate, strip_text, within_surrogate
|
||||
from .strings import add_surrogate, del_surrogate, within_surrogate
|
||||
|
||||
|
||||
class HTMLToTelegramParser(HTMLParser):
|
||||
|
@ -141,8 +141,7 @@ def parse(html: str) -> Tuple[str, List[MessageEntity]]:
|
|||
|
||||
parser = HTMLToTelegramParser()
|
||||
parser.feed(add_surrogate(html))
|
||||
text = strip_text(parser.text, parser.entities)
|
||||
return del_surrogate(text), parser.entities
|
||||
return del_surrogate(parser.text), parser.entities
|
||||
|
||||
|
||||
ENTITY_TO_FORMATTER: Dict[
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
import struct
|
||||
from typing import List, Optional
|
||||
|
||||
from ...tl.abcs import MessageEntity
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def add_surrogate(text: str) -> str:
|
||||
|
@ -33,51 +31,3 @@ def within_surrogate(text: str, index: int, *, length: Optional[int] = None) ->
|
|||
and "\ud800" <= text[index - 1] <= "\udfff" # previous is
|
||||
and "\ud800" <= text[index] <= "\udfff" # current is
|
||||
)
|
||||
|
||||
|
||||
def strip_text(text: str, entities: "List[MessageEntity]") -> str:
    """
    Strips whitespace from the given text modifying the provided entities.

    The entities are adjusted in place so that they keep pointing at the same
    characters of the stripped text: offsets are shifted left by the amount of
    leading whitespace removed, ranges are clamped to the stripped text, and
    entities left covering no characters are removed from the list.

    This assumes that every entity has integer ``offset`` and ``length``
    attributes, expressed in the same units as indices into ``text``.

    :param text: the text to strip.
    :param entities: the entities describing ``text``; mutated in place.
    :return: ``text`` without leading or trailing whitespace.
    """
    if not entities:
        return text.strip()

    without_trailing = text.rstrip()
    stripped = without_trailing.lstrip()
    leading = len(without_trailing) - len(stripped)
    end_limit = len(stripped)

    # Walk backwards so that deleting an emptied entity does not shift the
    # indices of the entities that are still to be visited.
    for i in reversed(range(len(entities))):
        e = entities[i]
        offset, length = getattr(e, "offset", None), getattr(e, "length", None)
        assert isinstance(offset, int) and isinstance(length, int)

        # Re-express the entity range relative to the stripped text, then
        # clamp it so it never covers the whitespace that was removed.
        start = max(offset - leading, 0)
        end = min(offset - leading + length, end_limit)

        if end <= start:
            # The entity only covered stripped whitespace.
            del entities[i]
        else:
            setattr(e, "offset", start)
            setattr(e, "length", end - start)

    return stripped
|
||||
|
|
Loading…
Reference in New Issue
Block a user