Remove text stripping

This commit is contained in:
Lonami Exo 2024-03-16 19:32:27 +01:00
parent 033b56f1d3
commit 0e48a01ef4
2 changed files with 3 additions and 54 deletions

View File

@ -30,7 +30,7 @@ from ...tl.types import (
MessageEntityUnderline,
MessageEntityUrl,
)
from .strings import add_surrogate, del_surrogate, strip_text, within_surrogate
from .strings import add_surrogate, del_surrogate, within_surrogate
class HTMLToTelegramParser(HTMLParser):
@ -141,8 +141,7 @@ def parse(html: str) -> Tuple[str, List[MessageEntity]]:
parser = HTMLToTelegramParser()
parser.feed(add_surrogate(html))
text = strip_text(parser.text, parser.entities)
return del_surrogate(text), parser.entities
return del_surrogate(parser.text), parser.entities
ENTITY_TO_FORMATTER: Dict[

View File

@ -1,7 +1,5 @@
import struct
from typing import List, Optional
from ...tl.abcs import MessageEntity
from typing import Optional
def add_surrogate(text: str) -> str:
@ -33,51 +31,3 @@ def within_surrogate(text: str, index: int, *, length: Optional[int] = None) ->
and "\ud800" <= text[index - 1] <= "\udfff" # previous is
and "\ud800" <= text[index] <= "\udfff" # current is
)
def strip_text(text: str, entities: "List[MessageEntity]") -> str:
    """
    Strip leading and trailing whitespace from ``text``, keeping ``entities`` aligned.

    The ``entities`` list is modified in place: entities are shifted left for
    every leading character removed, shrunk when the removed character was
    inside them, and deleted from the list once they would become empty.

    This assumes every entity has integer ``offset`` and ``length`` attributes,
    that ``length`` is greater or equal to one, and that no entity extends past
    the end of ``text``.

    :param text: The text to strip.
    :param entities: Entities describing ``text``; mutated in place.
    :return: The stripped text.
    """
    if not entities:
        return text.strip()

    # Trailing whitespace: removing the last character only affects the
    # entities that end exactly at the current end of the text.
    while text and text[-1].isspace():
        end = len(text)
        # Walk backwards so deleting by index does not disturb the indices
        # still to be visited.
        for i in reversed(range(len(entities))):
            e = entities[i]
            offset, length = getattr(e, "offset", None), getattr(e, "length", None)
            # Narrows the types for static checkers; not input validation.
            assert isinstance(offset, int) and isinstance(length, int)

            if offset + length != end:
                continue
            if length == 1:
                del entities[i]
            else:
                # Write the new length back to the entity itself; updating
                # only the local variable would leave the entity stale.
                setattr(e, "length", length - 1)

        if not entities:
            return text.strip()
        text = text[:-1]

    # Leading whitespace: removing the first character shifts every entity
    # that starts later, and shrinks (or removes) those starting at zero.
    while text and text[0].isspace():
        for i in reversed(range(len(entities))):
            e = entities[i]
            offset, length = getattr(e, "offset", None), getattr(e, "length", None)
            assert isinstance(offset, int) and isinstance(length, int)

            if offset != 0:
                setattr(e, "offset", offset - 1)
            elif length == 1:
                # Delete the entity being inspected, which is not
                # necessarily the first one in the list.
                del entities[i]
            else:
                setattr(e, "length", length - 1)

        if not entities:
            return text.lstrip()
        text = text[1:]

    return text