Introduce trace-level logs to MessageBox

These will log sensitive information.
They are disabled when running with PYTHONOPTIMIZED.
They can only be enabled by setting a level lower than DEBUG,
which is difficult to do on accident.
This commit is contained in:
Lonami Exo 2022-12-17 23:13:06 +01:00
parent 5080715565
commit c72c7b160a
2 changed files with 43 additions and 2 deletions

View File

@ -19,6 +19,7 @@ to get the difference.
import asyncio import asyncio
import datetime import datetime
import time import time
import logging
from enum import Enum from enum import Enum
from .session import SessionState, ChannelState from .session import SessionState, ChannelState
from ..tl import types as tl, functions as fn from ..tl import types as tl, functions as fn
@ -51,6 +52,10 @@ ENTRY_ACCOUNT = object()
ENTRY_SECRET = object() ENTRY_SECRET = object()
# Integers will be Channel-specific `pts`, and includes "megagroup", "broadcast" and "supergroup" channels. # Integers will be Channel-specific `pts`, and includes "megagroup", "broadcast" and "supergroup" channels.
# Python's logging doesn't define a TRACE level. Pick halfway between DEBUG and NOTSET.
# We don't define a name for this as libraries shouldn't do that though.
LOG_LEVEL_TRACE = (logging.DEBUG - logging.NOTSET) // 2
_sentinel = object() _sentinel = object()
def next_updates_deadline(): def next_updates_deadline():
@ -155,10 +160,11 @@ class PossibleGap:
# #
# See https://core.telegram.org/api/updates#message-related-event-sequences. # See https://core.telegram.org/api/updates#message-related-event-sequences.
class MessageBox: class MessageBox:
__slots__ = ('map', 'date', 'seq', 'next_deadline', 'possible_gaps', 'getting_diff_for', 'reset_deadlines_for') __slots__ = ('_log', 'map', 'date', 'seq', 'next_deadline', 'possible_gaps', 'getting_diff_for', 'reset_deadlines_for')
def __init__( def __init__(
self, self,
log,
# Map each entry to their current state. # Map each entry to their current state.
map: dict = _sentinel, # entry -> state map: dict = _sentinel, # entry -> state
@ -185,6 +191,7 @@ class MessageBox:
# Stored in the message box in order to reuse the allocation. # Stored in the message box in order to reuse the allocation.
reset_deadlines_for: set = _sentinel # entry reset_deadlines_for: set = _sentinel # entry
): ):
self._log = log
self.map = {} if map is _sentinel else map self.map = {} if map is _sentinel else map
self.date = date self.date = date
self.seq = seq self.seq = seq
@ -193,12 +200,29 @@ class MessageBox:
self.getting_diff_for = set() if getting_diff_for is _sentinel else getting_diff_for self.getting_diff_for = set() if getting_diff_for is _sentinel else getting_diff_for
self.reset_deadlines_for = set() if reset_deadlines_for is _sentinel else reset_deadlines_for self.reset_deadlines_for = set() if reset_deadlines_for is _sentinel else reset_deadlines_for
if __debug__:
# Need this to tell them apart when printing the repr of the state map.
# Could be done once at the global level, but that makes configuring logging
# more annoying because it would need to be done before importing telethon.
self._trace('ENTRY_ACCOUNT = %r; ENTRY_SECRET = %r', ENTRY_ACCOUNT, ENTRY_SECRET)
self._trace('Created new MessageBox with map = %r, date = %r, seq = %r', self.map, self.date, self.seq)
def _trace(self, msg, *args, **kwargs):
# Calls to trace can't really be removed beforehand without some dark magic.
# So every call to trace is prefixed with `if __debug__`` instead, to remove
# it when using `python -O`. Probably unnecessary, but it's nice to avoid
# paying the cost for something that is not used.
self._log.log(LOG_LEVEL_TRACE, msg, *args, **kwargs)
# region Creation, querying, and setting base state. # region Creation, querying, and setting base state.
def load(self, session_state, channel_states): def load(self, session_state, channel_states):
""" """
Create a [`MessageBox`] from a previously known update state. Create a [`MessageBox`] from a previously known update state.
""" """
if __debug__:
self._trace('Loading MessageBox with session_state = %r, channel_states = %r', session_state, channel_states)
deadline = next_updates_deadline() deadline = next_updates_deadline()
self.map.clear() self.map.clear()
@ -304,6 +328,9 @@ class MessageBox:
# Should be called right after login if [`MessageBox::new`] was used, otherwise undesirable # Should be called right after login if [`MessageBox::new`] was used, otherwise undesirable
# updates will be fetched. # updates will be fetched.
def set_state(self, state, reset=True): def set_state(self, state, reset=True):
if __debug__:
self._trace('Setting state %r', state)
deadline = next_updates_deadline() deadline = next_updates_deadline()
if state.pts != NO_SEQ or not reset: if state.pts != NO_SEQ or not reset:
@ -330,6 +357,9 @@ class MessageBox:
# #
# The update state will only be updated if no entry was known previously. # The update state will only be updated if no entry was known previously.
def try_set_channel_state(self, id, pts): def try_set_channel_state(self, id, pts):
if __debug__:
self._trace('Trying to set channel state for %r: %r', id, pts)
if id not in self.map: if id not in self.map:
self.map[id] = State(pts=pts, deadline=next_updates_deadline()) self.map[id] = State(pts=pts, deadline=next_updates_deadline())
@ -383,6 +413,9 @@ class MessageBox:
chat_hashes, chat_hashes,
result, # out list of updates; returns list of user, chat, or raise if gap result, # out list of updates; returns list of user, chat, or raise if gap
): ):
if __debug__:
self._trace('Processing updates %s', updates)
date = getattr(updates, 'date', None) date = getattr(updates, 'date', None)
if date is None: if date is None:
# updatesTooLong is the only one with no date (we treat it as a gap) # updatesTooLong is the only one with no date (we treat it as a gap)
@ -569,6 +602,9 @@ class MessageBox:
diff, diff,
chat_hashes, chat_hashes,
): ):
if __debug__:
self._trace('Applying account difference %s', diff)
finish = None finish = None
result = None result = None
@ -692,6 +728,9 @@ class MessageBox:
chat_hashes, chat_hashes,
): ):
entry = request.channel.channel_id entry = request.channel.channel_id
if __debug__:
self._trace('Applying channel difference for %r: %s', entry, diff)
self.possible_gaps.pop(entry, None) self.possible_gaps.pop(entry, None)
if isinstance(diff, tl.updates.ChannelDifferenceEmpty): if isinstance(diff, tl.updates.ChannelDifferenceEmpty):
@ -735,6 +774,8 @@ class MessageBox:
def end_channel_difference(self, request, reason: PrematureEndReason, chat_hashes): def end_channel_difference(self, request, reason: PrematureEndReason, chat_hashes):
entry = request.channel.channel_id entry = request.channel.channel_id
if __debug__:
self._trace('Ending channel difference for %r', entry)
if reason == PrematureEndReason.TEMPORARY_SERVER_ISSUES: if reason == PrematureEndReason.TEMPORARY_SERVER_ISSUES:
# Temporary issues. End getting difference without updating the pts so we can retry later. # Temporary issues. End getting difference without updating the pts so we can retry later.

View File

@ -432,7 +432,7 @@ class TelegramBaseClient(abc.ABC):
# This is backported from v2 in a very ad-hoc way just to get proper update handling # This is backported from v2 in a very ad-hoc way just to get proper update handling
self._catch_up = catch_up self._catch_up = catch_up
self._updates_queue = asyncio.Queue() self._updates_queue = asyncio.Queue()
self._message_box = MessageBox() self._message_box = MessageBox(self._log['messagebox'])
# This entity cache is tailored for the messagebox and is not used for absolutely everything like _entity_cache # This entity cache is tailored for the messagebox and is not used for absolutely everything like _entity_cache
self._mb_entity_cache = MbEntityCache() # required for proper update handling (to know when to getDifference) self._mb_entity_cache = MbEntityCache() # required for proper update handling (to know when to getDifference)