Introduce trace-level logs to MessageBox

These will log sensitive information. They are disabled when running with PYTHONOPTIMIZED. They can only be enabled by setting a level lower than DEBUG, which is difficult to do on accident.
2025-08-03 03:30:09 +03:00 · 2022-12-17 23:13:06 +01:00 · 2022-12-17 23:13:06 +01:00 · c72c7b160a
commit c72c7b160a
parent 5080715565
2 changed files with 43 additions and 2 deletions
--- a/telethon/_updates/messagebox.py
+++ b/telethon/_updates/messagebox.py
@ -19,6 +19,7 @@ to get the difference.
 import asyncio
 import datetime
 import time
+import logging
 from enum import Enum
 from .session import SessionState, ChannelState
 from ..tl import types as tl, functions as fn
@ -51,6 +52,10 @@ ENTRY_ACCOUNT = object()
 ENTRY_SECRET = object()
 # Integers will be Channel-specific `pts`, and includes "megagroup", "broadcast" and "supergroup" channels.

+# Python's logging doesn't define a TRACE level. Pick halfway between DEBUG and NOTSET.
+# We don't define a name for this as libraries shouldn't do that though.
+LOG_LEVEL_TRACE = (logging.DEBUG - logging.NOTSET) // 2
+
 _sentinel = object()

 def next_updates_deadline():
@ -155,10 +160,11 @@ class PossibleGap:
 #
 # See https://core.telegram.org/api/updates#message-related-event-sequences.
 class MessageBox:
-    __slots__ = ('map', 'date', 'seq', 'next_deadline', 'possible_gaps', 'getting_diff_for', 'reset_deadlines_for')
+    __slots__ = ('_log', 'map', 'date', 'seq', 'next_deadline', 'possible_gaps', 'getting_diff_for', 'reset_deadlines_for')

    def __init__(
        self,
+        log,
        # Map each entry to their current state.
        map: dict = _sentinel,  # entry -> state

@ -185,6 +191,7 @@ class MessageBox:
        # Stored in the message box in order to reuse the allocation.
        reset_deadlines_for: set = _sentinel  # entry
    ):
+        self._log = log
        self.map = {} if map is _sentinel else map
        self.date = date
        self.seq = seq
@ -193,12 +200,29 @@ class MessageBox:
        self.getting_diff_for = set() if getting_diff_for is _sentinel else getting_diff_for
        self.reset_deadlines_for = set() if reset_deadlines_for is _sentinel else reset_deadlines_for

+        if __debug__:
+            # Need this to tell them apart when printing the repr of the state map.
+            # Could be done once at the global level, but that makes configuring logging
+            # more annoying because it would need to be done before importing telethon.
+            self._trace('ENTRY_ACCOUNT = %r; ENTRY_SECRET = %r', ENTRY_ACCOUNT, ENTRY_SECRET)
+            self._trace('Created new MessageBox with map = %r, date = %r, seq = %r', self.map, self.date, self.seq)
+
+    def _trace(self, msg, *args, **kwargs):
+        # Calls to trace can't really be removed beforehand without some dark magic.
+        # So every call to trace is prefixed with `if __debug__`` instead, to remove
+        # it when using `python -O`. Probably unnecessary, but it's nice to avoid
+        # paying the cost for something that is not used.
+        self._log.log(LOG_LEVEL_TRACE, msg, *args, **kwargs)
+
    # region Creation, querying, and setting base state.

    def load(self, session_state, channel_states):
        """
        Create a [`MessageBox`] from a previously known update state.
        """
+        if __debug__:
+            self._trace('Loading MessageBox with session_state = %r, channel_states = %r', session_state, channel_states)
+
        deadline = next_updates_deadline()

        self.map.clear()
@ -304,6 +328,9 @@ class MessageBox:
    # Should be called right after login if [`MessageBox::new`] was used, otherwise undesirable
    # updates will be fetched.
    def set_state(self, state, reset=True):
+        if __debug__:
+            self._trace('Setting state %r', state)
+
        deadline = next_updates_deadline()

        if state.pts != NO_SEQ or not reset:
@ -330,6 +357,9 @@ class MessageBox:
    #
    # The update state will only be updated if no entry was known previously.
    def try_set_channel_state(self, id, pts):
+        if __debug__:
+            self._trace('Trying to set channel state for %r: %r', id, pts)
+
        if id not in self.map:
            self.map[id] = State(pts=pts, deadline=next_updates_deadline())

@ -383,6 +413,9 @@ class MessageBox:
        chat_hashes,
        result,  # out list of updates; returns list of user, chat, or raise if gap
    ):
+        if __debug__:
+            self._trace('Processing updates %s', updates)
+
        date = getattr(updates, 'date', None)
        if date is None:
            # updatesTooLong is the only one with no date (we treat it as a gap)
@ -569,6 +602,9 @@ class MessageBox:
        diff,
        chat_hashes,
    ):
+        if __debug__:
+            self._trace('Applying account difference %s', diff)
+
        finish = None
        result = None

@ -692,6 +728,9 @@ class MessageBox:
        chat_hashes,
    ):
        entry = request.channel.channel_id
+        if __debug__:
+            self._trace('Applying channel difference for %r: %s', entry, diff)
+
        self.possible_gaps.pop(entry, None)

        if isinstance(diff, tl.updates.ChannelDifferenceEmpty):
@ -735,6 +774,8 @@ class MessageBox:

    def end_channel_difference(self, request, reason: PrematureEndReason, chat_hashes):
        entry = request.channel.channel_id
+        if __debug__:
+            self._trace('Ending channel difference for %r', entry)

        if reason == PrematureEndReason.TEMPORARY_SERVER_ISSUES:
            # Temporary issues. End getting difference without updating the pts so we can retry later.
--- a/telethon/client/telegrambaseclient.py
+++ b/telethon/client/telegrambaseclient.py
@ -432,7 +432,7 @@ class TelegramBaseClient(abc.ABC):
        # This is backported from v2 in a very ad-hoc way just to get proper update handling
        self._catch_up = catch_up
        self._updates_queue = asyncio.Queue()
-        self._message_box = MessageBox()
+        self._message_box = MessageBox(self._log['messagebox'])
        # This entity cache is tailored for the messagebox and is not used for absolutely everything like _entity_cache
        self._mb_entity_cache = MbEntityCache()  # required for proper update handling (to know when to getDifference)