From 7142734fb433c7867cd4f790aa26b50afa0dee87 Mon Sep 17 00:00:00 2001 From: Lonami Exo Date: Tue, 18 Jan 2022 18:19:14 +0100 Subject: [PATCH] Remove StateCache and EntityCache --- telethon/_client/auth.py | 2 - telethon/_client/telegrambaseclient.py | 8 -- telethon/_client/updates.py | 15 +-- telethon/_misc/entitycache.py | 179 ------------------------- telethon/_misc/statecache.py | 164 ---------------------- 5 files changed, 2 insertions(+), 366 deletions(-) delete mode 100644 telethon/_misc/entitycache.py delete mode 100644 telethon/_misc/statecache.py diff --git a/telethon/_client/auth.py b/telethon/_client/auth.py index 69b5df7e..7d122d8f 100644 --- a/telethon/_client/auth.py +++ b/telethon/_client/auth.py @@ -375,8 +375,6 @@ async def log_out(self: 'TelegramClient') -> bool: except errors.RPCError: return False - self._state_cache.reset() - await self.disconnect() return True diff --git a/telethon/_client/telegrambaseclient.py b/telethon/_client/telegrambaseclient.py index c8c60df1..9fbf3f14 100644 --- a/telethon/_client/telegrambaseclient.py +++ b/telethon/_client/telegrambaseclient.py @@ -239,10 +239,6 @@ async def connect(self: 'TelegramClient') -> None: ) all_dcs[dc.id] = dc - # Update state (for catching up after a disconnection) - # TODO Get state from channels too - self._state_cache = statecache.StateCache(self._session_state, self._log) - # Use known key, if any self._sender.auth_key.key = dc.auth @@ -351,10 +347,6 @@ async def _disconnect_coro(self: 'TelegramClient'): await asyncio.wait(self._updates_queue) self._updates_queue.clear() - pts, date = self._state_cache[None] - if pts and date: - if self._session_state: - await self._replace_session_state(pts=pts, date=date) async def _disconnect(self: 'TelegramClient'): """ diff --git a/telethon/_client/updates.py b/telethon/_client/updates.py index 141f6b95..92515dae 100644 --- a/telethon/_client/updates.py +++ b/telethon/_client/updates.py @@ -79,10 +79,7 @@ def list_event_handlers(self: 'TelegramClient')\ return [(callback, event) for event, callback in self._event_builders] async def catch_up(self: 'TelegramClient'): - pts, date = self._state_cache[None] - if not pts: - return - + return self._catching_up = True try: while True: @@ -131,8 +128,6 @@ async def catch_up(self: 'TelegramClient'): except (ConnectionError, asyncio.CancelledError): pass finally: - # TODO Save new pts to session - self._state_cache._pts_date = (pts, date) self._catching_up = False @@ -150,14 +145,12 @@ def _handle_update(self: 'TelegramClient', update): else: _process_update(self, update, {}, None) - self._state_cache.update(update) def _process_update(self: 'TelegramClient', update, entities, others): # This part is somewhat hot so we don't bother patching # update with channel ID/its state. Instead we just pass # arguments which is faster. - channel_id = self._state_cache.get_channel_id(update) - args = (update, entities, others, channel_id, self._state_cache[channel_id]) + args = (update, entities, others, channel_id, None) if self._dispatching_updates_queue is None: task = asyncio.create_task(_dispatch_update(self, *args)) self._updates_queue.add(task) @@ -168,8 +161,6 @@ def _process_update(self: 'TelegramClient', update, entities, others): self._dispatching_updates_queue.set() asyncio.create_task(_dispatch_queue_updates(self)) - self._state_cache.update(update) - async def _update_loop(self: 'TelegramClient'): # Pings' ID don't really need to be secure, just "random" rnd = lambda: random.randrange(-2**63, 2**63) @@ -326,7 +317,6 @@ async def _get_difference(self: 'TelegramClient', update, entities, channel_id, result = await self(_tl.fn.channels.GetFullChannel( utils.get_input_channel(where) )) - self._state_cache[channel_id] = result.full_chat.pts return result = await self(_tl.fn.updates.GetChannelDifference( @@ -340,7 +330,6 @@ async def _get_difference(self: 'TelegramClient', update, entities, channel_id, if not pts_date[0]: # First-time, can't get difference. Get pts instead. result = await self(_tl.fn.updates.GetState()) - self._state_cache[None] = result.pts, result.date return result = await self(_tl.fn.updates.GetDifference( diff --git a/telethon/_misc/entitycache.py b/telethon/_misc/entitycache.py deleted file mode 100644 index 2b7b8af1..00000000 --- a/telethon/_misc/entitycache.py +++ /dev/null @@ -1,179 +0,0 @@ -import inspect -import itertools - -from .._misc import utils -from .. import _tl -from .._sessions.types import EntityType, Entity - -# Which updates have the following fields? -_has_field = { - ('user_id', int): [], - ('chat_id', int): [], - ('channel_id', int): [], - ('peer', 'TypePeer'): [], - ('peer', 'TypeDialogPeer'): [], - ('message', 'TypeMessage'): [], -} - -# Note: We don't bother checking for some rare: -# * `UpdateChatParticipantAdd.inviter_id` integer. -# * `UpdateNotifySettings.peer` dialog peer. -# * `UpdatePinnedDialogs.order` list of dialog peers. -# * `UpdateReadMessagesContents.messages` list of messages. -# * `UpdateChatParticipants.participants` list of participants. -# -# There are also some uninteresting `update.message` of type string. - - -def _fill(): - for name in dir(_tl): - update = getattr(_tl, name) - if getattr(update, 'SUBCLASS_OF_ID', None) == 0x9f89304e: - cid = update.CONSTRUCTOR_ID - sig = inspect.signature(update.__init__) - for param in sig.parameters.values(): - vec = _has_field.get((param.name, param.annotation)) - if vec is not None: - vec.append(cid) - - # Future-proof check: if the documentation format ever changes - # then we won't be able to pick the update types we are interested - # in, so we must make sure we have at least an update for each field - # which likely means we are doing it right. - if not all(_has_field.values()): - raise RuntimeError('FIXME: Did the init signature or updates change?') - - -# We use a function to avoid cluttering the globals (with name/update/cid/doc) -_fill() - - -class EntityCache: - """ - In-memory input entity cache, defaultdict-like behaviour. - """ - def add(self, entities, _mappings={ - _tl.User.CONSTRUCTOR_ID: lambda e: (EntityType.BOT if e.bot else EntityType.USER, e.id, e.access_hash), - _tl.UserFull.CONSTRUCTOR_ID: lambda e: (EntityType.BOT if e.user.bot else EntityType.USER, e.user.id, e.user.access_hash), - _tl.Chat.CONSTRUCTOR_ID: lambda e: (EntityType.GROUP, e.id, 0), - _tl.ChatFull.CONSTRUCTOR_ID: lambda e: (EntityType.GROUP, e.id, 0), - _tl.ChatEmpty.CONSTRUCTOR_ID: lambda e: (EntityType.GROUP, e.id, 0), - _tl.ChatForbidden.CONSTRUCTOR_ID: lambda e: (EntityType.GROUP, e.id, 0), - _tl.Channel.CONSTRUCTOR_ID: lambda e: ( - EntityType.MEGAGROUP if e.megagroup else (EntityType.GIGAGROUP if e.gigagroup else EntityType.CHANNEL), - e.id, - e.access_hash, - ), - _tl.ChannelForbidden.CONSTRUCTOR_ID: lambda e: (EntityType.MEGAGROUP if e.megagroup else EntityType.CHANNEL, e.id, e.access_hash), - }): - """ - Adds the given entities to the cache, if they weren't saved before. - - Returns a list of Entity that can be saved in the session. - """ - if not utils.is_list_like(entities): - # Invariant: all "chats" and "users" are always iterables, - # and "user" and "chat" never are (so we wrap them inside a list). - # - # Itself may be already the entity we want to cache. - entities = itertools.chain( - [entities], - getattr(entities, 'chats', []), - getattr(entities, 'users', []), - (hasattr(entities, 'user') and [entities.user]) or [], - (hasattr(entities, 'chat') and [entities.user]) or [], - ) - - rows = [] - for e in entities: - try: - mapper = _mappings[e.CONSTRUCTOR_ID] - except (AttributeError, KeyError): - continue - - ty, id, access_hash = mapper(e) - - # Need to check for non-zero access hash unless it's a group (#354 and #392). - # Also check it's not `min` (`access_hash` usage is limited since layer 102). - if not getattr(e, 'min', False) and (access_hash or ty == Entity.GROUP): - rows.append(Entity(ty, id, access_hash)) - if id not in self.__dict__: - if ty in (EntityType.USER, EntityType.BOT): - self.__dict__[id] = _tl.InputPeerUser(id, access_hash) - elif ty in (EntityType.GROUP,): - self.__dict__[id] = _tl.InputPeerChat(id) - elif ty in (EntityType.CHANNEL, EntityType.MEGAGROUP, EntityType.GIGAGROUP): - self.__dict__[id] = _tl.InputPeerChannel(id, access_hash) - - return rows - - def __getitem__(self, item): - """ - Gets the corresponding :tl:`InputPeer` for the given ID or peer, - or raises ``KeyError`` on any error (i.e. cannot be found). - """ - if not isinstance(item, int) or item < 0: - try: - return self.__dict__[utils.get_peer_id(item)] - except TypeError: - raise KeyError('Invalid key will not have entity') from None - - for cls in (_tl.PeerUser, _tl.PeerChat, _tl.PeerChannel): - result = self.__dict__.get(utils.get_peer_id(cls(item))) - if result: - return result - - raise KeyError('No cached entity for the given key') - - def clear(self): - """ - Clear the entity cache. - """ - self.__dict__.clear() - - def ensure_cached( - self, - update, - has_user_id=frozenset(_has_field[('user_id', int)]), - has_chat_id=frozenset(_has_field[('chat_id', int)]), - has_channel_id=frozenset(_has_field[('channel_id', int)]), - has_peer=frozenset(_has_field[('peer', 'TypePeer')] + _has_field[('peer', 'TypeDialogPeer')]), - has_message=frozenset(_has_field[('message', 'TypeMessage')]) - ): - """ - Ensures that all the relevant entities in the given update are cached. - """ - # This method is called pretty often and we want it to have the lowest - # overhead possible. For that, we avoid `isinstance` and constantly - # getting attributes out of `_tl.` by "caching" the constructor IDs - # in sets inside the arguments, and using local variables. - dct = self.__dict__ - cid = update.CONSTRUCTOR_ID - if cid in has_user_id and \ - update.user_id not in dct: - return False - - if cid in has_chat_id and update.chat_id not in dct: - return False - - if cid in has_channel_id and update.channel_id not in dct: - return False - - if cid in has_peer and \ - utils.get_peer_id(update.peer) not in dct: - return False - - if cid in has_message: - x = update.message - y = getattr(x, 'peer_id', None) # handle MessageEmpty - if y and utils.get_peer_id(y) not in dct: - return False - - y = getattr(x, 'from_id', None) - if y and utils.get_peer_id(y) not in dct: - return False - - # We don't quite worry about entities anywhere else. - # This is enough. - - return True diff --git a/telethon/_misc/statecache.py b/telethon/_misc/statecache.py deleted file mode 100644 index c1a6d7c9..00000000 --- a/telethon/_misc/statecache.py +++ /dev/null @@ -1,164 +0,0 @@ -import inspect - -from .. import _tl - - -# Which updates have the following fields? -_has_channel_id = [] - - -# TODO EntityCache does the same. Reuse? -def _fill(): - for name in dir(_tl): - update = getattr(_tl, name) - if getattr(update, 'SUBCLASS_OF_ID', None) == 0x9f89304e: - cid = update.CONSTRUCTOR_ID - sig = inspect.signature(update.__init__) - for param in sig.parameters.values(): - if param.name == 'channel_id' and param.annotation == int: - _has_channel_id.append(cid) - - if not _has_channel_id: - raise RuntimeError('FIXME: Did the init signature or updates change?') - - -# We use a function to avoid cluttering the globals (with name/update/cid/doc) -_fill() - - -class StateCache: - """ - In-memory update state cache, defaultdict-like behaviour. - """ - def __init__(self, initial, loggers): - # We only care about the pts and the date. By using a tuple which - # is lightweight and immutable we can easily copy them around to - # each update in case they need to fetch missing entities. - self._logger = loggers[__name__] - if initial: - self._pts_date = initial.pts or None, initial.date or None - else: - self._pts_date = None, None - - def reset(self): - self.__dict__.clear() - self._pts_date = None, None - - # TODO Call this when receiving responses too...? - def update( - self, - update, - *, - channel_id=None, - has_pts=frozenset(x.CONSTRUCTOR_ID for x in ( - _tl.UpdateNewMessage, - _tl.UpdateDeleteMessages, - _tl.UpdateReadHistoryInbox, - _tl.UpdateReadHistoryOutbox, - _tl.UpdateWebPage, - _tl.UpdateReadMessagesContents, - _tl.UpdateEditMessage, - _tl.updates.State, - _tl.updates.DifferenceTooLong, - _tl.UpdateShortMessage, - _tl.UpdateShortChatMessage, - _tl.UpdateShortSentMessage - )), - has_date=frozenset(x.CONSTRUCTOR_ID for x in ( - _tl.UpdateUserPhoto, - _tl.UpdateEncryption, - _tl.UpdateEncryptedMessagesRead, - _tl.UpdateChatParticipantAdd, - _tl.updates.DifferenceEmpty, - _tl.UpdateShortMessage, - _tl.UpdateShortChatMessage, - _tl.UpdateShort, - _tl.UpdatesCombined, - _tl.Updates, - _tl.UpdateShortSentMessage, - )), - has_channel_pts=frozenset(x.CONSTRUCTOR_ID for x in ( - _tl.UpdateChannelTooLong, - _tl.UpdateNewChannelMessage, - _tl.UpdateDeleteChannelMessages, - _tl.UpdateEditChannelMessage, - _tl.UpdateChannelWebPage, - _tl.updates.ChannelDifferenceEmpty, - _tl.updates.ChannelDifferenceTooLong, - _tl.updates.ChannelDifference - )), - check_only=False - ): - """ - Update the state with the given update. - """ - cid = update.CONSTRUCTOR_ID - if check_only: - return cid in has_pts or cid in has_date or cid in has_channel_pts - - if cid in has_pts: - if cid in has_date: - self._pts_date = update.pts, update.date - else: - self._pts_date = update.pts, self._pts_date[1] - elif cid in has_date: - self._pts_date = self._pts_date[0], update.date - - if cid in has_channel_pts: - if channel_id is None: - channel_id = self.get_channel_id(update) - - if channel_id is None: - self._logger.info( - 'Failed to retrieve channel_id from %s', update) - else: - self.__dict__[channel_id] = update.pts - - def get_channel_id( - self, - update, - has_channel_id=frozenset(_has_channel_id), - # Hardcoded because only some with message are for channels - has_message=frozenset(x.CONSTRUCTOR_ID for x in ( - _tl.UpdateNewChannelMessage, - _tl.UpdateEditChannelMessage - )) - ): - """ - Gets the **unmarked** channel ID from this update, if it has any. - - Fails for ``*difference`` updates, where ``channel_id`` - is supposedly already known from the outside. - """ - cid = update.CONSTRUCTOR_ID - if cid in has_channel_id: - return update.channel_id - elif cid in has_message: - if update.message.peer_id is None: - # Telegram sometimes sends empty messages to give a newer pts: - # UpdateNewChannelMessage(message=MessageEmpty(id), pts=pts, pts_count=1) - # Not sure why, but it's safe to ignore them. - self._logger.debug('Update has None peer_id %s', update) - else: - return update.message.peer_id.channel_id - - return None - - def __getitem__(self, item): - """ - If `item` is `None`, returns the default ``(pts, date)``. - - If it's an **unmarked** channel ID, returns its ``pts``. - - If no information is known, ``pts`` will be `None`. - """ - if item is None: - return self._pts_date - else: - return self.__dict__.get(item) - - def __setitem__(self, where, value): - if where is None: - self._pts_date = value - else: - self.__dict__[where] = value