From 97b0ba67071da8b68047999449dd6ec6f4f62ba2 Mon Sep 17 00:00:00 2001
From: Lonami Exo
Date: Thu, 6 Apr 2023 13:45:12 +0200
Subject: [PATCH] Flush in-memory cache to session after a limit is reached

Should fully close #3989.
Should help with #3235.
---
Reviewer notes (not part of the commit message; `git am` drops everything
between the three-dash line and the first diff):

  * entitycache.py: EntityCache.retain(filter) rebuilds hash_map, keeping
    only the keys for which filter(key) is truthy. EntityCache.__len__
    returns the number of entries in hash_map.
  * telegrambaseclient.py: the constructor gains the trailing keyword
    argument entity_cache_limit (default 5000), documents it in the class
    docstring and stores it as self._entity_cache_limit.
  * updates.py: in the update loop, once len(self._mb_entity_cache) is
    greater than or equal to self._entity_cache_limit, the client logs at
    info level, calls self._save_states_and_entities(), and then retains
    only the entry equal to self._mb_entity_cache.self_id plus the ids
    present in self._message_box.map. If the cache is still at or above
    the limit after that, warnings.warn() is emitted; a second info log
    reports the resulting size.
  * NOTE(review): the parameter name `filter` (retain) and the lambda
    argument `id` (update loop) shadow Python builtins; harmless here, but
    worth renaming in a follow-up.
  * NOTE(review): a limit of 0 (or any value below the number of entities
    that must be retained) makes the flush branch run on every pass through
    this code; the new docstring already warns about this -- confirm that
    no validation is wanted.

 telethon/_updates/entitycache.py      |  6 ++++++
 telethon/client/telegrambaseclient.py | 18 +++++++++++++++++-
 telethon/client/updates.py            | 19 +++++++++++++++++++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/telethon/_updates/entitycache.py b/telethon/_updates/entitycache.py
index e0e9888a..0c28d3a2 100644
--- a/telethon/_updates/entitycache.py
+++ b/telethon/_updates/entitycache.py
@@ -52,3 +52,9 @@ class EntityCache:
 
     def put(self, entity):
         self.hash_map[entity.id] = (entity.hash, entity.ty)
+
+    def retain(self, filter):
+        self.hash_map = {k: v for k, v in self.hash_map.items() if filter(k)}
+
+    def __len__(self):
+        return len(self.hash_map)
diff --git a/telethon/client/telegrambaseclient.py b/telethon/client/telegrambaseclient.py
index 48eba037..63d69bb8 100644
--- a/telethon/client/telegrambaseclient.py
+++ b/telethon/client/telegrambaseclient.py
@@ -208,6 +208,20 @@ class TelegramBaseClient(abc.ABC):
             so event handlers, conversations, and QR login will not work.
             However, certain scripts don't need updates, so this will reduce
             the amount of bandwidth used.
+
+        entity_cache_limit (`int`, optional):
+            How many users, chats and channels to keep in the in-memory cache
+            at most. This limit is checked against when processing updates.
+
+            When this limit is reached or exceeded, all entities that are not
+            required for update handling will be flushed to the session file.
+
+            Note that this implies that there is a lower bound to the amount
+            of entities that must be kept in memory.
+
+            Setting this limit too low will cause the library to attempt to
+            flush entities to the session file even if no entities can be
+            removed from the in-memory cache, which will degrade performance.
     """
 
     # Current TelegramClient version
@@ -245,7 +259,8 @@ class TelegramBaseClient(abc.ABC):
             loop: asyncio.AbstractEventLoop = None,
             base_logger: typing.Union[str, logging.Logger] = None,
             receive_updates: bool = True,
-            catch_up: bool = False
+            catch_up: bool = False,
+            entity_cache_limit: int = 5000
     ):
         if not api_id or not api_hash:
             raise ValueError(
@@ -432,6 +447,7 @@ class TelegramBaseClient(abc.ABC):
         self._updates_queue = asyncio.Queue()
         self._message_box = MessageBox(self._log['messagebox'])
         self._mb_entity_cache = MbEntityCache()  # required for proper update handling (to know when to getDifference)
+        self._entity_cache_limit = entity_cache_limit
 
         self._sender = MTProtoSender(
             self.session.auth_key,
diff --git a/telethon/client/updates.py b/telethon/client/updates.py
index 448e6008..6e659ec3 100644
--- a/telethon/client/updates.py
+++ b/telethon/client/updates.py
@@ -7,6 +7,7 @@ import time
 import traceback
 import typing
 import logging
+import warnings
 from collections import deque
 
 from .. import events, utils, errors
@@ -281,6 +282,24 @@ class UpdateMethods:
 
                     continue
 
+                if len(self._mb_entity_cache) >= self._entity_cache_limit:
+                    self._log[__name__].info(
+                        'In-memory entity cache limit reached (%s/%s), flushing to session',
+                        len(self._mb_entity_cache),
+                        self._entity_cache_limit
+                    )
+                    self._save_states_and_entities()
+                    self._mb_entity_cache.retain(lambda id: id == self._mb_entity_cache.self_id or id in self._message_box.map)
+                    if len(self._mb_entity_cache) >= self._entity_cache_limit:
+                        warnings.warn('in-memory entities exceed entity_cache_limit after flushing; consider setting a larger limit')
+
+                    self._log[__name__].info(
+                        'In-memory entity cache at %s/%s after flushing to session',
+                        len(self._mb_entity_cache),
+                        self._entity_cache_limit
+                    )
+
+
                 get_diff = self._message_box.get_difference()
                 if get_diff:
                     self._log[__name__].debug('Getting difference for account updates')