Create a new RequestIter ABC to deal with iter methods

This should make it easier to maintain these methods, increase reusability, and get rid of the async_generator dependency. In the future, people could use this to more easily deal with raw API themselves.
2025-09-17 17:32:40 +03:00 · 2019-02-26 20:26:40 +01:00 · 2019-02-26 20:26:40 +01:00 · 36eb1b1009
commit 36eb1b1009
parent 1e4a12d2f7
2 changed files with 257 additions and 4 deletions
--- a/telethon/client/messages.py
+++ b/telethon/client/messages.py
@ -2,13 +2,152 @@ import asyncio
 import itertools
 import time

-from async_generator import async_generator, yield_
-
 from .messageparse import MessageParseMethods
 from .uploads import UploadMethods
 from .buttons import ButtonMethods
 from .. import helpers, utils, errors
 from ..tl import types, functions
+from ..requestiter import RequestIter
+
+
+class _GetHistoryIter(RequestIter):
+    async def _init(self, entity, offset_id, min_id, max_id, from_user, batch_size, offset_date, add_offset):
+        self.entity = await self.client.get_input_entity(entity)
+
+        # Telegram doesn't like min_id/max_id. If these IDs are low enough
+        # (starting from last_id - 100), the request will return nothing.
+        #
+        # We can emulate their behaviour locally by setting offset = max_id
+        # and simply stopping once we hit a message with ID <= min_id.
+        if self.reverse:
+            offset_id = max(offset_id, min_id)
+            if offset_id and max_id:
+                if max_id - offset_id <= 1:
+                    raise StopAsyncIteration
+
+            if not max_id:
+                max_id = float('inf')
+        else:
+            offset_id = max(offset_id, max_id)
+            if offset_id and min_id:
+                if offset_id - min_id <= 1:
+                    raise StopAsyncIteration
+
+        if self.reverse:
+            if offset_id:
+                offset_id += 1
+            else:
+                offset_id = 1
+
+        if from_user:
+            from_user = await self.client.get_input_entity(from_user)
+            if not isinstance(from_user, (
+                    types.InputPeerUser, types.InputPeerSelf)):
+                from_user = None  # Ignore from_user unless it's a user
+
+        self.from_id = (await self.client.get_peer_id(from_user)) if from_user else None
+
+        self.request = functions.messages.GetHistoryRequest(
+            peer=entity,
+            limit=1,
+            offset_date=offset_date,
+            offset_id=offset_id,
+            min_id=0,
+            max_id=0,
+            add_offset=add_offset,
+            hash=0
+        )
+
+        if self.limit == 0:
+            # No messages, but we still need to know the total message count
+            result = await self.client(self.request)
+            if isinstance(result, types.messages.MessagesNotModified):
+                self.total = result.count
+            else:
+                self.total = getattr(result, 'count', len(result.messages))
+            raise StopAsyncIteration
+
+        # When going in reverse we need an offset of `-limit`, but we
+        # also want to respect what the user passed, so add them together.
+        if self.reverse:
+            self.request.add_offset -= batch_size
+
+        if self.wait_time is None:
+            self.wait_time = 1 if self.limit > 3000 else 0
+
+        # Telegram has a hard limit of 100.
+        # We don't need to fetch 100 if the limit is less.
+        self.batch_size = min(max(batch_size, 1), min(100, self.limit))
+        self.add_offset = add_offset
+        self.max_id = max_id
+        self.min_id = min_id
+        self.last_id = 0 if self.reverse else float('inf')
+
+    async def _load_next_chunk(self):
+        result = []
+
+        self.request.limit = min(self.left, self.batch_size)
+        if self.reverse and self.request.limit != self.batch_size:
+            # Remember that we need -limit when going in reverse
+            self.request.add_offset = self.add_offset - self.request.limit
+
+        r = await self.client(self.request)
+        self.total = getattr(r, 'count', len(r.messages))
+
+        entities = {utils.get_peer_id(x): x
+                    for x in itertools.chain(r.users, r.chats)}
+
+        messages = reversed(r.messages) if self.reverse else r.messages
+        for message in messages:
+            if (isinstance(message, types.MessageEmpty)
+                    or self.from_id and message.from_id != self.from_id):
+                continue
+
+            # TODO We used to yield and return here (stopping the iterator)
+            # How should we go around that here?
+            if self.reverse:
+                if message.id <= self.last_id or message.id >= self.max_id:
+                    break
+            else:
+                if message.id >= self.last_id or message.id <= self.min_id:
+                    break
+
+            # There has been reports that on bad connections this method
+            # was returning duplicated IDs sometimes. Using ``last_id``
+            # is an attempt to avoid these duplicates, since the message
+            # IDs are returned in descending order (or asc if reverse).
+            self.last_id = message.id
+            message._finish_init(self.client, entities, self.entity)
+            result.append(message)
+
+        if len(r.messages) < self.request.limit:
+            return result
+
+        # Find the first message that's not empty (in some rare cases
+        # it can happen that the last message is :tl:`MessageEmpty`)
+        last_message = None
+        messages = r.messages if self.reverse else reversed(r.messages)
+        for m in messages:
+            if not isinstance(m, types.MessageEmpty):
+                last_message = m
+                break
+
+        # TODO If it's None, we used to break (ending the iterator)
+        # Similar case as the return above.
+        if last_message is not None:
+            # There are some cases where all the messages we get start
+            # being empty. This can happen on migrated mega-groups if
+            # the history was cleared, and we're using search. Telegram
+            # acts incredibly weird sometimes. Messages are returned but
+            # only "empty", not their contents. If this is the case we
+            # should just give up since there won't be any new Message.
+            self.request.offset_id = last_message.id
+            self.request.offset_date = last_message.date
+            if self.reverse:
+                # We want to skip the one we already have
+                self.request.offset_id += 1
+
+        return result


 class MessageMethods(UploadMethods, ButtonMethods, MessageParseMethods):
@ -17,7 +156,6 @@ class MessageMethods(UploadMethods, ButtonMethods, MessageParseMethods):

    # region Message retrieval

-    @async_generator
    async def iter_messages(
            self, entity, limit=None, *, offset_date=None, offset_id=0,
            max_id=0, min_id=0, add_offset=0, search=None, filter=None,
@ -133,6 +271,23 @@ class MessageMethods(UploadMethods, ButtonMethods, MessageParseMethods):
            an higher limit, so you're free to set the ``batch_size`` that
            you think may be good.
        """
+        # TODO Handle global search
+        # TODO Handle search
+        # TODO Handle yield IDs
+        return _GetHistoryIter(
+            self,
+            limit=limit,
+            wait_time=wait_time,
+            entity=entity,
+            reverse=reverse,
+            offset_id=offset_id,
+            min_id=min_id,
+            max_id=max_id,
+            from_user=from_user,
+            batch_size=batch_size,
+            offset_date=offset_date,
+            add_offset=add_offset
+        )
        # Note that entity being ``None`` is intended to get messages by
        # ID under no specific chat, and also to request a global search.
        if entity:
@ -802,7 +957,6 @@ class MessageMethods(UploadMethods, ButtonMethods, MessageParseMethods):

    # region Private methods

-    @async_generator
    async def _iter_ids(self, entity, ids, total):
        """
        Special case for `iter_messages` when it should only fetch some IDs.
--- a/telethon/requestiter.py
+++ b/telethon/requestiter.py
@ -0,0 +1,99 @@
+import abc
+import asyncio
+import time
+
+
+class RequestIter(abc.ABC):
+    """
+    Helper class to deal with requests that need offsets to iterate.
+
+    It has some facilities, such as automatically sleeping a desired
+    amount of time between requests if needed (but not more).
+
+    Can be used synchronously if the event loop is not running and
+    as an asynchronous iterator otherwise.
+
+    `limit` is the total amount of items that the iterator should return.
+    This is handled on this base class, and will be always ``>= 0``.
+
+    `left` will be reset every time the iterator is used and will indicate
+    the amount of items that should be emitted left, so that subclasses can
+    be more efficient and fetch only as many items as they need.
+
+    Iterators may be used with ``reversed``, and their `reverse` flag will
+    be set to ``True`` if that's the case. Note that if this flag is set,
+    `buffer` should be filled in reverse too.
+    """
+    def __init__(self, client, limit, *, reverse=False, wait_time=None, **kwargs):
+        self.client = client
+        self.reverse = reverse
+        self.wait_time = wait_time
+        self.kwargs = kwargs
+        self.limit = max(float('inf') if limit is None else limit, 0)
+        self.left = None
+        self.buffer = None
+        self.index = None
+        self.total = None
+        self.last_load = None
+
+    async def _init(self, **kwargs):
+        """
+        Called when asynchronous initialization is necessary. All keyword
+        arguments passed to `__init__` will be forwarded here, and it's
+        preferable to use named arguments in the subclasses without defaults
+        to avoid forgetting or misspelling any of them.
+
+        This method may ``raise StopAsyncIteration`` if it cannot continue.
+        """
+
+    async def __anext__(self):
+        if self.buffer is ():
+            await self._init(**self.kwargs)
+
+        if self.index == len(self.buffer):
+            # asyncio will handle times <= 0 to sleep 0 seconds
+            if self.wait_time:
+                await asyncio.sleep(
+                    self.wait_time - (time.time() - self.last_load),
+                    loop=self.client.loop
+                )
+                self.last_load = time.time()
+
+            self.index = 0
+            self.buffer = await self._load_next_chunk()
+
+        if not self.buffer:
+            raise StopAsyncIteration
+
+        result = self.buffer[self.index]
+        self.left -= 1
+        self.index += 1
+        return result
+
+    def __aiter__(self):
+        self.buffer = ()
+        self.index = 0
+        self.last_load = 0
+        self.left = self.limit
+        return self
+
+    def __iter__(self):
+        if self.client.loop.is_running():
+            raise RuntimeError(
+                'You must use "async for" if the event loop '
+                'is running (i.e. you are inside an "async def")'
+            )
+
+        raise NotImplementedError('lol!')
+
+    @abc.abstractmethod
+    async def _load_next_chunk(self):
+        """
+        Called when the next chunk is necessary.
+        It should *always* return a `list`.
+        """
+        raise NotImplementedError
+
+    def __reversed__(self):
+        self.reverse = not self.reverse
+        return self  # __aiter__ will be called after, too