Allow >100 limits when getting message history (implements #290)

This commit is contained in:
Lonami Exo 2017-10-31 12:48:55 +01:00
parent 6c2363acd2
commit 3d6c8915e3

View File

@ -1,4 +1,5 @@
import os import os
import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from mimetypes import guess_type from mimetypes import guess_type
@ -48,7 +49,8 @@ from .tl.types import (
Message, MessageMediaContact, MessageMediaDocument, MessageMediaPhoto, Message, MessageMediaContact, MessageMediaDocument, MessageMediaPhoto,
InputUserSelf, UserProfilePhoto, ChatPhoto, UpdateMessageID, InputUserSelf, UserProfilePhoto, ChatPhoto, UpdateMessageID,
UpdateNewChannelMessage, UpdateNewMessage, UpdateShortSentMessage, UpdateNewChannelMessage, UpdateNewMessage, UpdateShortSentMessage,
PeerUser, InputPeerUser, InputPeerChat, InputPeerChannel) PeerUser, InputPeerUser, InputPeerChat, InputPeerChannel, MessageEmpty
)
from .tl.types.messages import DialogsSlice from .tl.types.messages import DialogsSlice
from .extensions import markdown from .extensions import markdown
@ -458,43 +460,91 @@ class TelegramClient(TelegramBareClient):
""" """
Gets the message history for the specified entity Gets the message history for the specified entity
:param entity: The entity from whom to retrieve the message history :param entity:
:param limit: Number of messages to be retrieved The entity from whom to retrieve the message history.
:param offset_date: Offset date (messages *previous* to this date will be retrieved) :param limit:
:param offset_id: Offset message ID (only messages *previous* to the given ID will be retrieved) Number of messages to be retrieved. Due to limitations with the API
:param max_id: All the messages with a higher (newer) ID or equal to this will be excluded retrieving more than 3000 messages will take longer than half a
:param min_id: All the messages with a lower (older) ID or equal to this will be excluded minute (or even more based on previous calls). The limit may also
:param add_offset: Additional message offset (all of the specified offsets + this offset = older messages) be None, which would eventually return the whole history.
:param offset_date:
Offset date (messages *previous* to this date will be retrieved).
:param offset_id:
Offset message ID (only messages *previous* to the given ID will
be retrieved).
:param max_id:
All the messages with a higher (newer) ID or equal to this will
be excluded.
:param min_id:
All the messages with a lower (older) ID or equal to this will
be excluded.
:param add_offset:
Additional message offset
(all of the specified offsets + this offset = older messages).
:return: A tuple containing total message count and two more lists ([messages], [senders]). :return: A tuple containing total message count and two more lists ([messages], [senders]).
Note that the sender can be null if it was not found! Note that the sender can be null if it was not found!
""" """
limit = float('inf') if limit is None else int(limit)
total_messages = 0
messages = []
entities = {}
while len(messages) < limit:
# Telegram has a hard limit of 100
real_limit = min(limit - len(messages), 100)
result = self(GetHistoryRequest( result = self(GetHistoryRequest(
peer=self.get_input_entity(entity), peer=self.get_input_entity(entity),
limit=limit, limit=real_limit,
offset_date=offset_date, offset_date=offset_date,
offset_id=offset_id, offset_id=offset_id,
max_id=max_id, max_id=max_id,
min_id=min_id, min_id=min_id,
add_offset=add_offset add_offset=add_offset
)) ))
messages.extend(
# The result may be a messages slice (not all messages were retrieved) m for m in result.messages if not isinstance(m, MessageEmpty)
# or simply a messages TLObject. In the latter case, no "count" )
# attribute is specified, so the total messages count is simply
# the count of retrieved messages
total_messages = getattr(result, 'count', len(result.messages)) total_messages = getattr(result, 'count', len(result.messages))
# Iterate over all the messages and find the sender User # TODO We can potentially use self.session.database, but since
entities = [ # it might be disabled, use a local dictionary.
utils.find_user_or_chat(m.from_id, result.users, result.chats) for u in result.users:
if m.from_id is not None else entities[utils.get_peer_id(u, add_mark=True)] = u
utils.find_user_or_chat(m.to_id, result.users, result.chats) for c in result.chats:
entities[utils.get_peer_id(c, add_mark=True)] = c
for m in result.messages if len(result.messages) < real_limit:
] break
return total_messages, result.messages, entities offset_id = result.messages[-1].id
offset_date = result.messages[-1].date
# Telegram limit seems to be 3000 messages within 30 seconds in
# batches of 100 messages per request (since the FloodWait was
# of 30 seconds). If the limit is greater than that, we will
# sleep 1s between each request.
if limit > 3000:
time.sleep(1)
# In a new list with the same length as the messages append
# their senders, so people can zip(messages, senders).
senders = []
for m in messages:
if m.from_id:
who = entities[utils.get_peer_id(m.from_id, add_mark=True)]
elif getattr(m, 'fwd_from', None):
# .from_id is optional, so this is the sanest fallback.
who = entities[utils.get_peer_id(
m.fwd_from.from_id or m.fwd_from.channel_id,
add_mark=True
)]
else:
# If there's not even a FwdHeader, fallback to the sender
# being where the message was sent.
who = entities[utils.get_peer_id(m.to_id, add_mark=True)]
senders.append(who)
return total_messages, messages, senders
def send_read_acknowledge(self, entity, messages=None, max_id=None): def send_read_acknowledge(self, entity, messages=None, max_id=None):
""" """