mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2025-02-04 21:50:57 +03:00
Refactor code to fetch missing entities once again
This is another attempt at reducing CPU usage similar to:
1b6b4a57d9
In addition it simplifies some of the code and opens up new
ideas for the state cache as well.
This commit is contained in:
parent
c12c65f728
commit
22124b5ced
|
@ -282,25 +282,22 @@ class UpdateMethods(UserMethods):
|
|||
self._dispatching_updates_queue.clear()
|
||||
|
||||
async def _dispatch_update(self, update, channel_id, pts_date):
|
||||
if not self._entity_cache.ensure_cached(update):
|
||||
await self._get_difference(update, channel_id, pts_date)
|
||||
|
||||
built = EventBuilderDict(self, update)
|
||||
if self._conversations:
|
||||
for conv in self._conversations.values():
|
||||
ev = built[events.NewMessage]
|
||||
if ev:
|
||||
if not ev._load_entities():
|
||||
await ev._get_difference(channel_id, pts_date)
|
||||
conv._on_new_message(ev)
|
||||
|
||||
ev = built[events.MessageEdited]
|
||||
if ev:
|
||||
if not ev._load_entities():
|
||||
await ev._get_difference(channel_id, pts_date)
|
||||
conv._on_edit(ev)
|
||||
|
||||
ev = built[events.MessageRead]
|
||||
if ev:
|
||||
if not ev._load_entities():
|
||||
await ev._get_difference(channel_id, pts_date)
|
||||
conv._on_read(ev)
|
||||
|
||||
if conv._custom:
|
||||
|
@ -318,14 +315,6 @@ class UpdateMethods(UserMethods):
|
|||
continue
|
||||
|
||||
try:
|
||||
# Although needing to do this constantly is annoying and
|
||||
# error-prone, this part is somewhat hot, and always doing
|
||||
# `await` for `check_entities_and_get_difference` causes
|
||||
# unnecessary work. So we need to call a function that
|
||||
# doesn't cause a task switch.
|
||||
if isinstance(event, EventCommon) and not event._load_entities():
|
||||
await event._get_difference(channel_id, pts_date)
|
||||
|
||||
await callback(event)
|
||||
except errors.AlreadyInConversationError:
|
||||
name = getattr(callback, '__name__', repr(callback))
|
||||
|
@ -344,6 +333,46 @@ class UpdateMethods(UserMethods):
|
|||
self._log[__name__].exception('Unhandled exception on %s',
|
||||
name)
|
||||
|
||||
async def _get_difference(self, update, channel_id, pts_date):
|
||||
"""
|
||||
Get the difference for this `channel_id` if any, then load entities.
|
||||
|
||||
Calls :tl:`updates.getDifference`, which fills the entities cache
|
||||
(always done by `__call__`) and lets us know about the full entities.
|
||||
"""
|
||||
# Fetch since the last known pts/date before this update arrived,
|
||||
# in order to fetch this update at full, including its entities.
|
||||
self._log[__name__].debug('Getting difference for entities '
|
||||
'for %r', update.__class__)
|
||||
if channel_id:
|
||||
try:
|
||||
where = await self.get_input_entity(channel_id)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
result = await self(functions.updates.GetChannelDifferenceRequest(
|
||||
channel=where,
|
||||
filter=types.ChannelMessagesFilterEmpty(),
|
||||
pts=pts_date, # just pts
|
||||
limit=100,
|
||||
force=True
|
||||
))
|
||||
else:
|
||||
result = await self(functions.updates.GetDifferenceRequest(
|
||||
pts=pts_date[0],
|
||||
date=pts_date[1],
|
||||
qts=0
|
||||
))
|
||||
|
||||
if isinstance(result, (types.updates.Difference,
|
||||
types.updates.DifferenceSlice,
|
||||
types.updates.ChannelDifference,
|
||||
types.updates.ChannelDifferenceTooLong)):
|
||||
update._entities.update({
|
||||
utils.get_peer_id(x): x for x in
|
||||
itertools.chain(result.users, result.chats)
|
||||
})
|
||||
|
||||
async def _handle_auto_reconnect(self):
|
||||
# TODO Catch-up
|
||||
return
|
||||
|
@ -398,6 +427,7 @@ class EventBuilderDict:
|
|||
if isinstance(event, EventCommon):
|
||||
event.original_update = self.update
|
||||
event._set_client(self.client)
|
||||
event._load_entities()
|
||||
elif event:
|
||||
event._client = self.client
|
||||
|
||||
|
|
|
@ -1,7 +1,57 @@
|
|||
import itertools
|
||||
|
||||
from . import utils
|
||||
from .tl import types
|
||||
|
||||
# Which updates have the following fields?
|
||||
_has_user_id = []
|
||||
_has_chat_id = []
|
||||
_has_channel_id = []
|
||||
_has_peer = []
|
||||
_has_dialog_peer = []
|
||||
_has_message = []
|
||||
|
||||
# Note: We don't bother checking for some rare:
|
||||
# * `UpdateChatParticipantAdd.inviter_id` integer.
|
||||
# * `UpdateNotifySettings.peer` dialog peer.
|
||||
# * `UpdatePinnedDialogs.order` list of dialog peers.
|
||||
# * `UpdateReadMessagesContents.messages` list of messages.
|
||||
# * `UpdateChatParticipants.participants` list of participants.
|
||||
#
|
||||
# There are also some uninteresting `update.message` of type string.
|
||||
|
||||
|
||||
def _fill():
|
||||
for name in dir(types):
|
||||
update = getattr(types, name)
|
||||
if getattr(update, 'SUBCLASS_OF_ID', None) == 0x9f89304e:
|
||||
cid = update.CONSTRUCTOR_ID
|
||||
doc = update.__init__.__doc__ or ''
|
||||
if ':param int user_id:' in doc:
|
||||
_has_user_id.append(cid)
|
||||
if ':param int chat_id:' in doc:
|
||||
_has_chat_id.append(cid)
|
||||
if ':param int channel_id:' in doc:
|
||||
_has_channel_id.append(cid)
|
||||
if ':param TypePeer peer:' in doc:
|
||||
_has_peer.append(cid)
|
||||
if ':param TypeDialogPeer peer:' in doc:
|
||||
_has_dialog_peer.append(cid)
|
||||
if ':param TypeMessage message:' in doc:
|
||||
_has_message.append(cid)
|
||||
|
||||
# Future-proof check: if the documentation format ever changes
|
||||
# then we won't be able to pick the update types we are interested
|
||||
# in, so we must make sure we have at least an update for each field
|
||||
# which likely means we are doing it right.
|
||||
if not all((_has_user_id, _has_chat_id, _has_channel_id,
|
||||
_has_peer, _has_dialog_peer)):
|
||||
raise RuntimeError('FIXME: Did the generated docs or updates change?')
|
||||
|
||||
|
||||
# We use a function to avoid cluttering the globals (with name/update/cid/doc)
|
||||
_fill()
|
||||
|
||||
|
||||
class EntityCache:
|
||||
"""
|
||||
|
@ -46,3 +96,51 @@ class EntityCache:
|
|||
return result
|
||||
|
||||
raise KeyError('No cached entity for the given key')
|
||||
|
||||
def ensure_cached(
|
||||
self,
|
||||
update,
|
||||
has_user_id=frozenset(_has_user_id),
|
||||
has_channel_id=frozenset(_has_channel_id),
|
||||
has_peer=frozenset(_has_peer + _has_dialog_peer),
|
||||
has_message=frozenset(_has_message)
|
||||
):
|
||||
"""
|
||||
Ensures that all the relevant entities in the given update are cached.
|
||||
"""
|
||||
# This method is called pretty often and we want it to have the lowest
|
||||
# overhead possible. For that, we avoid `isinstance` and constantly
|
||||
# getting attributes out of `types.` by "caching" the constructor IDs
|
||||
# in sets inside the arguments, and using local variables.
|
||||
dct = self.__dict__
|
||||
cid = update.CONSTRUCTOR_ID
|
||||
if cid in has_user_id and \
|
||||
update.user_id not in dct:
|
||||
return False
|
||||
|
||||
if cid in _has_chat_id and \
|
||||
utils.get_peer_id(types.PeerChat(update.chat_id)) not in dct:
|
||||
return False
|
||||
|
||||
if cid in has_channel_id and \
|
||||
utils.get_peer_id(types.PeerChannel(update.channel_id)) not in dct:
|
||||
return False
|
||||
|
||||
if cid in has_peer and \
|
||||
utils.get_peer_id(update.peer) not in dct:
|
||||
return False
|
||||
|
||||
if cid in has_message:
|
||||
x = update.message
|
||||
y = getattr(x, 'to_id', None) # handle MessageEmpty
|
||||
if y and utils.get_peer_id(y) not in dct:
|
||||
return False
|
||||
|
||||
y = getattr(x, 'from_id', None)
|
||||
if y and y not in dct:
|
||||
return False
|
||||
|
||||
# We don't quite worry about entities anywhere else.
|
||||
# This is enough.
|
||||
|
||||
return True
|
||||
|
|
|
@ -175,51 +175,6 @@ class EventCommon(ChatGetter, abc.ABC):
|
|||
self._chat, self._input_chat = self._get_entity_pair(self.chat_id)
|
||||
return self._input_chat is not None
|
||||
|
||||
async def _get_difference(self, channel_id, pts_date):
|
||||
"""
|
||||
Get the difference for this `channel_id` if any, then load entities.
|
||||
|
||||
Calls :tl:`updates.getDifference`, which fills the entities cache
|
||||
(always done by `__call__`) and lets us know about the full entities.
|
||||
"""
|
||||
# Fetch since the last known pts/date before this update arrived,
|
||||
# in order to fetch this update at full, including its entities.
|
||||
self.client._log[__name__].debug('Getting difference for entities')
|
||||
if channel_id:
|
||||
try:
|
||||
where = await self.client.get_input_entity(channel_id)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
result = await self.client(functions.updates.GetChannelDifferenceRequest(
|
||||
channel=where,
|
||||
filter=types.ChannelMessagesFilterEmpty(),
|
||||
pts=pts_date, # just pts
|
||||
limit=100,
|
||||
force=True
|
||||
))
|
||||
else:
|
||||
result = await self.client(functions.updates.GetDifferenceRequest(
|
||||
pts=pts_date[0],
|
||||
date=pts_date[1],
|
||||
qts=0
|
||||
))
|
||||
|
||||
if isinstance(result, (types.updates.Difference,
|
||||
types.updates.DifferenceSlice,
|
||||
types.updates.ChannelDifference,
|
||||
types.updates.ChannelDifferenceTooLong)):
|
||||
self.original_update._entities.update({
|
||||
utils.get_peer_id(x): x for x in
|
||||
itertools.chain(result.users, result.chats)
|
||||
})
|
||||
|
||||
if not self._load_entities():
|
||||
self.client._log[__name__].info(
|
||||
'Could not find all entities for update.pts = %s',
|
||||
getattr(self.original_update, 'pts', None)
|
||||
)
|
||||
|
||||
@property
|
||||
def client(self):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue
Block a user