Telethon/telethon/update_state.py

import itertools
import logging
from datetime import datetime
from queue import Queue, Empty
from threading import RLock, Thread

from . import utils
from .tl import types as tl

__log__ = logging.getLogger(__name__)


class UpdateState:
    """
    Used to hold the current state of processed updates.
    To retrieve an update, :meth:`poll` should be called.

    Updates enter through :meth:`process`, which stores them on an
    internal queue. They leave the queue either through the background
    worker threads (which pass each one to ``self.handler``) or through
    direct calls to :meth:`poll`.
    """
    WORKER_POLL_TIMEOUT = 5.0  # Avoid waiting forever on the workers

    def __init__(self, workers=None):
        """
        :param workers: This integer parameter has three possible cases:
          workers is None: Updates will *not* be stored on self.
          workers = 0: Another thread is responsible for calling self.poll()
          workers > 0: 'workers' background threads will be spawned (once
                       :meth:`setup_workers` runs), and any of them will
                       invoke the self.handler.
        """
        self._workers = workers
        self._worker_threads = []

        # Callable invoked by the worker threads with every polled update.
        self.handler = None
        self._updates_lock = RLock()
        self._updates = Queue()

        # https://core.telegram.org/api/updates
        self._state = tl.updates.State(0, 0, datetime.now(), 0, 0)

    def can_poll(self):
        """Returns True if a call to .poll() won't lock"""
        return not self._updates.empty()

    def poll(self, timeout=None):
        """
        Polls an update or blocks until an update object is available.
        If 'timeout is not None', it should be a floating point value,
        and the method will 'return None' if waiting times out.

        ``None`` may also be returned when a dummy wake-up object that
        :meth:`stop_workers` enqueued is retrieved.
        """
        try:
            return self._updates.get(timeout=timeout)
        except Empty:
            return None

    def get_workers(self):
        """Returns the configured number of workers (may be ``None``)."""
        return self._workers

    def set_workers(self, n):
        """Changes the number of workers running.
           If 'n is None', stops the workers and clears all pending
           updates from memory; no further updates will be stored.

           If worker threads are already running and their amount does
           not match 'n', they are stopped first so that exactly 'n'
           threads are running afterwards.
        """
        if n is None:
            self.stop_workers()
            # stop_workers() restores the previous count on exit, so the
            # count has to be cleared explicitly here. Otherwise process()
            # would keep storing updates that nobody will ever consume.
            self._workers = None
            self._clear_updates()
            return

        if self._worker_threads and len(self._worker_threads) != n:
            # setup_workers() is a no-op while threads exist, so the stale
            # threads must go away before the new amount can be spawned.
            self.stop_workers()

        self._workers = n
        self.setup_workers()

    workers = property(fget=get_workers, fset=set_workers)

    def _clear_updates(self):
        """Discards every object currently stored on the updates queue."""
        with self._updates_lock:
            while True:
                try:
                    self._updates.get_nowait()
                except Empty:
                    break

    def stop_workers(self):
        """
        Waits for all the worker threads to stop.

        The configured amount of workers is preserved, so a later call
        to :meth:`setup_workers` spawns the same amount of threads again.
        """
        n = self._workers
        # The worker loops run while the count is not None, so clearing
        # it is what actually tells them to exit.
        self._workers = None
        try:
            if self._worker_threads:
                # Put dummy ``None`` objects so that they don't need to
                # timeout. One per live thread, since the configured count
                # may no longer match the amount of threads running.
                with self._updates_lock:
                    for _ in self._worker_threads:
                        self._updates.put(None)

            for t in self._worker_threads:
                t.join()

            self._worker_threads.clear()
        finally:
            # Always restore the count, even if joining failed, so the
            # state is not left looking as if updates were disabled.
            self._workers = n

    def setup_workers(self):
        """
        Spawns ``self.workers`` daemon threads running the worker loop.
        Does nothing if threads already exist or the count is None or 0.
        """
        if self._worker_threads or not self._workers:
            # There already are workers, or workers is None or 0. Do nothing.
            return

        for i in range(self._workers):
            thread = Thread(
                target=UpdateState._worker_loop,
                name='UpdateWorker{}'.format(i),
                daemon=True,
                args=(self, i)
            )
            self._worker_threads.append(thread)
            thread.start()

    def _worker_loop(self, wid):
        """
        Body of every worker thread: polls updates and passes them to
        ``self.handler`` until the workers count is cleared.

        :param wid: the worker index, only used when logging errors.
        """
        while self._workers is not None:
            try:
                update = self.poll(timeout=UpdateState.WORKER_POLL_TIMEOUT)
                # Read the handler once so it can't change between the
                # check and the call.
                handler = self.handler
                # ``None`` means the poll timed out or a wake-up dummy
                # from stop_workers() was retrieved; nothing to handle.
                if update is not None and handler:
                    handler(update)
            except StopIteration:
                # A handler may raise this to terminate its worker.
                break
            except Exception:
                # We don't want to crash a worker thread due to any reason
                __log__.exception('Unhandled exception on worker %d', wid)

    def process(self, update):
        """Processes an update object. This method is normally called by
           the library itself.

           State objects replace the saved state; any other object is
           put on the queue, with "Updates" containers expanded into
           their individual updates.
        """
        if self._workers is None:
            return  # No processing needs to be done if nobody's working

        with self._updates_lock:
            if isinstance(update, tl.updates.State):
                __log__.debug('Saved new updates state')
                self._state = update
                return  # Nothing else to be done

            if hasattr(update, 'pts'):
                self._state.pts = update.pts

            # After running the script for over an hour and receiving over
            # 1000 updates, the only duplicates received were users going
            # online or offline. We can trust the server until new reports.
            #
            # Every queued update gets an ``_entities`` dictionary attached,
            # which should only be used as read-only.
            if isinstance(update, tl.UpdateShort):
                update.update._entities = {}
                self._updates.put(update.update)
            # Expand "Updates" into "Update", and pass these to callbacks.
            # Since .users and .chats have already been processed, we
            # don't need to care about those either.
            elif isinstance(update, (tl.Updates, tl.UpdatesCombined)):
                # The same dictionary object is shared by all the updates
                # that arrived in this container.
                entities = {utils.get_peer_id(x): x for x in
                            itertools.chain(update.users, update.chats)}
                for u in update.updates:
                    u._entities = entities
                    self._updates.put(u)
            # TODO Handle "tl.UpdatesTooLong"
            else:
                update._entities = {}
                self._updates.put(update)
Add known entities to all updates and use them in the events This should reduce the amount of API calls made when getting the full sender/chat on events (mostly on channels, where Telegram seems to always send Updates instead only a normal Update). 2018-03-14 12:28:21 +03:00			`import itertools`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`import logging`
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00			`from datetime import datetime`
Add known entities to all updates and use them in the events This should reduce the amount of API calls made when getting the full sender/chat on events (mostly on channels, where Telegram seems to always send Updates instead only a normal Update). 2018-03-14 12:28:21 +03:00			`from queue import Queue, Empty`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`from threading import RLock, Thread`
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00
Add known entities to all updates and use them in the events This should reduce the amount of API calls made when getting the full sender/chat on events (mostly on channels, where Telegram seems to always send Updates instead only a normal Update). 2018-03-14 12:28:21 +03:00			`from . import utils`
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00			`from .tl import types as tl`
Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00
Make a proper use of the logging module 2017-12-20 14:47:10 +03:00			`__log__ = logging.getLogger(__name__)`

Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00
			`class UpdateState:`
Add a custom role for TL references and make use of it 2018-03-23 23:40:24 +03:00			`"""`
			`Used to hold the current state of processed updates.`
			To retrieve an update, :meth:`poll` should be called.
Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00			`"""`
Use a timeout when worker threads are polling 2017-09-30 12:21:07 +03:00			`WORKER_POLL_TIMEOUT = 5.0 # Avoid waiting forever on the workers`

Rename process_updates/polling to workers 2017-09-30 12:17:31 +03:00			`def __init__(self, workers=None):`
			`"""`
			`:param workers: This integer parameter has three possible cases:`
			`workers is None: Updates will not be stored on self.`
			`workers = 0: Another thread is responsible for calling self.poll()`
			`workers > 0: 'workers' background threads will be spawned, any`
Add add_event_handler and deprecate add_update_handler 2018-02-18 15:29:05 +03:00			`any of them will invoke the self.handler.`
Rename process_updates/polling to workers 2017-09-30 12:17:31 +03:00			`"""`
			`self._workers = workers`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`self._worker_threads = []`

Add add_event_handler and deprecate add_update_handler 2018-02-18 15:29:05 +03:00			`self.handler = None`
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00			`self._updates_lock = RLock()`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`self._updates = Queue()`
Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00			`# https://core.telegram.org/api/updates`
			`self._state = tl.updates.State(0, 0, datetime.now(), 0, 0)`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00
Simplify the workflow with UpdateState exposing a single flag param 2017-09-08 13:54:38 +03:00			`def can_poll(self):`
			`"""Returns True if a call to .poll() won't lock"""`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`return not self._updates.empty()`
Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00
Use a timeout when worker threads are polling 2017-09-30 12:21:07 +03:00			`def poll(self, timeout=None):`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			`"""`
			`Polls an update or blocks until an update object is available.`
			`If 'timeout is not None', it should be a floating point value,`
			`and the method will 'return None' if waiting times out.`
Use a timeout when worker threads are polling 2017-09-30 12:21:07 +03:00			`"""`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`try:`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			`return self._updates.get(timeout=timeout)`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`except Empty:`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			`return None`
Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`def get_workers(self):`
			`return self._workers`

			`def set_workers(self, n):`
Rename process_updates/polling to workers 2017-09-30 12:17:31 +03:00			`"""Changes the number of workers running.`
			`If 'n is None', clears all pending updates from memory.`
			`"""`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			`if n is None:`
			`self.stop_workers()`
			`else:`
			`self._workers = n`
Fix workers not stopping on .disconnect(), start them on login 2017-10-01 20:56:24 +03:00			`self.setup_workers()`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00
			`workers = property(fget=get_workers, fset=set_workers)`

Fix workers not stopping on .disconnect(), start them on login 2017-10-01 20:56:24 +03:00			`def stop_workers(self):`
Better attempt at joining update worker threads 2018-03-01 15:31:39 +03:00			`"""`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			`Waits for all the worker threads to stop.`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`"""`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			# Put dummy ``None`` objects so that they don't need to timeout.
			`n = self._workers`
			`self._workers = None`
Fix n might be None when stopping workers 2018-03-02 12:10:59 +03:00			`if n:`
			`with self._updates_lock:`
			`for _ in range(n):`
			`self._updates.put(None)`
Remove UpdateState .set and .check error 2017-09-30 19:39:31 +03:00
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`for t in self._worker_threads:`
			`t.join()`

			`self._worker_threads.clear()`
Stopping workers should not clear their count (may fix #686) 2018-03-15 12:29:12 +03:00			`self._workers = n`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00
Fix workers not stopping on .disconnect(), start them on login 2017-10-01 20:56:24 +03:00			`def setup_workers(self):`
Rename process_updates/polling to workers 2017-09-30 12:17:31 +03:00			`if self._worker_threads or not self._workers:`
			`# There already are workers, or workers is None or 0. Do nothing.`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`return`

			`for i in range(self._workers):`
			`thread = Thread(`
			`target=UpdateState._worker_loop,`
			`name='UpdateWorker{}'.format(i),`
			`daemon=True,`
			`args=(self, i)`
			`)`
			`self._worker_threads.append(thread)`
			`thread.start()`

			`def _worker_loop(self, wid):`
Fix-up 771c573 to properly stop background update workers The "special" StopIteration object didn't actually make any sense. Instead looping forever, workers now loop while there are workers, so that they stop looping once the count is cleared. Dummy values are still inserted so that they don't need to timeout on the queue before exiting (these values are None) so in essence, this keeps the best of both of worlds. 2018-03-01 22:13:21 +03:00			`while self._workers is not None:`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`try:`
Use a timeout when worker threads are polling 2017-09-30 12:21:07 +03:00			`update = self.poll(timeout=UpdateState.WORKER_POLL_TIMEOUT)`
Add add_event_handler and deprecate add_update_handler 2018-02-18 15:29:05 +03:00			`if update and self.handler:`
			`self.handler(update)`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`except StopIteration:`
			`break`
Fix exception when logging exceptions 2017-10-25 14:04:12 +03:00			`except:`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00			`# We don't want to crash a worker thread due to any reason`
Make a proper use of the logging module 2017-12-20 14:47:10 +03:00			`__log__.exception('Unhandled exception on worker %d', wid)`
Spawn new worker threads to handle updates instead using ReadThread 2017-09-30 11:12:01 +03:00
Create and use UpdateState to .process() unhandled TLObjects 2017-09-07 19:49:08 +03:00			`def process(self, update):`
			`"""Processes an update object. This method is normally called by`
			`the library itself.`
			`"""`
Rename process_updates/polling to workers 2017-09-30 12:17:31 +03:00			`if self._workers is None:`
			`return # No processing needs to be done if nobody's working`
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00
			`with self._updates_lock:`
			`if isinstance(update, tl.updates.State):`
Make a proper use of the logging module 2017-12-20 14:47:10 +03:00			`__log__.debug('Saved new updates state')`
Add basic updates processing to ignore updates with lower .pts 2017-09-07 21:17:40 +03:00			`self._state = update`
Fix UpdateState calling handlers with updates with lower pts 2017-09-19 14:17:40 +03:00			`return # Nothing else to be done`
Allow adding callback methods to UpdateState 2017-09-07 19:58:54 +03:00
Trust the server will not send duplicates This change was also suggested by the test on the previous commit. 2018-02-03 17:42:43 +03:00			`if hasattr(update, 'pts'):`
			`self._state.pts = update.pts`
Temporary fix for abusive duplicated updates (closes #336) 2017-10-14 12:37:47 +03:00
Trust the server will not send duplicates This change was also suggested by the test on the previous commit. 2018-02-03 17:42:43 +03:00			`# After running the script for over an hour and receiving over`
			`# 1000 updates, the only duplicates received were users going`
			`# online or offline. We can trust the server until new reports.`
Add known entities to all updates and use them in the events This should reduce the amount of API calls made when getting the full sender/chat on events (mostly on channels, where Telegram seems to always send Updates instead only a normal Update). 2018-03-14 12:28:21 +03:00			`# This should only be used as read-only.`
Remove redundant checks from UpdateState 2017-11-30 22:40:35 +03:00			`if isinstance(update, tl.UpdateShort):`
Rename .entities -> ._entities from 7e9d19d to avoid collision 2018-03-29 01:56:05 +03:00			`update.update._entities = {}`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`self._updates.put(update.update)`
Remove redundant checks from UpdateState 2017-11-30 22:40:35 +03:00			`# Expand "Updates" into "Update", and pass these to callbacks.`
			`# Since .users and .chats have already been processed, we`
			`# don't need to care about those either.`
			`elif isinstance(update, (tl.Updates, tl.UpdatesCombined)):`
Add known entities to all updates and use them in the events This should reduce the amount of API calls made when getting the full sender/chat on events (mostly on channels, where Telegram seems to always send Updates instead only a normal Update). 2018-03-14 12:28:21 +03:00			`entities = {utils.get_peer_id(x): x for x in`
			`itertools.chain(update.users, update.chats)}`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`for u in update.updates:`
Rename .entities -> ._entities from 7e9d19d to avoid collision 2018-03-29 01:56:05 +03:00			`u._entities = entities`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`self._updates.put(u)`
Remove another redundant if 2017-11-30 23:10:02 +03:00			`# TODO Handle "tl.UpdatesTooLong"`
Expand Updates into Update objects before calling handlers 2017-10-01 17:30:27 +03:00			`else:`
Rename .entities -> ._entities from 7e9d19d to avoid collision 2018-03-29 01:56:05 +03:00			`update._entities = {}`
Use a synchronized queue instead event/deque pair 2017-11-30 23:09:34 +03:00			`self._updates.put(update)`