"""poller -- download new media from Telegram chats.

Installation
------------
Get ``api_id`` and ``api_hash`` at https://my.telegram.org/apps, then::

    $ pip install telethon cryptg pyyaml
    $ export api_id=123
    $ export api_hash=abcdef....

(PyYAML is required because the configuration is a YAML file.)

Configuration
-------------
Edit ``poller.yaml`` to choose the chats.  A leading ``@`` is a reserved
character in YAML, so ``@username`` entries must be quoted::

    channels:
      usernames:
        - https://t.me/sanspie_notes
        - "@s4nspie"
      ids:
        - 868474142

    folders:
      - Users

Run
---
::

    $ python3 poller.py

Files are downloaded to ``poller/<chat>/{photos,videos,other}/``.

Note: to re-download everything, delete ``poller/.offsets.json``.
"""

import json
import os

# Third-party packages (PyYAML, Telethon) are imported inside the functions
# that need them so the pure helpers below stay importable without them.

DOWNLOAD_ROOT = "poller"
CONFIG_PATH = "poller.yaml"
OFFSETS_PATH = "poller/.offsets.json"
MEDIA_SUBDIRS = ("photos", "videos", "other")

# Runtime state shared by download_channel() and run(); populated by main().
config = {}
offsets = {}
folders = []
channels = []


def parse_config(config):
    """Validate the parsed ``poller.yaml`` content.

    Args:
        config: The object returned by ``yaml.safe_load`` (a dict, or
            ``None`` for an empty file).

    Returns:
        A ``(folders, channels)`` tuple.  ``folders`` is a list of folder
        names.  ``channels`` holds numeric ids as ``int`` followed by
        usernames/links as ``str`` with any ``@`` removed.

    Raises:
        TypeError: If a section does not have the expected shape.
    """
    if config is None:
        return [], []
    if not isinstance(config, dict):
        raise TypeError("Config should be a mapping with channels and/or folders")

    parsed_folders = config.get("folders") or []
    if not isinstance(parsed_folders, list):
        raise TypeError("Folders should be a list(start with - on a new line)")
    if not all(isinstance(name, str) for name in parsed_folders):
        raise TypeError("Folders should be specified by name")

    section = config.get("channels") or {}
    if not isinstance(section, dict):
        raise TypeError("Channels should be a mapping with ids and/or usernames")

    parsed_channels = []
    if "ids" in section:
        ids = section["ids"]
        if not isinstance(ids, list):
            raise TypeError("Ids should be a list(start with - on a new line)")
        # bool is a subclass of int; reject it explicitly.
        if not all(isinstance(x, int) and not isinstance(x, bool) for x in ids):
            raise TypeError("Ids should be integers")
        # Keep ids numeric: a digit-only *string* handed to Telethon's
        # get_entity() is looked up as a phone number, not as a peer id.
        parsed_channels.extend(ids)
    if "usernames" in section:
        usernames = section["usernames"]
        if not isinstance(usernames, list):
            raise TypeError("Usernames should be a list(start with - on a new line)")
        if not all(isinstance(x, str) for x in usernames):
            raise TypeError("Usernames should be strings")
        parsed_channels.extend(name.replace("@", "") for name in usernames)

    return list(parsed_folders), parsed_channels


def load_offsets(path=OFFSETS_PATH):
    """Return the saved ``{chat_id: last_message_id}`` map.

    An empty offsets file is created when none exists yet.
    """
    if not os.path.isfile(path):
        with open(path, "x", encoding="utf-8") as handle:
            handle.write("{}")
        return {}
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


def save_offsets(path=OFFSETS_PATH):
    """Persist the module-level ``offsets`` map.

    The data is written to a temporary file and moved into place so an
    interrupted run cannot leave a truncated JSON file behind.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as handle:
        json.dump(offsets, handle, indent=4)
    os.replace(tmp_path, path)


def safe_dirname(name, fallback="unknown"):
    """Turn a chat title into a single, usable directory name."""
    text = "" if name is None else str(name)
    for separator in ("/", "\\"):
        text = text.replace(separator, "_")
    text = text.strip()
    if text in ("", ".", ".."):
        return str(fallback)
    return text


def entity_title(entity):
    """Pick the directory name for an entity.

    Chats and channels use their title and users their username, as before.
    A user without a username falls back to the first name and then to the
    numeric id instead of producing a directory called ``None``.
    """
    fallback = getattr(entity, "id", "unknown")
    for attribute in ("title", "username", "first_name"):
        value = getattr(entity, attribute, None)
        if value:
            return safe_dirname(value, fallback)
    return safe_dirname(None, fallback)


def media_subdir(mime_type):
    """Map a document MIME type to ``photos``, ``videos`` or ``other``."""
    if mime_type:
        if mime_type.startswith("image"):
            return "photos"
        if mime_type.startswith("video"):
            return "videos"
    return "other"


def format_progress(
    iteration,
    total,
    prefix="",
    suffix="",
    decimals=1,
    length=100,
    fill="█",
):
    """Render one progress-bar line (starting with a carriage return).

    The bar and the percentage are clamped to the 0..100% range, and a
    non-positive ``total`` renders as 0% instead of dividing by zero.
    """
    if total > 0:
        clamped = min(max(iteration, 0), total)
        filled = length * clamped // total
        percent = 100 * clamped / total
    else:
        filled = 0
        percent = 0.0
    bar = fill * filled + "-" * (length - filled)
    return (
        f"\r{prefix} |{bar}| {percent:.{decimals}f}% "
        f"({iteration}/{total}) {suffix}"
    )


async def aenumerate(asequence, start=0):
    """Asynchronously enumerate an async iterator from a given start value."""
    n = start
    async for elem in asequence:
        yield n, elem
        n += 1


async def progress_bar(
    iterable,
    total,
    prefix="",
    suffix="",
    decimals=1,
    length=100,
    fill="█",
    print_end="\r",
):
    """Yield items from an async iterable while drawing a progress bar.

    Progress is measured by ``item.id`` against ``total``, so the items are
    expected to expose an ``id`` attribute.
    """

    def draw(iteration):
        line = format_progress(
            iteration, total, prefix, suffix, decimals, length, fill
        )
        print(line, end=print_end)

    draw(0)
    async for item in iterable:
        yield item
        draw(item.id)
    # Leave the finished bar on its own line for whatever is printed next.
    print()


async def download_channel(client, id):
    """Download every new photo/document of one chat.

    Args:
        client: A connected Telethon client.
        id: A numeric chat id (``int`` or digit string), or an already
            resolved entity object exposing ``.id``.

    The id of the last processed message is stored in ``offsets`` so the
    next run resumes after it.
    """
    from telethon.tl.types import MessageMediaDocument, MessageMediaPhoto

    if hasattr(id, "id"):
        entity = id  # already resolved by the caller; skip a second lookup
    else:
        entity = await client.get_entity(int(id))
    key = str(entity.id)
    min_id = offsets.get(key, 0)
    title = entity_title(entity)

    max_id = 0
    async for newest in client.iter_messages(entity, limit=1):
        max_id = newest.id

    # "<=" also covers the case where the newest messages were deleted.
    if max_id <= min_id:
        print(" " * 4 + f"done {title}")
        return

    base = f"{DOWNLOAD_ROOT}/{title}"
    for subdir in MEDIA_SUBDIRS:
        os.makedirs(f"{base}/{subdir}", exist_ok=True)

    print(" " * 4 + f"downloading {title}", end="\r")
    processed = 0
    async for message in progress_bar(
        client.iter_messages(entity, reverse=True, min_id=min_id),
        max_id,
        " " * 4 + f"downloading {title}:",
    ):
        media = message.media
        if isinstance(media, MessageMediaPhoto):
            target = "photos"
        elif isinstance(media, MessageMediaDocument):
            target = media_subdir(getattr(media.document, "mime_type", None))
        else:
            target = None  # other media kinds are not downloaded
        if target is not None:
            await message.download_media(file=f"{base}/{target}/")

        offsets[key] = message.id
        processed += 1
        # Checkpoint regularly so an interrupted run loses little work.
        if processed % 10 == 0:
            save_offsets()

    print(" " * 4 + f"done {title}")
    save_offsets()


async def run(client):
    """Download all configured channels, then all configured folders.

    Raises:
        ValueError: If a configured chat or folder cannot be found.
    """
    from telethon.tl import functions

    missing = False
    for channel in channels:
        # Only the lookup is guarded, so an error raised while downloading
        # is not misreported as "chat not found".
        try:
            entity = await client.get_entity(channel)
        except ValueError:
            missing = True
            print(f"Chat {channel} not found")
            continue
        await download_channel(client, entity)
    if missing:
        raise ValueError(
            "please check if channel's names or ids in config are correct"
        )

    if not folders:
        return

    response = await client(functions.messages.GetDialogFiltersRequest())
    # NOTE(review): newer Telethon layers appear to wrap the list in an
    # object with a ``filters`` attribute and to use a rich-text title with
    # a ``text`` attribute -- both shapes are accepted here; confirm.
    dialog_filters = getattr(response, "filters", response)
    wanted = set(folders)
    user_folders = {}
    for dialog_filter in dialog_filters:
        title = getattr(dialog_filter, "title", None)
        title = getattr(title, "text", title)
        if isinstance(title, str) and title in wanted:
            user_folders[title] = list(
                getattr(dialog_filter, "include_peers", None) or []
            )

    for name in folders:
        if name not in user_folders:
            missing = True
            print(f"folder {name} not found")
    if missing:
        raise ValueError("please check if folder's names in config are correct")

    for name, peers in user_folders.items():
        print(f"downloading folder: {name}")
        for peer in peers:
            # Resolve from the input peer itself rather than a bare id.
            entity = await client.get_entity(peer)
            await download_channel(client, entity)


def main():
    """Read the environment and config, then run the poller once."""
    import yaml
    from telethon import TelegramClient

    raw_api_id = os.getenv("api_id")
    if raw_api_id is None:
        raise ValueError("please set api_id env variable")
    api_hash = os.getenv("api_hash")
    if api_hash is None:
        raise ValueError("please set api_hash env variable")
    try:
        api_id = int(raw_api_id)
    except ValueError:
        raise ValueError("api_id env variable should be an integer") from None

    os.makedirs(DOWNLOAD_ROOT, exist_ok=True)

    if not os.path.isfile(CONFIG_PATH):
        raise FileNotFoundError("Please create poller.yaml")
    with open(CONFIG_PATH, "r", encoding="utf-8") as stream:
        try:
            loaded = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Fail here with a clear message instead of continuing with an
            # undefined config.
            raise ValueError(f"could not parse {CONFIG_PATH}: {exc}") from exc

    parsed_folders, parsed_channels = parse_config(loaded)
    config.clear()
    config.update(loaded or {})
    folders[:] = parsed_folders
    channels[:] = parsed_channels
    offsets.clear()
    offsets.update(load_offsets())

    with TelegramClient("anon", api_id, api_hash) as client:
        client.loop.run_until_complete(run(client))


if __name__ == "__main__":
    main()