mirror of
https://github.com/Alexander-D-Karpov/scripts.git
synced 2024-11-22 03:56:32 +03:00
added poller
This commit is contained in:
parent
46755b3f9f
commit
b23d0b3f3f
34
bots/poller/README.md
Normal file
34
bots/poller/README.md
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# poller
|
||||||
|
|
||||||
|
script to load new media from telegram chats
|
||||||
|
|
||||||
|
### Installation
|
||||||
|
get api_id and api_hash at https://my.telegram.org/apps
|
||||||
|
```shell
|
||||||
|
$ pip install -r requirements.txt
|
||||||
|
$ export api_id=123
|
||||||
|
$ export api_hash=abcdef....
|
||||||
|
```
|
||||||
|
|
||||||
|
### Configuration
|
||||||
|
modify poller.yaml to change chats
|
||||||
|
```yaml
|
||||||
|
channels:
|
||||||
|
usernames:
|
||||||
|
- https://t.me/sanspie_notes
|
||||||
|
- @s4nspie
|
||||||
|
ids:
|
||||||
|
- 868474142
|
||||||
|
|
||||||
|
folders:
|
||||||
|
- Users
|
||||||
|
```
|
||||||
|
|
||||||
|
### Run
|
||||||
|
```shell
|
||||||
|
$ python3 poller.py
|
||||||
|
```
|
||||||
|
|
||||||
|
files will be downloaded to folders
|
||||||
|
|
||||||
|
###### Note: if you want to redownload files delete poller/.offsets.json
|
209
bots/poller/poller.py
Normal file
209
bots/poller/poller.py
Normal file
|
@ -0,0 +1,209 @@
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from telethon import TelegramClient
|
||||||
|
from telethon.tl import functions
|
||||||
|
from telethon.tl.types import MessageMediaDocument, MessageMediaPhoto, User
|
||||||
|
|
||||||
|
if os.getenv("api_id") is None:
|
||||||
|
raise ValueError("please set api_id env variable")
|
||||||
|
|
||||||
|
if os.getenv("api_hash") is None:
|
||||||
|
raise ValueError("please set api_hash env variable")
|
||||||
|
|
||||||
|
api_id = os.getenv("api_id")
|
||||||
|
api_hash = os.getenv("api_hash")
|
||||||
|
|
||||||
|
if not os.path.isdir("poller"):
|
||||||
|
os.mkdir("poller")
|
||||||
|
|
||||||
|
if not os.path.isfile("poller.yaml"):
|
||||||
|
raise FileNotFoundError("Please create poller.yaml")
|
||||||
|
|
||||||
|
with open("poller.yaml", "r") as stream:
|
||||||
|
try:
|
||||||
|
config = yaml.safe_load(stream)
|
||||||
|
except yaml.YAMLError as exc:
|
||||||
|
print(exc)
|
||||||
|
|
||||||
|
# load offsets
|
||||||
|
offsets = {}
|
||||||
|
if not os.path.isfile("poller/.offsets.json"):
|
||||||
|
f = open("poller/.offsets.json", "x")
|
||||||
|
f.write("{}")
|
||||||
|
f.close()
|
||||||
|
else:
|
||||||
|
with open("poller/.offsets.json") as f:
|
||||||
|
offsets = json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
# parse and check config
|
||||||
|
folders = []
|
||||||
|
channels = []
|
||||||
|
if "folders" in config:
|
||||||
|
if config["folders"]:
|
||||||
|
folders = config["folders"]
|
||||||
|
if type(folders) is not list:
|
||||||
|
raise TypeError("Folders should be a list(start with - on a new line)")
|
||||||
|
if not (all([type(x) is str for x in folders])):
|
||||||
|
raise TypeError("Folders should be specified by name")
|
||||||
|
if "channels" in config:
|
||||||
|
if "ids" in config["channels"]:
|
||||||
|
ids = config["channels"]["ids"]
|
||||||
|
if type(ids) is not list:
|
||||||
|
raise TypeError("Ids should be a list(start with - on a new line)")
|
||||||
|
if not (all([type(x) is int for x in ids])):
|
||||||
|
raise TypeError("Ids should be integers")
|
||||||
|
channels += list(map(str, ids))
|
||||||
|
if "usernames" in config["channels"]:
|
||||||
|
usernames = config["channels"]["usernames"]
|
||||||
|
if type(usernames) is not list:
|
||||||
|
raise TypeError("Usernames should be a list(start with - on a new line)")
|
||||||
|
if not (all([type(x) is str for x in usernames])):
|
||||||
|
raise TypeError("Channel's ids should be string")
|
||||||
|
channels += [x.replace("@", "") for x in usernames]
|
||||||
|
|
||||||
|
|
||||||
|
async def aenumerate(asequence, start=0):
|
||||||
|
"""Asynchronously enumerate an async iterator from a given start value"""
|
||||||
|
n = start
|
||||||
|
async for elem in asequence:
|
||||||
|
yield n, elem
|
||||||
|
n += 1
|
||||||
|
|
||||||
|
|
||||||
|
async def progress_bar(
|
||||||
|
iterable,
|
||||||
|
total,
|
||||||
|
prefix="",
|
||||||
|
suffix="",
|
||||||
|
decimals=1,
|
||||||
|
length=100,
|
||||||
|
fill="█",
|
||||||
|
print_end="\r",
|
||||||
|
):
|
||||||
|
# Progress Bar Printing Function
|
||||||
|
def print_progress_bar(iteration):
|
||||||
|
percent = ("{0:." + str(decimals) + "f}").format(
|
||||||
|
100 * (iteration / float(total))
|
||||||
|
)
|
||||||
|
filledLength = int(length * iteration // total)
|
||||||
|
bar = fill * filledLength + "-" * (length - filledLength)
|
||||||
|
print(
|
||||||
|
f"\r{prefix} |{bar}| {percent}% ({iteration+1}/{total}) {suffix}",
|
||||||
|
end=print_end,
|
||||||
|
)
|
||||||
|
|
||||||
|
print_progress_bar(0)
|
||||||
|
async for i, item in aenumerate(iterable):
|
||||||
|
yield item
|
||||||
|
print_progress_bar(item.id)
|
||||||
|
|
||||||
|
|
||||||
|
async def download_channel(client, id):
|
||||||
|
id = str(id)
|
||||||
|
min_id = 0
|
||||||
|
max_id = 0
|
||||||
|
if id in offsets:
|
||||||
|
min_id = offsets[id]
|
||||||
|
entity = await client.get_entity(int(id))
|
||||||
|
t = type(entity)
|
||||||
|
|
||||||
|
if t is User:
|
||||||
|
title = entity.username
|
||||||
|
else:
|
||||||
|
title = entity.title
|
||||||
|
|
||||||
|
async for message in client.iter_messages(entity):
|
||||||
|
max_id = message.id
|
||||||
|
break
|
||||||
|
|
||||||
|
if max_id == min_id:
|
||||||
|
print(" " * 4 + f"done {title}")
|
||||||
|
return
|
||||||
|
if not os.path.isdir(f"poller/{title}"):
|
||||||
|
os.mkdir(f"poller/{title}")
|
||||||
|
if not os.path.isdir(f"poller/{title}/photos/"):
|
||||||
|
os.mkdir(f"poller/{title}/photos/")
|
||||||
|
if not os.path.isdir(f"poller/{title}/videos/"):
|
||||||
|
os.mkdir(f"poller/{title}/videos/")
|
||||||
|
if not os.path.isdir(f"poller/{title}/other/"):
|
||||||
|
os.mkdir(f"poller/{title}/other/")
|
||||||
|
print(" " * 4 + f"downloading {title}", end="\r")
|
||||||
|
async for message in progress_bar(
|
||||||
|
client.iter_messages(entity, reverse=True, min_id=min_id),
|
||||||
|
max_id,
|
||||||
|
" " * 4 + f"downloading {title}:",
|
||||||
|
):
|
||||||
|
if message.media:
|
||||||
|
if type(message.media) == MessageMediaPhoto:
|
||||||
|
await message.download_media(file=f"poller/{title}/photos/")
|
||||||
|
elif type(message.media) == MessageMediaDocument:
|
||||||
|
if message.media.document.mime_type:
|
||||||
|
mime_type = message.media.document.mime_type
|
||||||
|
if mime_type.startswith("image"):
|
||||||
|
await message.download_media(file=f"poller/{title}/photos/")
|
||||||
|
elif mime_type.startswith("video"):
|
||||||
|
await message.download_media(file=f"poller/{title}/videos/")
|
||||||
|
else:
|
||||||
|
await message.download_media(file=f"poller/{title}/other/")
|
||||||
|
else:
|
||||||
|
await message.download_media(file=f"poller/{title}/other/")
|
||||||
|
|
||||||
|
offsets[id] = message.id
|
||||||
|
|
||||||
|
if message.id % 10 == 0:
|
||||||
|
with open("poller/.offsets.json", "w") as f:
|
||||||
|
json.dump(offsets, f, indent=4)
|
||||||
|
print(" " * 4 + f"done {title}")
|
||||||
|
with open("poller/.offsets.json", "w") as f:
|
||||||
|
json.dump(offsets, f, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
async def run(client):
|
||||||
|
err = False
|
||||||
|
|
||||||
|
if channels:
|
||||||
|
for c in channels:
|
||||||
|
try:
|
||||||
|
entity = await client.get_entity(c)
|
||||||
|
await download_channel(client, entity.id)
|
||||||
|
except ValueError:
|
||||||
|
err = True
|
||||||
|
print(f"Chat {c} not found")
|
||||||
|
if err:
|
||||||
|
raise ValueError(
|
||||||
|
"please check if channel's names or ids in config are correct"
|
||||||
|
)
|
||||||
|
|
||||||
|
if folders:
|
||||||
|
user_folders = {}
|
||||||
|
req = await client(functions.messages.GetDialogFiltersRequest())
|
||||||
|
for folder in req:
|
||||||
|
d = folder.to_dict()
|
||||||
|
if "title" in d:
|
||||||
|
if d["title"] in folders:
|
||||||
|
user_folders[d["title"]] = []
|
||||||
|
for el in d["include_peers"]:
|
||||||
|
id = 0
|
||||||
|
for name, val in el.items():
|
||||||
|
if "id" in name:
|
||||||
|
id = str(val)
|
||||||
|
user_folders[d["title"]].append({"_": el["_"], "id": id})
|
||||||
|
|
||||||
|
for el in folders:
|
||||||
|
if el not in user_folders:
|
||||||
|
err = True
|
||||||
|
print(f"folder {el} not found")
|
||||||
|
if err:
|
||||||
|
raise ValueError("please check if folder's names in config are correct")
|
||||||
|
for folder, included_chats in user_folders.items():
|
||||||
|
print(f"downloading folder: {folder}")
|
||||||
|
for el in included_chats:
|
||||||
|
id = el["id"]
|
||||||
|
await download_channel(client, id)
|
||||||
|
|
||||||
|
|
||||||
|
with TelegramClient("anon", api_id, api_hash) as client:
|
||||||
|
client.loop.run_until_complete(run(client))
|
9
bots/poller/poller.yaml
Normal file
9
bots/poller/poller.yaml
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
channels:
|
||||||
|
usernames:
|
||||||
|
- https://t.me/sanspie_notes
|
||||||
|
- @s4nspie
|
||||||
|
ids:
|
||||||
|
- 868474142
|
||||||
|
|
||||||
|
folders:
|
||||||
|
- Users
|
2
bots/poller/requirements.txt
Normal file
2
bots/poller/requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
telethon
|
||||||
|
cryptg
|
Loading…
Reference in New Issue
Block a user