From 73ea9207e0dc491b9f1e61b071ebbe040f1394d6 Mon Sep 17 00:00:00 2001 From: Alexander-D-Karpov Date: Thu, 8 Jun 2023 23:53:41 +0300 Subject: [PATCH 1/2] fixed folder peer handling --- bots/poller/poller.py | 74 ++++++++++++++++++++++++++++++++----------- 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/bots/poller/poller.py b/bots/poller/poller.py index 74a0374..abde489 100644 --- a/bots/poller/poller.py +++ b/bots/poller/poller.py @@ -4,7 +4,13 @@ import yaml from telethon import TelegramClient from telethon.tl import functions -from telethon.tl.types import MessageMediaDocument, MessageMediaPhoto, User +from telethon.tl.types import ( + MessageMediaDocument, + MessageMediaPhoto, + PeerChannel, + PeerUser, + PeerChat, +) if os.getenv("api_id") is None: raise ValueError("please set api_id env variable") @@ -101,25 +107,13 @@ async def progress_bar( print_progress_bar(item.id) -async def download_channel(client, id): - id = str(id) - min_id = 0 +async def download(client, entity, title, min_id): max_id = 0 - if id in offsets: - min_id = offsets[id] - entity = await client.get_entity(int(id)) - t = type(entity) - - if t is User: - title = entity.username - else: - title = entity.title - async for message in client.iter_messages(entity): max_id = message.id break - if max_id == min_id: + if max_id <= min_id: print(" " * 4 + f"done {title}") return if not os.path.isdir(f"poller/{title}"): @@ -151,7 +145,7 @@ async def download_channel(client, id): else: await message.download_media(file=f"poller/{title}/other/") - offsets[id] = message.id + offsets[entity.id] = message.id if message.id % 10 == 0: with open("poller/.offsets.json", "w") as f: @@ -161,6 +155,45 @@ async def download_channel(client, id): json.dump(offsets, f, indent=4) +async def download_channel(client, id): + id = str(id) + min_id = 0 + if id in offsets: + min_id = offsets[id] + try: + entity = await client.get_entity(PeerChannel(int(id))) + except ValueError: + print("channel not found, there is probably somthing broken...") + return + await download(client, entity, entity.title, min_id) + + +async def download_user(client, id): + id = str(id) + min_id = 0 + if id in offsets: + min_id = offsets[id] + try: + entity = await client.get_entity(PeerUser(int(id))) + except ValueError: + print("user not found, there is probably somthing broken...") + return + await download(client, entity, entity.username, min_id) + + +async def download_chat(client, id): + id = str(id) + min_id = 0 + if id in offsets: + min_id = offsets[id] + try: + entity = await client.get_entity(PeerChat(int(id))) + except ValueError: + print("chat not found, there is probably somthing broken...") + return + await download(client, entity, entity.title, min_id) + + async def run(client): err = False @@ -202,8 +235,13 @@ async def run(client): print(f"downloading folder: {folder}") for el in included_chats: id = el["id"] - await download_channel(client, id) + if el["_"] == "InputPeerUser": + await download_user(client, id) + elif el["_"] == "InputPeerChannel": + await download_channel(client, id) + elif el["_"] == "InputPeerChat": + await download_chat(client, id) -with TelegramClient("anon", api_id, api_hash) as client: +with TelegramClient("downloader", int(api_id), api_hash) as client: client.loop.run_until_complete(run(client)) From 9800d8d345f4a66ea0d4fb5c1b847d8d67979c2a Mon Sep 17 00:00:00 2001 From: Alexander-D-Karpov Date: Tue, 13 Jun 2023 11:08:34 +0300 Subject: [PATCH 2/2] added castbox script, moved yandex --- podcasts/castbox/castbox.py | 84 ++++++++++++++ .../Файлы к заданиям/4.py => podcasts/castbox/requirement.txt | 0 podcasts/podcasts.py | 107 ------------------ podcasts/{ => yandex}/.env.template | 0 podcasts/yandex/podcasts.py | 104 +++++++++++++++++ podcasts/{ => yandex}/requirement.txt | 0 6 files changed, 188 insertions(+), 107 deletions(-) create mode 100644 podcasts/castbox/castbox.py rename ege/11_Основная волна. Первый день/Файлы к заданиям-20230222T110948Z-001/Файлы к заданиям/4.py => podcasts/castbox/requirement.txt (100%) delete mode 100644 podcasts/podcasts.py rename podcasts/{ => yandex}/.env.template (100%) create mode 100644 podcasts/yandex/podcasts.py rename podcasts/{ => yandex}/requirement.txt (100%) diff --git a/podcasts/castbox/castbox.py b/podcasts/castbox/castbox.py new file mode 100644 index 0000000..f3aadc4 --- /dev/null +++ b/podcasts/castbox/castbox.py @@ -0,0 +1,84 @@ +import requests +import json +import os + +from urllib.parse import unquote +from pydub import AudioSegment +from mutagen.easyid3 import EasyID3 +from mutagen.mp3 import MP3 +from mutagen.id3 import APIC, ID3 + +url = input("https://castbox.fm/channel/: ") + +if not url.startswith("https://castbox.fm/channel/"): + url = "https://castbox.fm/channel/" + url + + +def download_file(file_url): + local_filename = file_url.split("/")[-1] + with requests.get(file_url, stream=True) as r: + r.raise_for_status() + with open(local_filename, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + return local_filename + + +r = requests.get(url) +if r.status_code != 200: + raise LookupError("Site not found") +inner_data = r.text.splitlines() +data = [] +for line in inner_data: + if "window.__INITIAL_STATE__" in line: + data.append(line) + +if len(data) != 1: + raise ValueError("Payload not found") + +d = json.loads(unquote(data[0].split('"')[1::2][0])) # type: dict +title = d["ch"]["chInfo"]["title"] +main_image = d["ch"]["chInfo"]["cover_web"] +author = d["ch"]["chInfo"]["author"] +print("Downloading podcast " + title) +episodes = d["ch"]["eps"] +if not os.path.isdir(title): + os.mkdir(title) +for i, episode in enumerate(episodes): + print(f"Downloading: {episode['title']}", end="\r") + if "url" in episode and episode["url"]: + ep_url = episode["url"] + else: + ep_url = episode["urls"][0] + orig_path = download_file(ep_url) + n_path = title + "/" + f"{title}.mp3" + AudioSegment.from_file(orig_path).export(n_path) + os.remove(orig_path) + if "cover_url" not in episode or not episode["cover_url"]: + img_path = download_file(main_image) + else: + img_path = download_file(episode["cover_url"]) + if "author" in episode and episode["author"]: + ep_author = episode["author"] + else: + ep_author = author + + tag = MP3(n_path, ID3=ID3) + tag.tags.add( + APIC( + encoding=3, + mime="image/png", + type=3, + desc="Cover", + data=open(img_path, "rb").read(), + ) + ) + tag.save() + tag = EasyID3(n_path) + + tag["title"] = episode["title"] + tag["album"] = title + tag["artist"] = ep_author + + tag.save() + os.remove(img_path) diff --git a/ege/11_Основная волна. Первый день/Файлы к заданиям-20230222T110948Z-001/Файлы к заданиям/4.py b/podcasts/castbox/requirement.txt similarity index 100% rename from ege/11_Основная волна. Первый день/Файлы к заданиям-20230222T110948Z-001/Файлы к заданиям/4.py rename to podcasts/castbox/requirement.txt diff --git a/podcasts/podcasts.py b/podcasts/podcasts.py deleted file mode 100644 index afad4c2..0000000 --- a/podcasts/podcasts.py +++ /dev/null @@ -1,107 +0,0 @@ -import asyncio -import os -import daemon -from io import BytesIO - -from time import sleep - -from aiogram import Bot -from aiogram.bot.api import TelegramAPIServer -from mutagen.easyid3 import EasyID3 -from mutagen.mp3 import MP3 -from mutagen.id3 import APIC, ID3, TORY -from pydub import AudioSegment -from yandex_music import Client, Track -from dotenv import load_dotenv - -load_dotenv(dotenv_path=".env") - -YANDEX_TOKEN = os.getenv("YANDEX_TOKEN") -CHAT_ID = os.getenv("CHAT_ID") -TOKEN = os.getenv("BOT_TOKEN") -TELEGRAM_SERVER = os.getenv("TELEGRAM_SERVER", default=None) - -if TELEGRAM_SERVER: - local_server = TelegramAPIServer.from_base(TELEGRAM_SERVER) - bot = Bot(TOKEN, server=local_server) -else: - bot = Bot(TOKEN) - - -client = Client(YANDEX_TOKEN).init() -latest_podcast = None -latest_sent = True -podcasts_listened = [] - - -with daemon.DaemonContext(): - while True: - try: - queues = client.queues_list() - last_queue = client.queue(queues[0].id) - - last_track_id = last_queue.get_current_track() - last_track: Track = last_track_id.fetch_track() - - if "podcast" in last_track.type: - if last_track_id not in podcasts_listened: - if last_track_id == latest_podcast and not latest_sent: - latest_sent = True - podcasts_listened.append(last_track_id) - - title = last_track.title - album = last_track.albums[0] - url = f"https://music.yandex.ru/track/{last_track.id}" - desc = last_track.short_description.split("\n")[0] - - last_track.download_cover(filename="cover.png") - img_path = os.path.abspath("cover.png") - - last_track.download(filename="file", codec="mp3") - orig_path = os.path.abspath("file") - path = os.path.abspath("file.mp3") - - AudioSegment.from_file(orig_path).export(path) - os.remove(orig_path) - - # set music meta - tag = MP3(path, ID3=ID3) - tag.tags.add( - APIC( - encoding=3, # 3 is for utf-8 - mime="image/png", # image/jpeg or image/png - type=3, # 3 is for the cover image - desc="Cover", - data=open(img_path, "rb").read(), - ) - ) - tag.tags.add(TORY(text=str(album.year))) - tag.save() - tag = EasyID3(path) - - tag["title"] = title - tag["album"] = album.title - - tag.save() - - with open(path, "rb") as tmp: - obj = BytesIO(tmp.read()) - obj.name = f"{title}.mp3" - loop = asyncio.get_event_loop() - coroutine = bot.send_audio( - chat_id=CHAT_ID, - audio=obj, - caption=f"{title} - {album.title}\n{desc}\n\n{url}", - title=title, - performer=album.title, - ) - loop.run_until_complete(coroutine) - - else: - latest_podcast = last_track_id - latest_sent = False - except BaseException as e: - loop = asyncio.get_event_loop() - coroutine = bot.send_message(CHAT_ID, text=str(e)) - loop.run_until_complete(coroutine) - sleep(5 * 60) diff --git a/podcasts/.env.template b/podcasts/yandex/.env.template similarity index 100% rename from podcasts/.env.template rename to podcasts/yandex/.env.template diff --git a/podcasts/yandex/podcasts.py b/podcasts/yandex/podcasts.py new file mode 100644 index 0000000..159f599 --- /dev/null +++ b/podcasts/yandex/podcasts.py @@ -0,0 +1,104 @@ +import asyncio +import os +from io import BytesIO + +from time import sleep + +from aiogram import Bot +from aiogram.bot.api import TelegramAPIServer +from mutagen.easyid3 import EasyID3 +from mutagen.mp3 import MP3 +from mutagen.id3 import APIC, ID3, TORY +from pydub import AudioSegment +from yandex_music import Client, Track +from dotenv import load_dotenv + +load_dotenv(dotenv_path=".env") + +YANDEX_TOKEN = os.getenv("YANDEX_TOKEN") +CHAT_ID = os.getenv("CHAT_ID") +TOKEN = os.getenv("BOT_TOKEN") +TELEGRAM_SERVER = os.getenv("TELEGRAM_SERVER", default=None) + +if TELEGRAM_SERVER: + local_server = TelegramAPIServer.from_base(TELEGRAM_SERVER) + bot = Bot(TOKEN, server=local_server) +else: + bot = Bot(TOKEN) + + +client = Client(YANDEX_TOKEN).init() +latest_podcast = None +latest_sent = True +podcasts_listened = [] + +while True: + try: + queues = client.queues_list() + last_queue = client.queue(queues[0].id) + + last_track_id = last_queue.get_current_track() + last_track: Track = last_track_id.fetch_track() + + if "podcast" in last_track.type: + if last_track_id not in podcasts_listened: + if last_track_id == latest_podcast and not latest_sent: + latest_sent = True + podcasts_listened.append(last_track_id) + + title = last_track.title + album = last_track.albums[0] + url = f"https://music.yandex.ru/track/{last_track.id}" + desc = last_track.short_description.split("\n")[0] + + last_track.download_cover(filename="cover.png") + img_path = os.path.abspath("cover.png") + + last_track.download(filename="file", codec="mp3") + orig_path = os.path.abspath("file") + path = os.path.abspath("file.mp3") + + AudioSegment.from_file(orig_path).export(path) + os.remove(orig_path) + + # set music meta + tag = MP3(path, ID3=ID3) + tag.tags.add( + APIC( + encoding=3, # 3 is for utf-8 + mime="image/png", # image/jpeg or image/png + type=3, # 3 is for the cover image + desc="Cover", + data=open(img_path, "rb").read(), + ) + ) + tag.tags.add(TORY(text=str(album.year))) + tag.save() + tag = EasyID3(path) + + tag["title"] = title + tag["album"] = album.title + + tag.save() + + with open(path, "rb") as tmp: + obj = BytesIO(tmp.read()) + obj.name = f"{title}.mp3" + loop = asyncio.get_event_loop() + coroutine = bot.send_audio( + chat_id=CHAT_ID, + audio=obj, + caption=f"{title} - {album.title}\n{desc}\n\n{url}", + title=title, + performer=album.title, + ) + loop.run_until_complete(coroutine) + + else: + latest_podcast = last_track_id + latest_sent = False + except BaseException as e: + loop = asyncio.get_event_loop() + coroutine = bot.send_message(CHAT_ID, text=str(e)) + loop.run_until_complete(coroutine) + sleep(5 * 60) diff --git a/podcasts/requirement.txt b/podcasts/yandex/requirement.txt similarity index 100% rename from podcasts/requirement.txt rename to podcasts/yandex/requirement.txt