Merge remote-tracking branch 'origin/master'

2025-12-22 16:52:45 +03:00 · 2023-06-20 13:34:18 +03:00 · 2023-06-20 13:34:18 +03:00 · 79d30ab9a0
commit 79d30ab9a0
parent 76dd9d5af9 9800d8d345
7 changed files with 244 additions and 125 deletions
--- a/bots/poller/poller.py
+++ b/bots/poller/poller.py
@ -4,7 +4,13 @@ import yaml

 from telethon import TelegramClient
 from telethon.tl import functions
-from telethon.tl.types import MessageMediaDocument, MessageMediaPhoto, User
+from telethon.tl.types import (
+    MessageMediaDocument,
+    MessageMediaPhoto,
+    PeerChannel,
+    PeerUser,
+    PeerChat,
+)

 if os.getenv("api_id") is None:
    raise ValueError("please set api_id env variable")
@ -101,25 +107,13 @@ async def progress_bar(
        print_progress_bar(item.id)


-async def download_channel(client, id):
-    id = str(id)
-    min_id = 0
+async def download(client, entity, title, min_id):
    max_id = 0
-    if id in offsets:
-        min_id = offsets[id]
-    entity = await client.get_entity(int(id))
-    t = type(entity)
-
-    if t is User:
-        title = entity.username
-    else:
-        title = entity.title
-
    async for message in client.iter_messages(entity):
        max_id = message.id
        break

-    if max_id == min_id:
+    if max_id <= min_id:
        print(" " * 4 + f"done {title}")
        return
    if not os.path.isdir(f"poller/{title}"):
@ -151,7 +145,7 @@ async def download_channel(client, id):
                else:
                    await message.download_media(file=f"poller/{title}/other/")

-        offsets[id] = message.id
+        offsets[entity.id] = message.id

        if message.id % 10 == 0:
            with open("poller/.offsets.json", "w") as f:
@ -161,6 +155,45 @@ async def download_channel(client, id):
        json.dump(offsets, f, indent=4)


+async def download_channel(client, id):
+    """Resolve a channel by id and hand it to ``download``.
+
+    The stored offset for the stringified id (0 when none is recorded) is
+    passed on as ``min_id``; the channel's ``title`` is used as the title.
+    When entity resolution raises ``ValueError`` a notice is printed and
+    nothing is downloaded.
+    """
+    key = str(id)
+    # offsets is looked up by the string form of the id
+    start_from = offsets[key] if key in offsets else 0
+    try:
+        channel = await client.get_entity(PeerChannel(int(key)))
+    except ValueError:
+        print("channel not found, there is probably somthing broken...")
+        return
+    await download(client, channel, channel.title, start_from)
+
+
+async def download_user(client, id):
+    """Resolve a user by id and hand the dialog to ``download``.
+
+    The stored offset for the stringified id (0 when none is recorded) is
+    passed on as ``min_id``. When entity resolution raises ``ValueError`` a
+    notice is printed and nothing is downloaded.
+    """
+    id = str(id)
+    min_id = 0
+    if id in offsets:
+        min_id = offsets[id]
+    try:
+        entity = await client.get_entity(PeerUser(int(id)))
+    except ValueError:
+        print("user not found, there is probably somthing broken...")
+        return
+    # ``username`` is empty for accounts without a public handle; fall back to
+    # the numeric id so the title (used by ``download`` in the
+    # ``poller/{title}`` path) is never the literal "None" shared by all such
+    # users.
+    title = entity.username or str(entity.id)
+    await download(client, entity, title, min_id)
+
+
+async def download_chat(client, id):
+    """Resolve a basic group chat by id and hand it to ``download``.
+
+    Uses the offset recorded for the stringified id as ``min_id`` (0 when
+    there is none) and the chat's ``title`` as the title. A ``ValueError``
+    from entity resolution is reported on stdout and ends the call.
+    """
+    chat_key = str(id)
+    resume_from = offsets[chat_key] if chat_key in offsets else 0
+    try:
+        chat = await client.get_entity(PeerChat(int(chat_key)))
+    except ValueError:
+        print("chat not found, there is probably somthing broken...")
+        return
+    await download(client, chat, chat.title, resume_from)
+
+
 async def run(client):
    err = False

@ -202,8 +235,13 @@ async def run(client):
            print(f"downloading folder: {folder}")
            for el in included_chats:
                id = el["id"]
-                await download_channel(client, id)
+                if el["_"] == "InputPeerUser":
+                    await download_user(client, id)
+                elif el["_"] == "InputPeerChannel":
+                    await download_channel(client, id)
+                elif el["_"] == "InputPeerChat":
+                    await download_chat(client, id)


-with TelegramClient("anon", api_id, api_hash) as client:
+with TelegramClient("downloader", int(api_id), api_hash) as client:
    client.loop.run_until_complete(run(client))
--- a/podcasts/castbox/castbox.py
+++ b/podcasts/castbox/castbox.py
@ -0,0 +1,84 @@
+import requests
+import json
+import os
+
+from urllib.parse import unquote
+from pydub import AudioSegment
+from mutagen.easyid3 import EasyID3
+from mutagen.mp3 import MP3
+from mutagen.id3 import APIC, ID3
+
+# The prompt shows the channel URL prefix; input that does not already start
+# with it is treated as the part after the prefix.
+url = input("https://castbox.fm/channel/: ")
+url = (
+    url
+    if url.startswith("https://castbox.fm/channel/")
+    else "https://castbox.fm/channel/" + url
+)
+
+
+def download_file(file_url):
+    """Stream ``file_url`` into a local file and return that file's name.
+
+    The local name is whatever follows the last "/" of the URL. A failing
+    HTTP status is raised through ``raise_for_status``.
+    """
+    target_name = file_url.rpartition("/")[2]
+    with requests.get(file_url, stream=True) as response:
+        response.raise_for_status()
+        with open(target_name, "wb") as out:
+            # write the body piece by piece instead of loading it whole
+            for piece in response.iter_content(chunk_size=8192):
+                out.write(piece)
+    return target_name
+
+
+r = requests.get(url)
+if r.status_code != 200:
+    raise LookupError("Site not found")
+
+# Keep only the page line(s) that mention window.__INITIAL_STATE__; exactly
+# one such line is expected.
+inner_data = r.text.splitlines()
+data = [line for line in inner_data if "window.__INITIAL_STATE__" in line]
+
+if len(data) != 1:
+    raise ValueError("Payload not found")
+
+# The payload is the first double-quoted string on that line: URL-encoded JSON.
+d = json.loads(unquote(data[0].split('"')[1::2][0]))  # type: dict
+title = d["ch"]["chInfo"]["title"]
+main_image = d["ch"]["chInfo"]["cover_web"]
+author = d["ch"]["chInfo"]["author"]
+print("Downloading podcast " + title)
+episodes = d["ch"]["eps"]
+if not os.path.isdir(title):
+    os.mkdir(title)
+for episode in episodes:
+    print(f"Downloading: {episode['title']}", end="\r")
+    # Prefer the single "url" field; otherwise take the first entry of "urls".
+    ep_url = episode.get("url") or episode["urls"][0]
+    orig_path = download_file(ep_url)
+    # Name the output after the episode, not the channel: with a channel-named
+    # file every episode overwrote the previous one. "/" is replaced so the
+    # episode title cannot point outside the channel directory.
+    safe_name = episode["title"].replace("/", "_")
+    n_path = os.path.join(title, f"{safe_name}.mp3")
+    AudioSegment.from_file(orig_path).export(n_path)
+    os.remove(orig_path)
+    # Episode-level cover and author win; channel-level values are fallbacks.
+    img_path = download_file(episode.get("cover_url") or main_image)
+    ep_author = episode.get("author") or author
+
+    # Read the cover through a context manager so the handle is closed.
+    with open(img_path, "rb") as cover_file:
+        cover_bytes = cover_file.read()
+
+    tag = MP3(n_path, ID3=ID3)
+    # NOTE(review): mime is fixed to image/png whatever the downloaded cover's
+    # real format is - confirm this is acceptable.
+    tag.tags.add(
+        APIC(
+            encoding=3,
+            mime="image/png",
+            type=3,
+            desc="Cover",
+            data=cover_bytes,
+        )
+    )
+    tag.save()
+    tag = EasyID3(n_path)
+
+    tag["title"] = episode["title"]
+    tag["album"] = title
+    tag["artist"] = ep_author
+
+    tag.save()
+    os.remove(img_path)
--- a/podcasts/castbox/requirement.txt
+++ b/podcasts/castbox/requirement.txt
--- a/podcasts/podcasts.py
+++ b/podcasts/podcasts.py
@ -1,107 +0,0 @@
-import asyncio
-import os
-import daemon
-from io import BytesIO
-
-from time import sleep
-
-from aiogram import Bot
-from aiogram.bot.api import TelegramAPIServer
-from mutagen.easyid3 import EasyID3
-from mutagen.mp3 import MP3
-from mutagen.id3 import APIC, ID3, TORY
-from pydub import AudioSegment
-from yandex_music import Client, Track
-from dotenv import load_dotenv
-
-load_dotenv(dotenv_path=".env")
-
-YANDEX_TOKEN = os.getenv("YANDEX_TOKEN")
-CHAT_ID = os.getenv("CHAT_ID")
-TOKEN = os.getenv("BOT_TOKEN")
-TELEGRAM_SERVER = os.getenv("TELEGRAM_SERVER", default=None)
-
-if TELEGRAM_SERVER:
-    local_server = TelegramAPIServer.from_base(TELEGRAM_SERVER)
-    bot = Bot(TOKEN, server=local_server)
-else:
-    bot = Bot(TOKEN)
-
-
-client = Client(YANDEX_TOKEN).init()
-latest_podcast = None
-latest_sent = True
-podcasts_listened = []
-
-
-with daemon.DaemonContext():
-    while True:
-        try:
-            queues = client.queues_list()
-            last_queue = client.queue(queues[0].id)
-
-            last_track_id = last_queue.get_current_track()
-            last_track: Track = last_track_id.fetch_track()
-
-            if "podcast" in last_track.type:
-                if last_track_id not in podcasts_listened:
-                    if last_track_id == latest_podcast and not latest_sent:
-                        latest_sent = True
-                        podcasts_listened.append(last_track_id)
-
-                        title = last_track.title
-                        album = last_track.albums[0]
-                        url = f"https://music.yandex.ru/track/{last_track.id}"
-                        desc = last_track.short_description.split("\n")[0]
-
-                        last_track.download_cover(filename="cover.png")
-                        img_path = os.path.abspath("cover.png")
-
-                        last_track.download(filename="file", codec="mp3")
-                        orig_path = os.path.abspath("file")
-                        path = os.path.abspath("file.mp3")
-
-                        AudioSegment.from_file(orig_path).export(path)
-                        os.remove(orig_path)
-
-                        # set music meta
-                        tag = MP3(path, ID3=ID3)
-                        tag.tags.add(
-                            APIC(
-                                encoding=3,  # 3 is for utf-8
-                                mime="image/png",  # image/jpeg or image/png
-                                type=3,  # 3 is for the cover image
-                                desc="Cover",
-                                data=open(img_path, "rb").read(),
-                            )
-                        )
-                        tag.tags.add(TORY(text=str(album.year)))
-                        tag.save()
-                        tag = EasyID3(path)
-
-                        tag["title"] = title
-                        tag["album"] = album.title
-
-                        tag.save()
-
-                        with open(path, "rb") as tmp:
-                            obj = BytesIO(tmp.read())
-                            obj.name = f"{title}.mp3"
-                            loop = asyncio.get_event_loop()
-                            coroutine = bot.send_audio(
-                                chat_id=CHAT_ID,
-                                audio=obj,
-                                caption=f"{title} - {album.title}\n{desc}\n\n{url}",
-                                title=title,
-                                performer=album.title,
-                            )
-                            loop.run_until_complete(coroutine)
-
-                    else:
-                        latest_podcast = last_track_id
-                        latest_sent = False
-        except BaseException as e:
-            loop = asyncio.get_event_loop()
-            coroutine = bot.send_message(CHAT_ID, text=str(e))
-            loop.run_until_complete(coroutine)
-        sleep(5 * 60)
--- a/podcasts/yandex/.env.template
+++ b/podcasts/yandex/.env.template
--- a/podcasts/yandex/podcasts.py
+++ b/podcasts/yandex/podcasts.py
@ -0,0 +1,104 @@
+import asyncio
+import os
+from io import BytesIO
+
+from time import sleep
+
+from aiogram import Bot
+from aiogram.bot.api import TelegramAPIServer
+from mutagen.easyid3 import EasyID3
+from mutagen.mp3 import MP3
+from mutagen.id3 import APIC, ID3, TORY
+from pydub import AudioSegment
+from yandex_music import Client, Track
+from dotenv import load_dotenv
+
+load_dotenv(dotenv_path=".env")
+
+# Settings are read from the environment after .env has been loaded.
+YANDEX_TOKEN = os.getenv("YANDEX_TOKEN")
+CHAT_ID = os.getenv("CHAT_ID")
+TOKEN = os.getenv("BOT_TOKEN")
+TELEGRAM_SERVER = os.getenv("TELEGRAM_SERVER")
+
+# Without TELEGRAM_SERVER the bot is created with no explicit server;
+# otherwise it is pointed at the configured one.
+if not TELEGRAM_SERVER:
+    bot = Bot(TOKEN)
+else:
+    local_server = TelegramAPIServer.from_base(TELEGRAM_SERVER)
+    bot = Bot(TOKEN, server=local_server)
+
+
+# Yandex Music client plus the state the polling loop carries between rounds.
+client = Client(YANDEX_TOKEN).init()
+latest_podcast, latest_sent = None, True
+podcasts_listened = []
+
+# Poll the first playback queue every five minutes. A podcast track is sent
+# only when it is seen as the current track on two consecutive rounds: the
+# first sighting records it in latest_podcast, the next one sends it and
+# remembers it in podcasts_listened so it is never sent twice.
+while True:
+    try:
+        queues = client.queues_list()
+        last_queue = client.queue(queues[0].id)
+
+        last_track_id = last_queue.get_current_track()
+        last_track: Track = last_track_id.fetch_track()
+
+        if "podcast" in last_track.type:
+            if last_track_id not in podcasts_listened:
+                if last_track_id == latest_podcast and not latest_sent:
+                    latest_sent = True
+                    podcasts_listened.append(last_track_id)
+
+                    title = last_track.title
+                    album = last_track.albums[0]
+                    url = f"https://music.yandex.ru/track/{last_track.id}"
+                    # only the first line of the description goes in the caption
+                    desc = last_track.short_description.split("\n")[0]
+
+                    last_track.download_cover(filename="cover.png")
+                    img_path = os.path.abspath("cover.png")
+
+                    last_track.download(filename="file", codec="mp3")
+                    orig_path = os.path.abspath("file")
+                    path = os.path.abspath("file.mp3")
+
+                    AudioSegment.from_file(orig_path).export(path)
+                    os.remove(orig_path)
+
+                    # read the cover via a context manager so the handle is closed
+                    with open(img_path, "rb") as cover_file:
+                        cover_bytes = cover_file.read()
+
+                    # set music meta
+                    tag = MP3(path, ID3=ID3)
+                    tag.tags.add(
+                        APIC(
+                            encoding=3,  # 3 is for utf-8
+                            mime="image/png",  # image/jpeg or image/png
+                            type=3,  # 3 is for the cover image
+                            desc="Cover",
+                            data=cover_bytes,
+                        )
+                    )
+                    tag.tags.add(TORY(text=str(album.year)))
+                    tag.save()
+                    tag = EasyID3(path)
+
+                    tag["title"] = title
+                    tag["album"] = album.title
+
+                    tag.save()
+
+                    with open(path, "rb") as tmp:
+                        obj = BytesIO(tmp.read())
+                        obj.name = f"{title}.mp3"
+                        loop = asyncio.get_event_loop()
+                        coroutine = bot.send_audio(
+                            chat_id=CHAT_ID,
+                            audio=obj,
+                            caption=f"{title} - {album.title}\n{desc}\n\n{url}",
+                            title=title,
+                            performer=album.title,
+                        )
+                        loop.run_until_complete(coroutine)
+
+                else:
+                    latest_podcast = last_track_id
+                    latest_sent = False
+    except Exception as e:
+        # Exception rather than BaseException, so KeyboardInterrupt and
+        # SystemExit still stop the script instead of being reported to chat.
+        try:
+            loop = asyncio.get_event_loop()
+            coroutine = bot.send_message(CHAT_ID, text=str(e))
+            loop.run_until_complete(coroutine)
+        except Exception as report_error:
+            # Reporting is best-effort: a failed notification must not end
+            # the polling loop.
+            print(f"failed to report error {e!r}: {report_error!r}")
+    sleep(5 * 60)
--- a/podcasts/yandex/requirement.txt
+++ b/podcasts/yandex/requirement.txt