Merge remote-tracking branch 'origin/master'

This commit is contained in:
Alexander Karpov 2023-06-20 13:34:18 +03:00
commit 79d30ab9a0
7 changed files with 244 additions and 125 deletions

View File

@ -4,7 +4,13 @@ import yaml
from telethon import TelegramClient
from telethon.tl import functions
from telethon.tl.types import MessageMediaDocument, MessageMediaPhoto, User
from telethon.tl.types import (
MessageMediaDocument,
MessageMediaPhoto,
PeerChannel,
PeerUser,
PeerChat,
)
if os.getenv("api_id") is None:
raise ValueError("please set api_id env variable")
@ -101,25 +107,13 @@ async def progress_bar(
print_progress_bar(item.id)
async def download_channel(client, id):
id = str(id)
min_id = 0
async def download(client, entity, title, min_id):
max_id = 0
if id in offsets:
min_id = offsets[id]
entity = await client.get_entity(int(id))
t = type(entity)
if t is User:
title = entity.username
else:
title = entity.title
async for message in client.iter_messages(entity):
max_id = message.id
break
if max_id == min_id:
if max_id <= min_id:
print(" " * 4 + f"done {title}")
return
if not os.path.isdir(f"poller/{title}"):
@ -151,7 +145,7 @@ async def download_channel(client, id):
else:
await message.download_media(file=f"poller/{title}/other/")
offsets[id] = message.id
offsets[entity.id] = message.id
if message.id % 10 == 0:
with open("poller/.offsets.json", "w") as f:
@ -161,6 +155,45 @@ async def download_channel(client, id):
json.dump(offsets, f, indent=4)
async def download_channel(client, id):
    """Resolve a channel by id and hand it to ``download``.

    Args:
        client: Telegram client used for the entity lookup and passed on
            to ``download``.
        id: Channel id; it is converted with ``str()`` for the offsets
            lookup and with ``int()`` for the peer lookup.

    The offset recorded for this id (0 when there is none) is passed to
    ``download`` as ``min_id``.  When the lookup raises ``ValueError`` a
    notice is printed and nothing is downloaded.
    """
    key = str(id)
    # Offsets are looked up by the string form of the id.
    start_from = offsets[key] if key in offsets else 0

    try:
        channel = await client.get_entity(PeerChannel(int(key)))
    except ValueError:
        print("channel not found, there is probably somthing broken...")
        return

    await download(client, channel, channel.title, start_from)
async def download_user(client, id):
    """Resolve a user by id and hand the dialog to ``download``.

    Args:
        client: Telegram client used for the entity lookup and passed on
            to ``download``.
        id: User id; it is converted with ``str()`` for the offsets
            lookup and with ``int()`` for the peer lookup.

    The offset recorded for this id (0 when there is none) is passed to
    ``download`` as ``min_id``.  When the lookup raises ``ValueError`` a
    notice is printed and nothing is downloaded.
    """
    key = str(id)
    min_id = offsets[key] if key in offsets else 0

    try:
        entity = await client.get_entity(PeerUser(int(key)))
    except ValueError:
        print("user not found, there is probably somthing broken...")
        return

    # ``username`` is empty for accounts without a public username.  The
    # title names the output folder, so fall back to the numeric id rather
    # than letting every such user land in a folder called "None".
    title = entity.username or str(entity.id)
    await download(client, entity, title, min_id)
async def download_chat(client, id):
    """Resolve a group chat by id and hand it to ``download``.

    Args:
        client: Telegram client used for the entity lookup and passed on
            to ``download``.
        id: Chat id; it is converted with ``str()`` for the offsets
            lookup and with ``int()`` for the peer lookup.

    The offset recorded for this id (0 when there is none) is passed to
    ``download`` as ``min_id``.  When the lookup raises ``ValueError`` a
    notice is printed and nothing is downloaded.
    """
    key = str(id)
    # Offsets are looked up by the string form of the id.
    start_from = offsets[key] if key in offsets else 0

    try:
        chat = await client.get_entity(PeerChat(int(key)))
    except ValueError:
        print("chat not found, there is probably somthing broken...")
        return

    await download(client, chat, chat.title, start_from)
async def run(client):
err = False
@ -202,8 +235,13 @@ async def run(client):
print(f"downloading folder: {folder}")
for el in included_chats:
id = el["id"]
await download_channel(client, id)
if el["_"] == "InputPeerUser":
await download_user(client, id)
elif el["_"] == "InputPeerChannel":
await download_channel(client, id)
elif el["_"] == "InputPeerChat":
await download_chat(client, id)
with TelegramClient("anon", api_id, api_hash) as client:
with TelegramClient("downloader", int(api_id), api_hash) as client:
client.loop.run_until_complete(run(client))

View File

@ -0,0 +1,84 @@
import requests
import json
import os
from urllib.parse import unquote
from pydub import AudioSegment
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
from mutagen.id3 import APIC, ID3
# Ask for the channel.  Either a full channel URL or just the part after
# the prefix shown in the prompt is accepted; the prefix is added when it
# is missing.
url = input("https://castbox.fm/channel/: ")
url = (
    url
    if url.startswith("https://castbox.fm/channel/")
    else "https://castbox.fm/channel/" + url
)
def download_file(file_url):
    """Download *file_url* into the current working directory.

    The response body is streamed to disk in 8192-byte chunks instead of
    being held in memory.

    Args:
        file_url: Absolute URL of the file to fetch.

    Returns:
        The local file name, i.e. the last segment of the URL path.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server stops responding.
    """
    # Imported here because the module only pulls ``unquote`` from
    # urllib.parse at the top of the file.
    from urllib.parse import urlsplit

    # Name the file after the path component only, so a query string or
    # fragment ("ep.mp3?token=a/b") does not end up in the file name.
    local_filename = urlsplit(file_url).path.split("/")[-1]

    # Without a timeout a stalled server would block the script forever.
    with requests.get(file_url, stream=True, timeout=30) as r:
        r.raise_for_status()
        with open(local_filename, "wb") as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
    return local_filename
# Fetch the channel page; anything but a 200 answer is treated as
# "channel does not exist".
r = requests.get(url)
if r.status_code != 200:
    raise LookupError("Site not found")

# The channel data sits on the single line of the page that mentions
# window.__INITIAL_STATE__.
inner_data = r.text.splitlines()
data = [line for line in inner_data if "window.__INITIAL_STATE__" in line]
if len(data) != 1:
    raise ValueError("Payload not found")

# Take the first double-quoted segment of that line, URL-decode it and
# parse it as JSON.
d = json.loads(unquote(data[0].split('"')[1::2][0]))  # type: dict

title = d["ch"]["chInfo"]["title"]
main_image = d["ch"]["chInfo"]["cover_web"]
author = d["ch"]["chInfo"]["author"]
print("Downloading podcast " + title)
episodes = d["ch"]["eps"]

# All episodes go into a folder named after the channel.
if not os.path.isdir(title):
    os.mkdir(title)

for episode in episodes:
    print(f"Downloading: {episode['title']}", end="\r")

    # Prefer the single "url" field; fall back to the first of "urls".
    ep_url = episode.get("url") or episode["urls"][0]
    orig_path = download_file(ep_url)

    # One file per episode: name it after the episode rather than the
    # channel, otherwise every episode overwrites the previous one.  Path
    # separators in the title are replaced so the file stays in the folder.
    ep_name = episode["title"].replace("/", "_").replace(os.sep, "_")
    n_path = f"{title}/{ep_name}.mp3"
    AudioSegment.from_file(orig_path).export(n_path)
    os.remove(orig_path)

    # Episode cover if there is one, channel cover otherwise.
    img_path = download_file(episode.get("cover_url") or main_image)
    # Same fallback for the author.
    ep_author = episode.get("author") or author

    tag = MP3(n_path, ID3=ID3)
    if tag.tags is None:
        # The exported file may carry no ID3 header yet.
        tag.add_tags()
    with open(img_path, "rb") as cover:
        cover_bytes = cover.read()
    tag.tags.add(
        APIC(
            encoding=3,
            # NOTE(review): the cover is always labelled PNG; confirm the
            # downloaded images really are PNG files.
            mime="image/png",
            type=3,
            desc="Cover",
            data=cover_bytes,
        )
    )
    tag.save()

    # Text tags are written through the simplified EasyID3 interface.
    tag = EasyID3(n_path)
    tag["title"] = episode["title"]
    tag["album"] = title
    tag["artist"] = ep_author
    tag.save()
    os.remove(img_path)

View File

View File

@ -1,107 +0,0 @@
import asyncio
import os
import daemon
from io import BytesIO
from time import sleep
from aiogram import Bot
from aiogram.bot.api import TelegramAPIServer
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
from mutagen.id3 import APIC, ID3, TORY
from pydub import AudioSegment
from yandex_music import Client, Track
from dotenv import load_dotenv
# Polls the Yandex Music play queue and sends podcast tracks to a Telegram
# chat as tagged MP3 files.
# NOTE(review): indentation is not visible in this view, so the comments
# below describe the statements rather than their exact nesting.

# Settings come from environment variables, optionally loaded from ".env".
load_dotenv(dotenv_path=".env")
YANDEX_TOKEN = os.getenv("YANDEX_TOKEN")
CHAT_ID = os.getenv("CHAT_ID")
TOKEN = os.getenv("BOT_TOKEN")
TELEGRAM_SERVER = os.getenv("TELEGRAM_SERVER", default=None)
# Use a custom Bot API server when TELEGRAM_SERVER is set.
if TELEGRAM_SERVER:
local_server = TelegramAPIServer.from_base(TELEGRAM_SERVER)
bot = Bot(TOKEN, server=local_server)
else:
bot = Bot(TOKEN)
client = Client(YANDEX_TOKEN).init()
# Polling state: the podcast track seen on the previous pass, whether it has
# been sent, and the tracks that were already sent.
latest_podcast = None
latest_sent = True
podcasts_listened = []
# The polling loop runs inside a python-daemon context.
with daemon.DaemonContext():
while True:
try:
# Inspect the current track of the first queue in the returned list.
queues = client.queues_list()
last_queue = client.queue(queues[0].id)
last_track_id = last_queue.get_current_track()
last_track: Track = last_track_id.fetch_track()
if "podcast" in last_track.type:
if last_track_id not in podcasts_listened:
# A track is sent only when it equals the one remembered earlier and that
# one has not been sent yet.
if last_track_id == latest_podcast and not latest_sent:
latest_sent = True
podcasts_listened.append(last_track_id)
title = last_track.title
album = last_track.albums[0]
url = f"https://music.yandex.ru/track/{last_track.id}"
desc = last_track.short_description.split("\n")[0]
# Cover and audio are stored under fixed names in the working directory;
# the audio is then re-exported to "file.mp3" and the raw download removed.
last_track.download_cover(filename="cover.png")
img_path = os.path.abspath("cover.png")
last_track.download(filename="file", codec="mp3")
orig_path = os.path.abspath("file")
path = os.path.abspath("file.mp3")
AudioSegment.from_file(orig_path).export(path)
os.remove(orig_path)
# set music meta
tag = MP3(path, ID3=ID3)
# NOTE(review): the handle returned by open(img_path, "rb") below is never
# closed explicitly.
tag.tags.add(
APIC(
encoding=3, # 3 is for utf-8
mime="image/png", # image/jpeg or image/png
type=3, # 3 is for the cover image
desc="Cover",
data=open(img_path, "rb").read(),
)
)
# TORY frame carries the album year.
tag.tags.add(TORY(text=str(album.year)))
tag.save()
tag = EasyID3(path)
tag["title"] = title
tag["album"] = album.title
tag.save()
# Read the tagged file into memory and give the buffer a name for the upload.
with open(path, "rb") as tmp:
obj = BytesIO(tmp.read())
obj.name = f"{title}.mp3"
loop = asyncio.get_event_loop()
coroutine = bot.send_audio(
chat_id=CHAT_ID,
audio=obj,
caption=f"{title} - {album.title}\n{desc}\n\n{url}",
title=title,
performer=album.title,
)
loop.run_until_complete(coroutine)
# Presumably paired with the latest_podcast check above (confirm nesting):
# remember this track and mark it as not yet sent.
else:
latest_podcast = last_track_id
latest_sent = False
# Any failure is reported to the chat as text.
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit.
except BaseException as e:
loop = asyncio.get_event_loop()
coroutine = bot.send_message(CHAT_ID, text=str(e))
loop.run_until_complete(coroutine)
# Five minutes between polls.
sleep(5 * 60)

104
podcasts/yandex/podcasts.py Normal file
View File

@ -0,0 +1,104 @@
import asyncio
import os
from io import BytesIO
from time import sleep
from aiogram import Bot
from aiogram.bot.api import TelegramAPIServer
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
from mutagen.id3 import APIC, ID3, TORY
from pydub import AudioSegment
from yandex_music import Client, Track
from dotenv import load_dotenv
# Polls the Yandex Music play queue and sends podcast tracks to a Telegram
# chat as tagged MP3 files.
# NOTE(review): indentation is not visible in this view, so the comments
# below describe the statements rather than their exact nesting.

# Settings come from environment variables, optionally loaded from ".env".
load_dotenv(dotenv_path=".env")
YANDEX_TOKEN = os.getenv("YANDEX_TOKEN")
CHAT_ID = os.getenv("CHAT_ID")
TOKEN = os.getenv("BOT_TOKEN")
TELEGRAM_SERVER = os.getenv("TELEGRAM_SERVER", default=None)
# Use a custom Bot API server when TELEGRAM_SERVER is set.
if TELEGRAM_SERVER:
local_server = TelegramAPIServer.from_base(TELEGRAM_SERVER)
bot = Bot(TOKEN, server=local_server)
else:
bot = Bot(TOKEN)
client = Client(YANDEX_TOKEN).init()
# Polling state: the podcast track seen on the previous pass, whether it has
# been sent, and the tracks that were already sent.
latest_podcast = None
latest_sent = True
podcasts_listened = []
# Poll forever in the foreground.
while True:
try:
# Inspect the current track of the first queue in the returned list.
queues = client.queues_list()
last_queue = client.queue(queues[0].id)
last_track_id = last_queue.get_current_track()
last_track: Track = last_track_id.fetch_track()
if "podcast" in last_track.type:
if last_track_id not in podcasts_listened:
# A track is sent only when it equals the one remembered earlier and that
# one has not been sent yet.
if last_track_id == latest_podcast and not latest_sent:
latest_sent = True
podcasts_listened.append(last_track_id)
title = last_track.title
album = last_track.albums[0]
url = f"https://music.yandex.ru/track/{last_track.id}"
desc = last_track.short_description.split("\n")[0]
# Cover and audio are stored under fixed names in the working directory;
# the audio is then re-exported to "file.mp3" and the raw download removed.
last_track.download_cover(filename="cover.png")
img_path = os.path.abspath("cover.png")
last_track.download(filename="file", codec="mp3")
orig_path = os.path.abspath("file")
path = os.path.abspath("file.mp3")
AudioSegment.from_file(orig_path).export(path)
os.remove(orig_path)
# set music meta
tag = MP3(path, ID3=ID3)
# NOTE(review): the handle returned by open(img_path, "rb") below is never
# closed explicitly.
tag.tags.add(
APIC(
encoding=3, # 3 is for utf-8
mime="image/png", # image/jpeg or image/png
type=3, # 3 is for the cover image
desc="Cover",
data=open(img_path, "rb").read(),
)
)
# TORY frame carries the album year.
tag.tags.add(TORY(text=str(album.year)))
tag.save()
tag = EasyID3(path)
tag["title"] = title
tag["album"] = album.title
tag.save()
# Read the tagged file into memory and give the buffer a name for the upload.
with open(path, "rb") as tmp:
obj = BytesIO(tmp.read())
obj.name = f"{title}.mp3"
loop = asyncio.get_event_loop()
coroutine = bot.send_audio(
chat_id=CHAT_ID,
audio=obj,
caption=f"{title} - {album.title}\n{desc}\n\n{url}",
title=title,
performer=album.title,
)
loop.run_until_complete(coroutine)
# Presumably paired with the latest_podcast check above (confirm nesting):
# remember this track and mark it as not yet sent.
else:
latest_podcast = last_track_id
latest_sent = False
# Any failure is reported to the chat as text.
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit.
except BaseException as e:
loop = asyncio.get_event_loop()
coroutine = bot.send_message(CHAT_ID, text=str(e))
loop.run_until_complete(coroutine)
# Five minutes between polls.
sleep(5 * 60)