diff --git a/podcasts/castbox/castbox.py b/podcasts/castbox/castbox.py index f3aadc4..a910691 100644 --- a/podcasts/castbox/castbox.py +++ b/podcasts/castbox/castbox.py @@ -24,61 +24,78 @@ def download_file(file_url): return local_filename -r = requests.get(url) -if r.status_code != 200: - raise LookupError("Site not found") -inner_data = r.text.splitlines() -data = [] -for line in inner_data: - if "window.__INITIAL_STATE__" in line: - data.append(line) +def get_data(url): + r = requests.get(url) + if r.status_code != 200: + raise LookupError("Site not found") + inner_data = r.text.splitlines() + data = [] + for line in inner_data: + if "window.__INITIAL_STATE__" in line: + data.append(line) -if len(data) != 1: - raise ValueError("Payload not found") + if len(data) != 1: + raise ValueError("Payload not found") -d = json.loads(unquote(data[0].split('"')[1::2][0])) # type: dict + d = json.loads(unquote(data[0].split('"')[1::2][0])) # type: dict + return d + + +d = get_data(url) +while ( + not d + or "ch" not in d + or "chInfo" not in d["ch"] + or "title" not in d["ch"]["chInfo"] +): + d = get_data(url) + print("Data not loaded, retrying...") title = d["ch"]["chInfo"]["title"] main_image = d["ch"]["chInfo"]["cover_web"] author = d["ch"]["chInfo"]["author"] +episode_count = d["ch"]["chInfo"]["episode_count"] print("Downloading podcast " + title) episodes = d["ch"]["eps"] if not os.path.isdir(title): os.mkdir(title) for i, episode in enumerate(episodes): - print(f"Downloading: {episode['title']}", end="\r") - if "url" in episode and episode["url"]: - ep_url = episode["url"] - else: - ep_url = episode["urls"][0] - orig_path = download_file(ep_url) - n_path = title + "/" + f"{title}.mp3" - AudioSegment.from_file(orig_path).export(n_path) - os.remove(orig_path) - if "cover_url" not in episode or not episode["cover_url"]: - img_path = download_file(main_image) - else: - img_path = download_file(episode["cover_url"]) - if "author" in episode and episode["author"]: - ep_author = episode["author"] - else: - ep_author = author + n_path = title + "/" + f"{title}.mp3" # имя из ep + if not os.path.exists(n_path): # на 1 выключается + print(f"Downloading: {episode['title']}", end="\r") + if "url" in episode and episode["url"]: + ep_url = episode["url"] + else: + ep_url = episode["urls"][0] + orig_path = download_file(ep_url) + AudioSegment.from_file(orig_path).export(n_path) + os.remove(orig_path) + if "cover_url" not in episode or not episode["cover_url"]: + img_path = download_file(main_image) + else: + img_path = download_file(episode["cover_url"]) + if "author" in episode and episode["author"]: + ep_author = episode["author"] + else: + ep_author = author - tag = MP3(n_path, ID3=ID3) - tag.tags.add( - APIC( - encoding=3, - mime="image/png", - type=3, - desc="Cover", - data=open(img_path, "rb").read(), + print(f"Processing: {episode['title']}", end="\r") + tag = MP3(n_path, ID3=ID3) + tag.tags.add( + APIC( + encoding=3, + mime="image/png", + type=3, + desc="Cover", + data=open(img_path, "rb").read(), + ) ) - ) - tag.save() - tag = EasyID3(n_path) + tag.save() + tag = EasyID3(n_path) - tag["title"] = episode["title"] - tag["album"] = title - tag["artist"] = ep_author + tag["title"] = episode["title"] + tag["album"] = title + tag["tracknumber"] = f"{episode_count - i}/{episode_count}" + tag["artist"] = ep_author - tag.save() - os.remove(img_path) + tag.save() + os.remove(img_path) diff --git a/soundcloud_downloader.py b/soundcloud_downloader.py new file mode 100644 index 0000000..0f9383d --- /dev/null +++ b/soundcloud_downloader.py @@ -0,0 +1,336 @@ +#!/usr/bin/env python3 +""" +SoundCloud Downloader with ID3 Tags +----------------------------------- +This script downloads all tracks from a SoundCloud artist, +including proper ID3 tags and album artwork. + +Requirements: +pip install scdl mutagen requests tqdm +""" + +import os +import sys +import subprocess +import json +import requests +from pathlib import Path +from mutagen.id3 import ID3, APIC, TIT2, TPE1, TALB, TDRC, TCON, TCOM, COMM +from tqdm import tqdm +import re +import argparse + + +# ANSI colors for terminal output +class Colors: + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + BLUE = '\033[94m' + ENDC = '\033[0m' + + +def setup_argparser(): + parser = argparse.ArgumentParser(description='Download all tracks from a SoundCloud artist with proper ID3 tags') + parser.add_argument('url', help='SoundCloud URL (artist profile or likes page)') + parser.add_argument('-o', '--output', default='downloads', help='Output directory') + parser.add_argument('-c', '--client-id', help='SoundCloud client ID (optional)') + parser.add_argument('--likes', action='store_true', help='Download liked tracks (auto-detected from URL)') + parser.add_argument('--author', help='Explicitly set the author name for all tracks') + parser.add_argument('--album', help='Explicitly set the album name for all tracks') + parser.add_argument('--force-tags', action='store_true', help='Force update of ID3 tags even if they exist') + return parser.parse_args() + + +def get_client_id(): + """Extract client_id by scraping SoundCloud's website""" + print(f"{Colors.BLUE}[*] Obtaining SoundCloud client ID...{Colors.ENDC}") + + try: + response = requests.get('https://soundcloud.com/') + scripts = re.findall(r'