mirror of
https://github.com/Alexander-D-Karpov/scripts.git
synced 2025-06-02 20:23:06 +03:00
337 lines
12 KiB
Python
337 lines
12 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
SoundCloud Downloader with ID3 Tags
-----------------------------------
This script downloads all tracks from a SoundCloud artist (a likes page or
a set URL is also accepted), including proper ID3 tags and album artwork.

Requirements:
    pip install scdl mutagen requests tqdm
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import json
|
|
import requests
|
|
from pathlib import Path
|
|
from mutagen.id3 import ID3, APIC, TIT2, TPE1, TALB, TDRC, TCON, TCOM, COMM
|
|
from tqdm import tqdm
|
|
import re
|
|
import argparse
|
|
|
|
|
|
# ANSI colors for terminal output
|
|
class Colors:
    """ANSI escape sequences used to colorize the script's terminal messages."""

    # Bright foreground colors
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    BLUE = "\033[94m"
    # Resets all attributes back to the terminal default
    ENDC = "\033[0m"
|
|
|
|
|
|
def setup_argparser(argv=None):
    """Define the command-line interface and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, which is what the
            script entry point relies on. Passing a list makes the parser
            usable programmatically.

    Returns:
        argparse.Namespace with the attributes ``url``, ``output``,
        ``client_id``, ``likes``, ``author``, ``album`` and ``force_tags``.
    """
    parser = argparse.ArgumentParser(description='Download all tracks from a SoundCloud artist with proper ID3 tags')
    parser.add_argument('url', help='SoundCloud URL (artist profile or likes page)')
    parser.add_argument('-o', '--output', default='downloads', help='Output directory')
    parser.add_argument('-c', '--client-id', help='SoundCloud client ID (optional)')
    parser.add_argument('--likes', action='store_true', help='Download liked tracks (auto-detected from URL)')
    parser.add_argument('--author', help='Explicitly set the author name for all tracks')
    parser.add_argument('--album', help='Explicitly set the album name for all tracks')
    parser.add_argument('--force-tags', action='store_true', help='Force update of ID3 tags even if they exist')
    return parser.parse_args(argv)
|
|
|
|
|
|
def get_client_id(timeout=15):
    """Extract a client_id by scraping SoundCloud's website.

    Fetches the SoundCloud home page, collects the JavaScript files it
    references and returns the first ``"client_id":"..."`` value found in
    one of them.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up, so
            a stalled connection cannot hang the script.

    Returns:
        The client ID string, or ``None`` when the home page could not be
        fetched or no script contained a client ID.
    """
    print(f"{Colors.BLUE}[*] Obtaining SoundCloud client ID...{Colors.ENDC}")

    try:
        response = requests.get('https://soundcloud.com/', timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"{Colors.RED}[!] Error getting client ID: {e}{Colors.ENDC}")
        return None

    scripts = re.findall(r'<script crossorigin src="(.*?\.js)"', response.text)

    # Try to find client_id in the scripts
    for script_url in scripts:
        if script_url.startswith('//'):
            # Protocol-relative reference: only the scheme is missing
            script_url = 'https:' + script_url
        elif not script_url.startswith('http'):
            script_url = 'https://soundcloud.com' + script_url

        try:
            script_response = requests.get(script_url, timeout=timeout)
            script_response.raise_for_status()
        except requests.RequestException as e:
            # One unreachable script must not prevent checking the remaining ones
            print(f"{Colors.YELLOW}[!] Warning: Could not fetch {script_url}: {e}{Colors.ENDC}")
            continue

        client_id_match = re.search(r'"client_id":"([a-zA-Z0-9]+)"', script_response.text)
        if client_id_match:
            return client_id_match.group(1)

    return None
|
|
|
|
|
|
def _artist_from_url(url):
    """Return the profile slug (first path segment) of a SoundCloud URL."""
    from urllib.parse import urlparse

    cleaned = url.strip()
    if '://' not in cleaned:
        # Without a scheme urlparse would treat the host name as part of the path
        cleaned = 'https://' + cleaned.lstrip('/')

    segments = [part for part in urlparse(cleaned).path.split('/') if part]
    if segments:
        return segments[0]

    # No path at all: fall back to the last component of the raw URL
    return url.strip('/').split('/')[-1]


def download_tracks(artist_url, output_dir, client_id=None, likes=False):
    """Download all tracks from the given artist URL, likes page or set.

    The download itself is delegated to the external ``scdl`` command.

    Args:
        artist_url: SoundCloud URL to download from.
        output_dir: Directory the files are written to (created if missing).
        client_id: SoundCloud client ID; scraped via ``get_client_id()`` when
            not provided.
        likes: Treat the URL as a likes page even if it does not contain
            ``/likes``.

    Returns:
        Tuple ``(output_dir, artist_name)`` where ``artist_name`` is the
        profile slug taken from the URL path.

    Exits the process with status 1 when no client ID is available, when
    ``scdl`` is not installed, or when ``scdl`` returns a non-zero status.
    """
    if not client_id:
        client_id = get_client_id()

    if not client_id:
        print(f"{Colors.RED}[!] Failed to get client ID. Please provide it manually with --client-id{Colors.ENDC}")
        sys.exit(1)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # The profile slug is always the first path segment, whether the URL
    # points at the profile itself, at /likes, at /tracks or at /sets/<name>.
    artist_name = _artist_from_url(artist_url)
    is_likes = likes or '/likes' in artist_url

    print(
        f"{Colors.GREEN}[+] {'Downloading liked tracks' if likes else 'Downloading tracks'} for {artist_name} to {output_dir}{Colors.ENDC}")

    # Use scdl to download tracks
    cmd = [
        'scdl',
        '-l', artist_url,
        '--path', output_dir,
        '--client-id', client_id,
        '--flac',  # Try to get best quality where available
        '-c',  # Continue if download already exists
    ]

    # Add appropriate flag based on download type
    if is_likes:
        cmd.append('-f')  # Download favorites/likes
    elif '/sets/' in artist_url:
        cmd.append('-p')  # Download playlist
    else:
        cmd.append('-a')  # Download all tracks from user

    try:
        subprocess.run(cmd, check=True)
    except FileNotFoundError as e:
        # Raised when the scdl executable is not on PATH
        print(f"{Colors.RED}[!] Error running scdl: {e}{Colors.ENDC}")
        sys.exit(1)
    except subprocess.CalledProcessError as e:
        print(f"{Colors.RED}[!] Error running scdl: {e}{Colors.ENDC}")
        sys.exit(1)

    return output_dir, artist_name
|
|
|
|
|
|
def get_artist_info(artist_url, client_id, timeout=15):
    """Get artist information from the SoundCloud API ``resolve`` endpoint.

    Args:
        artist_url: Public SoundCloud URL to resolve.
        client_id: SoundCloud client ID sent with the request.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON object as a dict, or ``None`` when the request
        fails, returns an HTTP error status, or the body is not a JSON
        object.
    """
    try:
        # Let requests build the query string so the URL value is percent-encoded
        response = requests.get(
            "https://api-v2.soundcloud.com/resolve",
            params={"url": artist_url, "client_id": client_id},
            timeout=timeout,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON
        print(f"{Colors.RED}[!] Error getting artist info: {e}{Colors.ENDC}")
        return None

    # Callers use dict access on the result
    return data if isinstance(data, dict) else None
|
|
|
|
|
|
def get_tracks_info(download_dir, client_id, timeout=15):
    """Get information about downloaded tracks from the SoundCloud API.

    Only MP3 files whose name (without extension) ends in ``-`` or ``_``
    followed by at least six digits are looked up; those digits are used as
    the track ID.

    Args:
        download_dir: Directory containing the downloaded ``*.mp3`` files.
        client_id: SoundCloud client ID sent with each request.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        Dict mapping file name to the decoded JSON returned for that track.
        Files without a recognizable ID, or whose lookup did not return
        HTTP 200, are absent from the dict.
    """
    print(f"{Colors.BLUE}[*] Gathering track information from SoundCloud...{Colors.ENDC}")

    # Presumably the downloader appended the track ID to the file name
    track_id_pattern = re.compile(r'[-_](\d{6,})(\.mp3)?$')
    track_info_map = {}

    for mp3_file in Path(download_dir).glob('*.mp3'):
        track_id_match = track_id_pattern.search(mp3_file.stem)
        if not track_id_match:
            continue

        track_id = track_id_match.group(1)
        try:
            response = requests.get(
                f"https://api-v2.soundcloud.com/tracks/{track_id}",
                params={"client_id": client_id},
                timeout=timeout,
            )
            if response.status_code == 200:
                track_info_map[mp3_file.name] = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers a 200 response whose body is not valid JSON
            print(f"{Colors.YELLOW}[!] Warning: Could not get info for track ID {track_id}: {e}{Colors.ENDC}")

    return track_info_map
|
|
|
|
|
|
def extract_set_info(filename):
    """Parse set (playlist) details out of a downloaded file name.

    Two layouts are recognized, tried in this order:

    1. ``"Set Name_Artist - Track Title.mp3"`` (the ``Artist - `` part is
       optional)
    2. ``"Playlist Name - Track Title.mp3"`` (no artist information)

    Returns:
        A dict with the keys ``'set_name'``, ``'artist'`` and ``'title'``
        (``'artist'`` is ``None`` when the name carries none), or ``None``
        when the file name matches neither layout.
    """
    underscore_form = re.match(r'^(.+?)_(.+?)\.mp3$', filename)
    if underscore_form is not None:
        playlist = underscore_form.group(1).strip()
        remainder = underscore_form.group(2).strip()

        # The part after the underscore may itself be "Artist - Title"
        split_remainder = re.match(r'^(.+?) - (.+)$', remainder)
        if split_remainder is None:
            performer, track_title = None, remainder
        else:
            performer = split_remainder.group(1).strip()
            track_title = split_remainder.group(2).strip()

        return {'set_name': playlist, 'artist': performer, 'title': track_title}

    # Fallback layout without underscore: "Playlist Name - Track Title.mp3"
    dash_form = re.match(r'^(.+?) - (.+?)\.mp3$', filename)
    if dash_form is None:
        return None

    return {
        'set_name': dash_form.group(1).strip(),
        'artist': None,  # this layout carries no artist
        'title': dash_form.group(2).strip(),
    }
|
|
|
|
|
|
def extract_album_from_comments(tags):
    """Try to extract album information from ID3 comment (COMM) frames.

    Args:
        tags: Mapping of frame key to frame (a mutagen ``ID3`` object or any
            dict-like with ``items()``). Comment frames are accepted both
            under the bare key ``"COMM"`` and under mutagen's
            ``"COMM:<desc>:<lang>"`` keys, which is how comments read from a
            file are stored.

    Returns:
        The album name found in a comment, or ``None``. Two patterns are
        tried, in this order, across all comment frames:

        * ``CTCD-<digits> "<album>"`` -> ``<album>``
        * ``<name> EP`` / ``<name> E.P.`` -> ``"<name> E.P."``
    """
    comments = [
        str(frame)
        for key, frame in tags.items()
        if key == "COMM" or key.startswith("COMM:")
    ]

    # Look for potential album indicators in comments
    for comment in comments:
        album_match = re.search(r'CTCD-\d+\s+["\'](.+?)["\']', comment)
        if album_match:
            return album_match.group(1)

    # Another pattern: Album name followed by E.P. or EP.
    # The word boundaries keep words that merely start with "EP" (e.g. "EPIC")
    # from being mistaken for the marker.
    for comment in comments:
        ep_match = re.search(r'([^"\']+?)\s+\bE\.?P\b\.?', comment)
        if ep_match:
            return f"{ep_match.group(1)} E.P."

    return None
|
|
|
|
|
|
def _download_artist_avatar(artist_name, client_id, timeout=15):
    """Fetch the artist's avatar; return ``(image_bytes, mime_type)`` or ``(None, None)``."""
    artist_info = get_artist_info(f"https://soundcloud.com/{artist_name}", client_id)
    artist_avatar_url = artist_info.get('avatar_url') if artist_info else None
    if not artist_avatar_url:
        return None, None

    try:
        # Get highest resolution image by replacing size in URL
        hi_res_avatar_url = artist_avatar_url.replace('-large', '-t500x500')
        avatar_response = requests.get(hi_res_avatar_url, timeout=timeout)
        avatar_response.raise_for_status()
    except requests.RequestException as e:
        print(f"{Colors.YELLOW}[!] Warning: Could not download artist avatar: {e}{Colors.ENDC}")
        return None, None

    # Use the type the server reports instead of assuming JPEG
    mime = avatar_response.headers.get('Content-Type', 'image/jpeg').split(';')[0].strip().lower()
    if not avatar_response.content or not mime.startswith('image/'):
        # An error page or empty body must not be embedded as cover art
        print(f"{Colors.YELLOW}[!] Warning: Could not download artist avatar: unexpected response ({mime}){Colors.ENDC}")
        return None, None

    return avatar_response.content, mime


def _resolve_track_metadata(mp3_file, tags, forced_author, forced_album):
    """Derive ``(title, artist, album)`` for one file from its name and existing tags."""
    title = None
    artist = forced_author
    album = forced_album

    set_info = extract_set_info(mp3_file.name)
    if set_info:
        title = set_info['title']
        # Explicitly forced values win over anything parsed from the name
        artist = artist or set_info['artist']
        album = album or set_info['set_name']
    else:
        # Try to extract from regular "Artist - Title.mp3" filename
        filename_match = re.search(r'(.+?) - (.+?)\.mp3$', mp3_file.name)
        if filename_match:
            artist = artist or filename_match.group(1).strip()
            title = filename_match.group(2).strip()
        else:
            # Just use the filename as title
            title = mp3_file.stem

    # Try to extract album info from existing comment tags if available
    if not album:
        album = extract_album_from_comments(tags)

    return title, artist or "Unknown Artist", album or "Unknown Album"


def fix_id3_tags(download_dir, artist_name, client_id, forced_author=None, forced_album=None, force_tags=False):
    """Fix ID3 tags and add album artwork to downloaded MP3 files.

    For every ``*.mp3`` file in ``download_dir`` the title, artist and album
    frames are written, derived from the file name (and from comment frames
    for the album). The artist's avatar is embedded as front cover when the
    file has no artwork yet, or always when ``force_tags`` is set.

    Args:
        download_dir: Directory containing the downloaded files.
        artist_name: SoundCloud profile slug; used to look up the avatar.
        client_id: SoundCloud client ID for the API lookup.
        forced_author: Artist name to write to every file, if given.
        forced_album: Album name to write to every file, if given.
        force_tags: Rewrite tags (and artwork) even when title, artist and
            album are already present.

    Returns:
        None. Prints a summary of processed and skipped files; a file that
        cannot be processed is reported as a warning and does not stop the
        run.
    """
    # Function-scope import: the module only imports frame classes from mutagen.id3
    from mutagen import MutagenError

    print(f"{Colors.BLUE}[*] Adding ID3 tags and artwork...{Colors.ENDC}")

    # Download artist avatar for use as album art if needed
    avatar_data, avatar_mime = _download_artist_avatar(artist_name, client_id)

    # Per-track API metadata is deliberately not fetched here: nothing below consumes it.

    processed_count = 0
    skipped_count = 0

    for mp3_file in tqdm(list(Path(download_dir).glob('*.mp3')), desc="Processing files"):
        try:
            # Read existing ID3 tags, or start a new tag when none can be loaded
            try:
                tags = ID3(mp3_file)
            except MutagenError:
                tags = ID3()

            # Skip if tags exist and force_tags is not set
            if not force_tags and all(frame_id in tags for frame_id in ("TIT2", "TPE1", "TALB")):
                skipped_count += 1
                continue

            title, artist, album = _resolve_track_metadata(mp3_file, tags, forced_author, forced_album)

            # Set ID3 tags (encoding=3 is UTF-8)
            tags["TIT2"] = TIT2(encoding=3, text=title)
            tags["TPE1"] = TPE1(encoding=3, text=artist)
            tags["TALB"] = TALB(encoding=3, text=album)

            # Add artwork if we have it and it's missing or we're forcing updates
            if avatar_data and (force_tags or not tags.getall("APIC")):
                # Picture frames are keyed "APIC:<desc>"; drop existing ones
                # first so a forced update replaces the cover instead of
                # adding a second picture.
                tags.delall("APIC")
                tags.add(APIC(
                    encoding=3,
                    mime=avatar_mime,
                    type=3,  # Cover (front)
                    desc="Cover",
                    data=avatar_data,
                ))

            # Save tags to file
            tags.save(mp3_file)
            processed_count += 1

        except Exception as e:
            # Per-file boundary: one bad file must not abort the whole batch
            print(f"{Colors.YELLOW}[!] Warning: Could not process file {mp3_file}: {e}{Colors.ENDC}")

    print(
        f"{Colors.GREEN}[+] Successfully processed {processed_count} files, skipped {skipped_count} files with existing tags{Colors.ENDC}")
|
|
|
|
|
|
def main():
    """Script entry point: parse arguments, download the tracks, then tag them.

    The client ID is resolved once, before downloading, and the same value
    is used for both the download and the tagging step. Exits with status 1
    when no client ID is available.
    """
    args = setup_argparser()

    print(f"{Colors.GREEN}[+] SoundCloud Downloader with ID3 Tags{Colors.ENDC}")

    # Auto-detect likes URL if not explicitly set
    likes = args.likes or '/likes' in args.url

    # Resolve the client ID a single time so the site is not scraped twice
    # and the tagging step can never receive a missing ID.
    client_id = args.client_id or get_client_id()
    if not client_id:
        print(f"{Colors.RED}[!] Failed to get client ID. Please provide it manually with --client-id{Colors.ENDC}")
        sys.exit(1)

    download_dir, artist_name = download_tracks(args.url, args.output, client_id, likes)
    fix_id3_tags(download_dir, artist_name, client_id, args.author, args.album, args.force_tags)

    print(f"{Colors.GREEN}[+] All done! Downloaded tracks are in: {download_dir}{Colors.ENDC}")


if __name__ == "__main__":
    main()
|