Refine YouTube and Spotify music download process

This commit is contained in:
Alexander Karpov 2024-02-01 16:46:45 +03:00
parent a87385db78
commit ffa1e9c69f
3 changed files with 55 additions and 31 deletions

View File

@ -33,7 +33,11 @@ def get_spotdl_client():
spot_settings = {
"simple_tui": True,
"log_level": "ERROR",
"lyrics_providers": ["genius", "musixmatch"],
"lyrics_providers": ["genius", "azlyrics", "musixmatch"],
"threads": 6,
"format": "mp3",
"ffmpeg": "ffmpeg",
"sponsor_block": True,
}
thread_local.spotdl_client = Spotdl(
client_id=settings.MUSIC_SPOTIFY_ID,

View File

@ -10,6 +10,7 @@
from PIL import Image
from pydub import AudioSegment
from pytube import Search, YouTube
from spotdl.providers.audio import YouTubeMusic
from akarpov.music.models import Song
from akarpov.music.services.db import load_track
@ -18,22 +19,28 @@
final_filename = None
ydl_opts = {
"format": "m4a/bestaudio/best",
"postprocessors": [
{ # Extract audio using ffmpeg
"key": "FFmpegExtractAudio",
"preferredcodec": "m4a",
}
],
"outtmpl": f"{settings.MEDIA_ROOT}/%(uploader)s_%(title)s.%(ext)s",
}
ytmusic = YouTubeMusic()
def download_file(url):
    """Download the audio behind *url* as MP3 and return the resulting path.

    The download is written under ``settings.MEDIA_ROOT`` using the
    ``<uploader>_<title>.<ext>`` template and then converted to a 192 kbps
    MP3 by the ``FFmpegExtractAudio`` postprocessor.

    Args:
        url: Address of the media to download, passed straight to yt-dlp.

    Returns:
        Path of the MP3 file. The post-processed ``filepath`` reported by
        yt-dlp is preferred; if it is not available the path is derived from
        the output template with the extension swapped to ``.mp3``.
    """
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": f"{settings.MEDIA_ROOT}/%(uploader)s_%(title)s.%(ext)s",
        "postprocessors": [
            # NOTE(review): this entry alone appears to only fetch SponsorBlock
            # segment data; actually cutting segments would need a
            # ModifyChapters postprocessor as well -- confirm intent.
            {"key": "SponsorBlock"},
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            },  # Extract audio
            # NOTE(review): embedding presumably requires "writethumbnail"
            # to be enabled so there is a thumbnail on disk -- confirm.
            {"key": "EmbedThumbnail"},
            {"key": "FFmpegMetadata"},  # Write title/artist tags into the file
        ],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=True)
        # prepare_filename() yields the pre-conversion name (e.g. *.webm), so
        # the extension is swapped to match what FFmpegExtractAudio produces.
        guessed_path = os.path.splitext(ydl.prepare_filename(info))[0] + ".mp3"

    # "_filename" is the name before postprocessing; "filepath" is updated by
    # the postprocessors and therefore points at the file that really exists.
    downloads = info.get("requested_downloads") or [{}]
    return downloads[0].get("filepath") or guessed_path
def parse_description(description: str) -> list:
@ -67,7 +74,7 @@ def parse_description(description: str) -> list:
def download_from_youtube_link(link: str, user_id: int) -> Song:
song = None
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
with yt_dlp.YoutubeDL({"ignoreerrors": True, "extract_flat": True}) as ydl:
info_dict = ydl.extract_info(link, download=False)
title = info_dict.get("title", None)
description = info_dict.get("description", None)
@ -82,6 +89,9 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
+ slugify(orig_path.split("/")[-1].split(".")[0])
+ ".mp3"
)
if orig_path.endswith(".mp3"):
os.rename(orig_path, path)
else:
AudioSegment.from_file(orig_path).export(path)
if orig_path != path:
os.remove(orig_path)

View File

@ -3,14 +3,13 @@
import pylast
import spotipy
import structlog
import yt_dlp
import ytmusicapi
from asgiref.sync import async_to_sync
from celery import shared_task
from channels.layers import get_channel_layer
from django.conf import settings
from django.utils import timezone
from django.utils.timezone import now
from pytube import Channel, Playlist
from spotipy import SpotifyClientCredentials
from akarpov.music.api.serializers import SongSerializer
@ -40,19 +39,30 @@ def list_tracks(url, user_id):
yandex.load_playlist(url, user_id)
if "youtube.com" in url:
if "channel" in url or "/c/" in url:
channel = Channel(url)
for video in channel.videos:
with yt_dlp.YoutubeDL({}) as ydl:
info = ydl.extract_info(video, download=False)
if info.get("category") == "Music":
ytmusic = ytmusicapi.YTMusic()
channel_id = url.split("/")[-1]
channel_songs = ytmusic.get_artist(channel_id)["songs"]["results"]
print(channel_songs)
for song in channel_songs:
process_yb.apply_async(
kwargs={"url": video.watch_url, "user_id": user_id}
kwargs={
"url": f"https://youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
)
elif "playlist" in url or "&list=" in url:
playlist = Playlist(url)
for video in playlist.videos:
ytmusic = ytmusicapi.YTMusic()
playlist_id = url.split("=")[-1]
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]["results"]
for song in playlist_songs:
process_yb.apply_async(
kwargs={"url": video.watch_url, "user_id": user_id}
kwargs={
"url": f"https://music.youtube.com/watch?v={song['videoId']}",
"user_id": user_id,
}
)
else:
process_yb.apply_async(kwargs={"url": url, "user_id": user_id})