Fixed YouTube track download, improved search

This commit is contained in:
Alexander Karpov 2024-03-02 02:29:49 +03:00
parent 3acd858598
commit 7c9890975b
9 changed files with 1390 additions and 1286 deletions

View File

@ -39,24 +39,70 @@ class SongDocument(Document):
}, },
) )
meta = fields.ObjectField(dynamic=True) # Added meta field here as dynamic object meta = fields.ObjectField(dynamic=True)
class Index: class Index:
name = "songs" name = "songs"
settings = {"number_of_shards": 1, "number_of_replicas": 0} settings = {
# settings = { "number_of_shards": 1,
# "number_of_shards": 1, "number_of_replicas": 0,
# "number_of_replicas": 0, "analysis": {
# "analysis": { "filter": {
# "analyzer": { "russian_stop": {
# "russian_icu": { "type": "stop",
# "type": "custom", "stopwords": "_russian_",
# "tokenizer": "icu_tokenizer", },
# "filter": ["icu_folding","icu_normalizer"] "russian_keywords": {
# } "type": "keyword_marker",
# } "keywords": ["пример"],
# } },
# } TODO "russian_stemmer": {
"type": "stemmer",
"language": "russian",
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20,
},
"synonym_filter": {
"type": "synonym",
"synonyms": [
"бит,трек,песня,музыка,песня,мелодия,композиция",
"певец,исполнитель,артист,музыкант",
"альбом,диск,пластинка,сборник,коллекция",
],
},
},
"analyzer": {
"russian": {
"tokenizer": "standard",
"filter": [
"russian_stop",
"russian_keywords",
"russian_stemmer",
],
},
"russian_icu": {
"tokenizer": "icu_tokenizer",
"filter": [
"russian_stop",
"russian_keywords",
"russian_stemmer",
],
},
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter",
"synonym_filter",
],
},
},
},
}
class Django: class Django:
model = Song model = Song

View File

@ -72,6 +72,16 @@ def load_track(
name = search_info["title"] name = search_info["title"]
elif not name: elif not name:
name = process_track_name(" ".join(p_name.strip().split("-"))) name = process_track_name(" ".join(p_name.strip().split("-")))
clear_name = [
"(Official HD Video)",
"(Official Music Video)",
"(Official Video)",
"Official Video",
"Official Music Video",
"Official HD Video",
]
for c in clear_name:
name = name.replace(c, "")
if not name: if not name:
name = orig_name name = orig_name

View File

@ -360,6 +360,7 @@ def update_author_info(author: Author) -> None:
def search_all_platforms(track_name: str) -> dict: def search_all_platforms(track_name: str) -> dict:
print(track_name)
session = spotipy.Spotify( session = spotipy.Spotify(
auth_manager=spotipy.SpotifyClientCredentials( auth_manager=spotipy.SpotifyClientCredentials(
client_id=settings.MUSIC_SPOTIFY_ID, client_id=settings.MUSIC_SPOTIFY_ID,

View File

@ -13,9 +13,9 @@ def search_song(query):
ES_Q( ES_Q(
"multi_match", "multi_match",
query=query, query=query,
fields=["name^3", "authors.name^2", "album.name"], fields=["name^5", "authors.name^3", "album.name^3"],
fuzziness="AUTO", fuzziness="AUTO",
), # Change here ),
ES_Q("wildcard", name__raw=f"*{query.lower()}*"), ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
ES_Q( ES_Q(
"nested", "nested",
@ -27,6 +27,7 @@ def search_song(query):
path="album", path="album",
query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"), query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"),
), ),
ES_Q("wildcard", meta__raw=f"*{query.lower()}*"),
], ],
minimum_should_match=1, minimum_should_match=1,
) )

View File

@ -157,6 +157,7 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
print(f"[processing] loading {title}") print(f"[processing] loading {title}")
info = search_all_platforms(title) info = search_all_platforms(title)
if "album_image" in info and info["album_image"]:
if not info["album_image"].startswith("/"): if not info["album_image"].startswith("/"):
r = requests.get(info["album_image"]) r = requests.get(info["album_image"])
img_pth = str( img_pth = str(
@ -173,6 +174,8 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
img_pth = f"{img_pth}.png" img_pth = f"{img_pth}.png"
else: else:
img_pth = info["album_image"] img_pth = info["album_image"]
else:
img_pth = None
if "genre" in info: if "genre" in info:
song = load_track( song = load_track(
path, path,

View File

@ -1,4 +1,5 @@
from datetime import timedelta from datetime import timedelta
from urllib.parse import parse_qs, urlparse
import pylast import pylast
import spotipy import spotipy
@ -54,9 +55,19 @@ def list_tracks(url, user_id):
elif "playlist" in url or "&list=" in url: elif "playlist" in url or "&list=" in url:
ytmusic = ytmusicapi.YTMusic() ytmusic = ytmusicapi.YTMusic()
playlist_id = url.split("=")[-1]
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]["results"]
# Parse the URL and the query string
parsed_url = urlparse(url)
parsed_qs = parse_qs(parsed_url.query)
# Get the playlist ID from the parsed query string
playlist_id = parsed_qs.get("list", [None])[0]
if playlist_id:
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
else:
raise ValueError("No playlist ID found in the URL.")
for song in playlist_songs: for song in playlist_songs:
process_yb.apply_async( process_yb.apply_async(
kwargs={ kwargs={

View File

@ -0,0 +1,4 @@
# Elasticsearch image extended with the ICU analysis plugin (analysis-icu).
FROM elasticsearch:8.11.1

# Install the ICU plugin by its official name rather than a versioned URL:
# the plugin manager then fetches the build matching this image's
# Elasticsearch version, so the version is declared only once (in FROM).
# --batch disables interactive prompts, which cannot be answered in a build.
RUN bin/elasticsearch-plugin install --batch analysis-icu

View File

@ -101,7 +101,9 @@ services:
command: /start-flower command: /start-flower
elasticsearch: elasticsearch:
image: elasticsearch:8.11.1 build:
context: .
dockerfile: ./compose/production/elasticsearch/Dockerfile
ports: ports:
- "9200:9200" - "9200:9200"
- "9300:9300" - "9300:9300"

2532
poetry.lock generated

File diff suppressed because it is too large Load Diff