Fixed YouTube track download, improved search

This commit is contained in:
Alexander Karpov 2024-03-02 02:29:49 +03:00
parent 3acd858598
commit 7c9890975b
9 changed files with 1390 additions and 1286 deletions

View File

@ -39,24 +39,70 @@ class SongDocument(Document):
},
)
meta = fields.ObjectField(dynamic=True) # Added meta field here as dynamic object
meta = fields.ObjectField(dynamic=True)
class Index:
name = "songs"
settings = {"number_of_shards": 1, "number_of_replicas": 0}
# settings = {
# "number_of_shards": 1,
# "number_of_replicas": 0,
# "analysis": {
# "analyzer": {
# "russian_icu": {
# "type": "custom",
# "tokenizer": "icu_tokenizer",
# "filter": ["icu_folding","icu_normalizer"]
# }
# }
# }
# } TODO
settings = {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"russian_stop": {
"type": "stop",
"stopwords": "_russian_",
},
"russian_keywords": {
"type": "keyword_marker",
"keywords": ["пример"],
},
"russian_stemmer": {
"type": "stemmer",
"language": "russian",
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20,
},
"synonym_filter": {
"type": "synonym",
"synonyms": [
"бит,трек,песня,музыка,песня,мелодия,композиция",
"певец,исполнитель,артист,музыкант",
"альбом,диск,пластинка,сборник,коллекция",
],
},
},
"analyzer": {
"russian": {
"tokenizer": "standard",
"filter": [
"russian_stop",
"russian_keywords",
"russian_stemmer",
],
},
"russian_icu": {
"tokenizer": "icu_tokenizer",
"filter": [
"russian_stop",
"russian_keywords",
"russian_stemmer",
],
},
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter",
"synonym_filter",
],
},
},
},
}
class Django:
model = Song

View File

@ -72,6 +72,16 @@ def load_track(
name = search_info["title"]
elif not name:
name = process_track_name(" ".join(p_name.strip().split("-")))
clear_name = [
"(Official HD Video)",
"(Official Music Video)",
"(Official Video)",
"Official Video",
"Official Music Video",
"Official HD Video",
]
for c in clear_name:
name = name.replace(c, "")
if not name:
name = orig_name

View File

@ -360,6 +360,7 @@ def update_author_info(author: Author) -> None:
def search_all_platforms(track_name: str) -> dict:
print(track_name)
session = spotipy.Spotify(
auth_manager=spotipy.SpotifyClientCredentials(
client_id=settings.MUSIC_SPOTIFY_ID,

View File

@ -13,9 +13,9 @@ def search_song(query):
ES_Q(
"multi_match",
query=query,
fields=["name^3", "authors.name^2", "album.name"],
fields=["name^5", "authors.name^3", "album.name^3"],
fuzziness="AUTO",
), # Change here
),
ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
ES_Q(
"nested",
@ -27,6 +27,7 @@ def search_song(query):
path="album",
query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"),
),
ES_Q("wildcard", meta__raw=f"*{query.lower()}*"),
],
minimum_should_match=1,
)

View File

@ -157,22 +157,25 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
print(f"[processing] loading {title}")
info = search_all_platforms(title)
if not info["album_image"].startswith("/"):
r = requests.get(info["album_image"])
img_pth = str(
settings.MEDIA_ROOT
+ f"/{info['album_image'].split('/')[-1]}_{str(randint(100, 999))}"
)
with open(img_pth, "wb") as f:
f.write(r.content)
if "album_image" in info and info["album_image"]:
if not info["album_image"].startswith("/"):
r = requests.get(info["album_image"])
img_pth = str(
settings.MEDIA_ROOT
+ f"/{info['album_image'].split('/')[-1]}_{str(randint(100, 999))}"
)
with open(img_pth, "wb") as f:
f.write(r.content)
im = Image.open(img_pth)
im.save(str(f"{img_pth}.png"))
im = Image.open(img_pth)
im.save(str(f"{img_pth}.png"))
os.remove(img_pth)
img_pth = f"{img_pth}.png"
os.remove(img_pth)
img_pth = f"{img_pth}.png"
else:
img_pth = info["album_image"]
else:
img_pth = info["album_image"]
img_pth = None
if "genre" in info:
song = load_track(
path,

View File

@ -1,4 +1,5 @@
from datetime import timedelta
from urllib.parse import parse_qs, urlparse
import pylast
import spotipy
@ -54,9 +55,19 @@ def list_tracks(url, user_id):
elif "playlist" in url or "&list=" in url:
ytmusic = ytmusicapi.YTMusic()
playlist_id = url.split("=")[-1]
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]["results"]
# Parse the URL and the query string
parsed_url = urlparse(url)
parsed_qs = parse_qs(parsed_url.query)
# Get the playlist ID from the parsed query string
playlist_id = parsed_qs.get("list", [None])[0]
if playlist_id:
playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
else:
raise ValueError("No playlist ID found in the URL.")
for song in playlist_songs:
process_yb.apply_async(
kwargs={

View File

@ -0,0 +1,4 @@
# Elasticsearch image with the ICU analysis plugin pre-installed.
# Base: the stock elasticsearch image, pinned to 8.11.1.
FROM elasticsearch:8.11.1
# Install the ICU analysis plugin from the Elastic artifacts server.
# The plugin archive version (8.11.1) matches the base image tag above;
# keep the two in sync when upgrading.
# NOTE(review): presumably required for the "icu_tokenizer" used by the
# songs index analyzer settings — confirm against the index definition.
RUN bin/elasticsearch-plugin install https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-8.11.1.zip

View File

@ -101,7 +101,9 @@ services:
command: /start-flower
elasticsearch:
image: elasticsearch:8.11.1
build:
context: .
dockerfile: ./compose/production/elasticsearch/Dockerfile
ports:
- "9200:9200"
- "9300:9300"

2532
poetry.lock generated

File diff suppressed because it is too large Load Diff