fixed youtube track download, improved search

2025-09-14 13:32:27 +03:00 · 2024-03-02 02:29:49 +03:00 · 2024-03-02 02:29:49 +03:00 · 7c9890975b
commit 7c9890975b
parent 3acd858598
9 changed files with 1390 additions and 1286 deletions
--- a/akarpov/music/documents.py
+++ b/akarpov/music/documents.py
@@ -39,24 +39,70 @@ class SongDocument(Document):
        },
    )
-    meta = fields.ObjectField(dynamic=True)  # Added meta field here as dynamic object
+    meta = fields.ObjectField(dynamic=True)
    class Index:
        name = "songs"
-        settings = {"number_of_shards": 1, "number_of_replicas": 0}
+        settings = {
-        # settings = {
+            "number_of_shards": 1,
-        #     "number_of_shards": 1,
+            "number_of_replicas": 0,
-        #     "number_of_replicas": 0,
+            "analysis": {
-        #     "analysis": {
+                "filter": {
-        #         "analyzer": {
+                    "russian_stop": {
-        #             "russian_icu": {
+                        "type": "stop",
-        #                 "type": "custom",
+                        "stopwords": "_russian_",
-        #                 "tokenizer": "icu_tokenizer",
+                    },
-        #                 "filter": ["icu_folding","icu_normalizer"]
+                    "russian_keywords": {
-        #             }
+                        "type": "keyword_marker",
-        #         }
+                        "keywords": ["пример"],
-        #     }
+                    },
-        # } TODO
+                    "russian_stemmer": {
                        "type": "stemmer",
                        "language": "russian",
                    },
                    "autocomplete_filter": {
                        "type": "edge_ngram",
                        "min_gram": 1,
                        "max_gram": 20,
                    },
                    "synonym_filter": {
                        "type": "synonym",
                        "synonyms": [
                            "бит,трек,песня,музыка,песня,мелодия,композиция",
                            "певец,исполнитель,артист,музыкант",
                            "альбом,диск,пластинка,сборник,коллекция",
                        ],
                    },
                },
                "analyzer": {
                    "russian": {
                        "tokenizer": "standard",
                        "filter": [
                            "russian_stop",
                            "russian_keywords",
                            "russian_stemmer",
                        ],
                    },
                    "russian_icu": {
                        "tokenizer": "icu_tokenizer",
                        "filter": [
                            "russian_stop",
                            "russian_keywords",
                            "russian_stemmer",
                        ],
                    },
                    "autocomplete": {
                        "type": "custom",
                        "tokenizer": "standard",
                        "filter": [
                            "lowercase",
                            "autocomplete_filter",
                            "synonym_filter",
                        ],
                    },
                },
            },
        }
    class Django:
        model = Song
--- a/akarpov/music/services/db.py
+++ b/akarpov/music/services/db.py
@@ -72,6 +72,16 @@ def load_track(
            name = search_info["title"]
        elif not name:
            name = process_track_name(" ".join(p_name.strip().split("-")))
            clear_name = [
                "(Official HD Video)",
                "(Official Music Video)",
                "(Official Video)",
                "Official Video",
                "Official Music Video",
                "Official HD Video",
            ]
            for c in clear_name:
                name = name.replace(c, "")
    if not name:
        name = orig_name
--- a/akarpov/music/services/info.py
+++ b/akarpov/music/services/info.py
@@ -360,6 +360,7 @@ def update_author_info(author: Author) -> None:
 def search_all_platforms(track_name: str) -> dict:
    print(track_name)
    session = spotipy.Spotify(
        auth_manager=spotipy.SpotifyClientCredentials(
            client_id=settings.MUSIC_SPOTIFY_ID,
--- a/akarpov/music/services/search.py
+++ b/akarpov/music/services/search.py
@@ -13,9 +13,9 @@ def search_song(query):
            ES_Q(
                "multi_match",
                query=query,
-                fields=["name^3", "authors.name^2", "album.name"],
+                fields=["name^5", "authors.name^3", "album.name^3"],
                fuzziness="AUTO",
-            ),  # Change here
+            ),
            ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
            ES_Q(
                "nested",
@@ -27,6 +27,7 @@ def search_song(query):
                path="album",
                query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"),
            ),
            ES_Q("wildcard", meta__raw=f"*{query.lower()}*"),
        ],
        minimum_should_match=1,
    )
--- a/akarpov/music/services/youtube.py
+++ b/akarpov/music/services/youtube.py
@@ -157,6 +157,7 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
        print(f"[processing] loading {title}")
        info = search_all_platforms(title)
        if "album_image" in info and info["album_image"]:
            if not info["album_image"].startswith("/"):
                r = requests.get(info["album_image"])
                img_pth = str(
@@ -173,6 +174,8 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
                img_pth = f"{img_pth}.png"
            else:
                img_pth = info["album_image"]
        else:
            img_pth = None
        if "genre" in info:
            song = load_track(
                path,
--- a/akarpov/music/tasks.py
+++ b/akarpov/music/tasks.py
@@ -1,4 +1,5 @@
 from datetime import timedelta
 from urllib.parse import parse_qs, urlparse
 import pylast
 import spotipy
@@ -54,9 +55,19 @@ def list_tracks(url, user_id):
        elif "playlist" in url or "&list=" in url:
            ytmusic = ytmusicapi.YTMusic()
            playlist_id = url.split("=")[-1]
            playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]["results"]
            # Parse the URL and the query string
            parsed_url = urlparse(url)
            parsed_qs = parse_qs(parsed_url.query)
            # Get the playlist ID from the parsed query string
            playlist_id = parsed_qs.get("list", [None])[0]
            if playlist_id:
                playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
            else:
                raise ValueError("No playlist ID found in the URL.")
            for song in playlist_songs:
                process_yb.apply_async(
                    kwargs={
--- a/compose/production/elasticsearch/Dockerfile
+++ b/compose/production/elasticsearch/Dockerfile
@@ -0,0 +1,4 @@
 FROM elasticsearch:8.11.1
 # Install the ICU plugin
 RUN bin/elasticsearch-plugin install https://artifacts.elastic.co/downloads/elasticsearch-plugins/analysis-icu/analysis-icu-8.11.1.zip
--- a/local.yml
+++ b/local.yml
@@ -101,7 +101,9 @@ services:
    command: /start-flower
  elasticsearch:
-    image: elasticsearch:8.11.1
+    build:
      context: .
      dockerfile: ./compose/production/elasticsearch/Dockerfile
    ports:
      - "9200:9200"
      - "9300:9300"
--- a/poetry.lock
+++ b/poetry.lock