updated elastic build for ru ip

fixed youtube track download, improved search
2025-09-16 20:42:27 +03:00 · 2024-03-02 02:34:44 +03:00 · 2024-03-02 02:29:49 +03:00
9 changed files with 1390 additions and 1286 deletions
--- a/akarpov/music/documents.py
+++ b/akarpov/music/documents.py
@ -39,24 +39,70 @@ class SongDocument(Document):
        },
    )

-    meta = fields.ObjectField(dynamic=True)  # Added meta field here as dynamic object
+    meta = fields.ObjectField(dynamic=True)

    class Index:
        name = "songs"
-        settings = {"number_of_shards": 1, "number_of_replicas": 0}
-        # settings = {
-        #     "number_of_shards": 1,
-        #     "number_of_replicas": 0,
-        #     "analysis": {
-        #         "analyzer": {
-        #             "russian_icu": {
-        #                 "type": "custom",
-        #                 "tokenizer": "icu_tokenizer",
-        #                 "filter": ["icu_folding","icu_normalizer"]
-        #             }
-        #         }
-        #     }
-        # } TODO
+        settings = {
+            "number_of_shards": 1,
+            "number_of_replicas": 0,
+            "analysis": {
+                "filter": {
+                    "russian_stop": {
+                        "type": "stop",
+                        "stopwords": "_russian_",
+                    },
+                    "russian_keywords": {
+                        "type": "keyword_marker",
+                        "keywords": ["пример"],
+                    },
+                    "russian_stemmer": {
+                        "type": "stemmer",
+                        "language": "russian",
+                    },
+                    "autocomplete_filter": {
+                        "type": "edge_ngram",
+                        "min_gram": 1,
+                        "max_gram": 20,
+                    },
+                    "synonym_filter": {
+                        "type": "synonym",
+                        "synonyms": [
+                            "бит,трек,песня,музыка,песня,мелодия,композиция",
+                            "певец,исполнитель,артист,музыкант",
+                            "альбом,диск,пластинка,сборник,коллекция",
+                        ],
+                    },
+                },
+                "analyzer": {
+                    "russian": {
+                        "tokenizer": "standard",
+                        "filter": [
+                            "russian_stop",
+                            "russian_keywords",
+                            "russian_stemmer",
+                        ],
+                    },
+                    "russian_icu": {
+                        "tokenizer": "icu_tokenizer",
+                        "filter": [
+                            "russian_stop",
+                            "russian_keywords",
+                            "russian_stemmer",
+                        ],
+                    },
+                    "autocomplete": {
+                        "type": "custom",
+                        "tokenizer": "standard",
+                        "filter": [
+                            "lowercase",
+                            "autocomplete_filter",
+                            "synonym_filter",
+                        ],
+                    },
+                },
+            },
+        }

    class Django:
        model = Song
--- a/akarpov/music/services/db.py
+++ b/akarpov/music/services/db.py
@ -72,6 +72,16 @@ def load_track(
            name = search_info["title"]
        elif not name:
            name = process_track_name(" ".join(p_name.strip().split("-")))
+            clear_name = [
+                "(Official HD Video)",
+                "(Official Music Video)",
+                "(Official Video)",
+                "Official Video",
+                "Official Music Video",
+                "Official HD Video",
+            ]
+            for c in clear_name:
+                name = name.replace(c, "")

    if not name:
        name = orig_name
--- a/akarpov/music/services/info.py
+++ b/akarpov/music/services/info.py
@ -360,6 +360,7 @@ def update_author_info(author: Author) -> None:


 def search_all_platforms(track_name: str) -> dict:
+    print(track_name)
    session = spotipy.Spotify(
        auth_manager=spotipy.SpotifyClientCredentials(
            client_id=settings.MUSIC_SPOTIFY_ID,
--- a/akarpov/music/services/search.py
+++ b/akarpov/music/services/search.py
@ -13,9 +13,9 @@ def search_song(query):
            ES_Q(
                "multi_match",
                query=query,
-                fields=["name^3", "authors.name^2", "album.name"],
+                fields=["name^5", "authors.name^3", "album.name^3"],
                fuzziness="AUTO",
-            ),  # Change here
+            ),
            ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
            ES_Q(
                "nested",
@ -27,6 +27,7 @@ def search_song(query):
                path="album",
                query=ES_Q("wildcard", album__name__raw=f"*{query.lower()}*"),
            ),
+            ES_Q("wildcard", meta__raw=f"*{query.lower()}*"),
        ],
        minimum_should_match=1,
    )
--- a/akarpov/music/services/youtube.py
+++ b/akarpov/music/services/youtube.py
@ -157,22 +157,25 @@ def download_from_youtube_link(link: str, user_id: int) -> Song:
        print(f"[processing] loading {title}")

        info = search_all_platforms(title)
-        if not info["album_image"].startswith("/"):
-            r = requests.get(info["album_image"])
-            img_pth = str(
-                settings.MEDIA_ROOT
-                + f"/{info['album_image'].split('/')[-1]}_{str(randint(100, 999))}"
-            )
-            with open(img_pth, "wb") as f:
-                f.write(r.content)
+        if "album_image" in info and info["album_image"]:
+            if not info["album_image"].startswith("/"):
+                r = requests.get(info["album_image"])
+                img_pth = str(
+                    settings.MEDIA_ROOT
+                    + f"/{info['album_image'].split('/')[-1]}_{str(randint(100, 999))}"
+                )
+                with open(img_pth, "wb") as f:
+                    f.write(r.content)

-            im = Image.open(img_pth)
-            im.save(str(f"{img_pth}.png"))
+                im = Image.open(img_pth)
+                im.save(str(f"{img_pth}.png"))

-            os.remove(img_pth)
-            img_pth = f"{img_pth}.png"
+                os.remove(img_pth)
+                img_pth = f"{img_pth}.png"
+            else:
+                img_pth = info["album_image"]
        else:
-            img_pth = info["album_image"]
+            img_pth = None
        if "genre" in info:
            song = load_track(
                path,
--- a/akarpov/music/tasks.py
+++ b/akarpov/music/tasks.py
@ -1,4 +1,5 @@
 from datetime import timedelta
+from urllib.parse import parse_qs, urlparse

 import pylast
 import spotipy
@ -54,9 +55,19 @@ def list_tracks(url, user_id):

        elif "playlist" in url or "&list=" in url:
            ytmusic = ytmusicapi.YTMusic()
-            playlist_id = url.split("=")[-1]
-            playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]["results"]

+            # Parse the URL and the query string
+            parsed_url = urlparse(url)
+            parsed_qs = parse_qs(parsed_url.query)
+
+            # Get the playlist ID from the parsed query string
+            playlist_id = parsed_qs.get("list", [None])[0]
+
+            if playlist_id:
+                playlist_songs = ytmusic.get_playlist(playlist_id)["tracks"]
+
+            else:
+                raise ValueError("No playlist ID found in the URL.")
            for song in playlist_songs:
                process_yb.apply_async(
                    kwargs={
--- a/compose/production/elasticsearch/Dockerfile
+++ b/compose/production/elasticsearch/Dockerfile
@ -0,0 +1,4 @@
+FROM elasticsearch:8.11.1
+
+# Install the ICU plugin
+RUN bin/elasticsearch-plugin install https://akarpov.ru/media/analysis-icu-8.11.1.zip
--- a/local.yml
+++ b/local.yml
@ -101,7 +101,9 @@ services:
    command: /start-flower

  elasticsearch:
-    image: elasticsearch:8.11.1
+    build:
+      context: .
+      dockerfile: ./compose/production/elasticsearch/Dockerfile
    ports:
      - "9200:9200"
      - "9300:9300"
--- a/poetry.lock
+++ b/poetry.lock
Author	SHA1	Message	Date
Alexander-D-Karpov	a2da7e724f	updated elastic build for ru ip	2024-03-02 02:34:44 +03:00
Alexander-D-Karpov	7c9890975b	fixed youtube track download, improved search	2024-03-02 02:29:49 +03:00