From f4ca7db6963afc1be3c13b40736976bb5a9ba02c Mon Sep 17 00:00:00 2001
From: sanspie <sanspie@akarpov.ru>
Date: Thu, 5 Dec 2024 16:36:50 +0300
Subject: [PATCH] Simplify song search to multi_match; guard serializers against missing albums

---
 akarpov/music/api/serializers.py |  29 +++++----
 akarpov/music/documents.py       |   9 ++-
 akarpov/music/services/search.py | 105 ++++++-------------------------
 3 files changed, 44 insertions(+), 99 deletions(-)

diff --git a/akarpov/music/api/serializers.py b/akarpov/music/api/serializers.py
index 5c94c92..f133ef2 100644
--- a/akarpov/music/api/serializers.py
+++ b/akarpov/music/api/serializers.py
@@ -88,8 +88,12 @@ def get_liked(self, obj):
 
     @extend_schema_field(ListAlbumSerializer)
     def get_album(self, obj):
-        if obj.album:
-            return ListAlbumSerializer(Album.objects.cache().get(id=obj.album_id)).data
+        if obj.album_id:
+            try:
+                album = Album.objects.cache().get(id=obj.album_id)
+                return ListAlbumSerializer(album).data
+            except Album.DoesNotExist:
+                return None
         return None
 
     @extend_schema_field(ListAuthorSerializer(many=True))
@@ -105,16 +109,17 @@ def get_image(self, obj):
         img = None
         if obj.image_cropped:
             img = obj.image_cropped
-        else:
-            album = Album.objects.cache().get(id=obj.album_id)
-            if album.image_cropped:
-                img = album.image_cropped
-            else:
-                authors = Author.objects.cache().filter(
-                    Q(songs__id=obj.id) & ~Q(image="")
-                )
-                if authors:
-                    img = authors.first().image_cropped
+        elif obj.album_id:
+            try:
+                album = Album.objects.cache().get(id=obj.album_id)
+                if album.image_cropped:
+                    img = album.image_cropped
+            except Album.DoesNotExist:
+                pass
+        if not img:
+            authors = Author.objects.cache().filter(Q(songs__id=obj.id) & ~Q(image=""))
+            if authors.exists():
+                img = authors.first().image_cropped
         if img:
             return self.context["request"].build_absolute_uri(img.url)
         return None
diff --git a/akarpov/music/documents.py b/akarpov/music/documents.py
index 40f6c66..a4ca406 100644
--- a/akarpov/music/documents.py
+++ b/akarpov/music/documents.py
@@ -48,7 +48,7 @@ class SongDocument(Document):
         attr="name",
         fields={
             "raw": fields.KeywordField(),
-            "exact": fields.KeywordField(normalizer="lowercase"),
+            "exact": fields.KeywordField(normalizer="lowercase_normalizer"),
         },
     )
     name_transliterated = fields.TextField(
@@ -67,6 +67,13 @@ class Index:
         settings = {
             "number_of_shards": 1,
             "number_of_replicas": 0,
             "analysis": {
+                "normalizer": {
+                    "lowercase_normalizer": {
+                        "type": "custom",
+                        "char_filter": [],
+                        "filter": ["lowercase"],
+                    }
+                },
                 "filter": {
                     "my_transliterator": {
diff --git a/akarpov/music/services/search.py b/akarpov/music/services/search.py
index cb944b1..8923b47 100644
--- a/akarpov/music/services/search.py
+++ b/akarpov/music/services/search.py
@@ -8,97 +8,30 @@
 
 
 def search_song(query):
-    # Split query into potential track and artist parts
-    parts = [part.strip() for part in query.split("-")]
-    track_query = parts[0]
-    artist_query = parts[1] if len(parts) > 1 else None
-
     search = SongDocument.search()
 
-    # Base queries for track name with high boost
-    should_queries = [
-        ES_Q("match_phrase", name={"query": track_query, "boost": 10}),
-        ES_Q("match", name={"query": track_query, "fuzziness": "AUTO", "boost": 8}),
-        ES_Q("wildcard", name={"value": f"*{track_query.lower()}*", "boost": 6}),
-        ES_Q(
-            "match",
-            name_transliterated={"query": track_query, "fuzziness": "AUTO", "boost": 5},
-        ),
-    ]
-
-    # Add artist-specific queries if artist part exists
-    if artist_query:
-        should_queries.extend(
-            [
-                ES_Q(
-                    "nested",
-                    path="authors",
-                    query=ES_Q(
-                        "match_phrase", name={"query": artist_query, "boost": 4}
-                    ),
-                ),
-                ES_Q(
-                    "nested",
-                    path="authors",
-                    query=ES_Q(
-                        "match",
-                        name={"query": artist_query, "fuzziness": "AUTO", "boost": 3},
-                    ),
-                ),
-                ES_Q(
-                    "nested",
-                    path="authors",
-                    query=ES_Q(
-                        "wildcard",
-                        name={"value": f"*{artist_query.lower()}*", "boost": 2},
-                    ),
-                ),
-            ]
-        )
-    else:
-        # If no explicit artist, still search in authors but with lower boost
-        should_queries.extend(
-            [
-                ES_Q(
-                    "nested",
-                    path="authors",
-                    query=ES_Q("match_phrase", name={"query": track_query, "boost": 2}),
-                ),
-                ES_Q(
-                    "nested",
-                    path="authors",
-                    query=ES_Q(
-                        "match",
-                        name={"query": track_query, "fuzziness": "AUTO", "boost": 1},
-                    ),
-                ),
-            ]
-        )
-
-    # Add album queries with lower boost
-    should_queries.extend(
-        [
-            ES_Q(
-                "nested",
-                path="album",
-                query=ES_Q("match_phrase", name={"query": track_query, "boost": 1.5}),
-            ),
-            ES_Q(
-                "nested",
-                path="album",
-                query=ES_Q(
-                    "match",
-                    name={"query": track_query, "fuzziness": "AUTO", "boost": 1},
-                ),
-            ),
-        ]
+    # Build a multi_match query that searches in song name, authors' names, and album names
+    multi_match_query = ES_Q(
+        "multi_match",
+        query=query,
+        fields=[
+            "name^5",
+            "name.raw^10",
+            "name.exact^15",
+            "authors.name^4",
+            "authors.name.raw^8",
+            "authors.name.exact^12",
+            "album.name^3",
+            "album.name.raw^6",
+            "album.name.exact^9",
+        ],
+        fuzziness="AUTO",
+        operator="and",
+        type="best_fields",
     )
 
-    # Combine all queries with minimum_should_match=1
-    search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
-
     # Execute search with size limit
-    search = search.query(search_query).extra(size=20)
+    search = search.query(multi_match_query).extra(size=20)
     response = search.execute()
 
     if response.hits: