From f4ca7db6963afc1be3c13b40736976bb5a9ba02c Mon Sep 17 00:00:00 2001 From: sanspie Date: Thu, 5 Dec 2024 16:36:50 +0300 Subject: [PATCH] updated search --- akarpov/music/api/serializers.py | 29 +++++---- akarpov/music/documents.py | 9 ++- akarpov/music/services/search.py | 105 ++++++------------------------- 3 files changed, 44 insertions(+), 99 deletions(-) diff --git a/akarpov/music/api/serializers.py b/akarpov/music/api/serializers.py index 5c94c92..f133ef2 100644 --- a/akarpov/music/api/serializers.py +++ b/akarpov/music/api/serializers.py @@ -88,8 +88,12 @@ def get_liked(self, obj): @extend_schema_field(ListAlbumSerializer) def get_album(self, obj): - if obj.album: - return ListAlbumSerializer(Album.objects.cache().get(id=obj.album_id)).data + if obj.album_id: + try: + album = Album.objects.cache().get(id=obj.album_id) + return ListAlbumSerializer(album).data + except Album.DoesNotExist: + return None return None @extend_schema_field(ListAuthorSerializer(many=True)) @@ -105,16 +109,17 @@ def get_image(self, obj): img = None if obj.image_cropped: img = obj.image_cropped - else: - album = Album.objects.cache().get(id=obj.album_id) - if album.image_cropped: - img = album.image_cropped - else: - authors = Author.objects.cache().filter( - Q(songs__id=obj.id) & ~Q(image="") - ) - if authors: - img = authors.first().image_cropped + elif obj.album_id: + try: + album = Album.objects.cache().get(id=obj.album_id) + if album.image_cropped: + img = album.image_cropped + except Album.DoesNotExist: + pass + if not img: + authors = Author.objects.cache().filter(Q(songs__id=obj.id) & ~Q(image="")) + if authors.exists(): + img = authors.first().image_cropped if img: return self.context["request"].build_absolute_uri(img.url) return None diff --git a/akarpov/music/documents.py b/akarpov/music/documents.py index 40f6c66..a4ca406 100644 --- a/akarpov/music/documents.py +++ b/akarpov/music/documents.py @@ -48,7 +48,7 @@ class SongDocument(Document): attr="name", fields={ "raw": fields.KeywordField(), - "exact": fields.KeywordField(normalizer="lowercase"), + "exact": fields.KeywordField(normalizer="lowercase_normalizer"), }, ) name_transliterated = fields.TextField( @@ -67,6 +67,13 @@ class Index: settings = { "number_of_shards": 1, "number_of_replicas": 0, + "normalizer": { + "lowercase_normalizer": { + "type": "custom", + "char_filter": [], + "filter": ["lowercase"], + } + }, "analysis": { "filter": { "my_transliterator": { diff --git a/akarpov/music/services/search.py b/akarpov/music/services/search.py index cb944b1..8923b47 100644 --- a/akarpov/music/services/search.py +++ b/akarpov/music/services/search.py @@ -8,97 +8,30 @@ def search_song(query): - # Split query into potential track and artist parts - parts = [part.strip() for part in query.split("-")] - track_query = parts[0] - artist_query = parts[1] if len(parts) > 1 else None - search = SongDocument.search() - # Base queries for track name with high boost - should_queries = [ - ES_Q("match_phrase", name={"query": track_query, "boost": 10}), - ES_Q("match", name={"query": track_query, "fuzziness": "AUTO", "boost": 8}), - ES_Q("wildcard", name={"value": f"*{track_query.lower()}*", "boost": 6}), - ES_Q( - "match", - name_transliterated={"query": track_query, "fuzziness": "AUTO", "boost": 5}, - ), - ] - - # Add artist-specific queries if artist part exists - if artist_query: - should_queries.extend( - [ - ES_Q( - "nested", - path="authors", - query=ES_Q( - "match_phrase", name={"query": artist_query, "boost": 4} - ), - ), - ES_Q( - "nested", - path="authors", - query=ES_Q( - "match", - name={"query": artist_query, "fuzziness": "AUTO", "boost": 3}, - ), - ), - ES_Q( - "nested", - path="authors", - query=ES_Q( - "wildcard", - name={"value": f"*{artist_query.lower()}*", "boost": 2}, - ), - ), - ] - ) - else: - # If no explicit artist, still search in authors but with lower boost - should_queries.extend( - [ - ES_Q( - "nested", - path="authors", - query=ES_Q("match_phrase", name={"query": track_query, "boost": 2}), - ), - ES_Q( - "nested", - path="authors", - query=ES_Q( - "match", - name={"query": track_query, "fuzziness": "AUTO", "boost": 1}, - ), - ), - ] - ) - - # Add album queries with lower boost - should_queries.extend( - [ - ES_Q( - "nested", - path="album", - query=ES_Q("match_phrase", name={"query": track_query, "boost": 1.5}), - ), - ES_Q( - "nested", - path="album", - query=ES_Q( - "match", - name={"query": track_query, "fuzziness": "AUTO", "boost": 1}, - ), - ), - ] + # Build a multi_match query that searches in song name, authors' names, and album names + multi_match_query = ES_Q( + "multi_match", + query=query, + fields=[ + "name^5", + "name.raw^10", + "name.exact^15", + "authors.name^4", + "authors.name.raw^8", + "authors.name.exact^12", + "album.name^3", + "album.name.raw^6", + "album.name.exact^9", + ], + fuzziness="AUTO", + operator="and", + type="best_fields", ) - # Combine all queries with minimum_should_match=1 - search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) - # Execute search with size limit - search = search.query(search_query).extra(size=20) + search = search.query(multi_match_query).extra(size=20) response = search.execute() if response.hits: