From 03c7c5309c20501781a34cce272203ad9bbe5d55 Mon Sep 17 00:00:00 2001 From: sanspie Date: Fri, 4 Oct 2024 17:03:31 +0300 Subject: [PATCH] updated search --- akarpov/music/api/serializers.py | 26 +++++ akarpov/music/api/urls.py | 2 + akarpov/music/api/views.py | 93 ++++++++++++++++- akarpov/music/documents.py | 105 ++++++++++++++++++- akarpov/music/services/search.py | 166 +++++++++++++++++++++---------- akarpov/music/signals.py | 3 +- 6 files changed, 334 insertions(+), 61 deletions(-) diff --git a/akarpov/music/api/serializers.py b/akarpov/music/api/serializers.py index 7290cad..5c94c92 100644 --- a/akarpov/music/api/serializers.py +++ b/akarpov/music/api/serializers.py @@ -378,3 +378,29 @@ class Meta: "link": {"read_only": True}, "image": {"read_only": True}, } + + +class AllSearchSerializer(serializers.Serializer): + songs = serializers.SerializerMethodField(method_name="get_songs") + authors = serializers.SerializerMethodField(method_name="get_authors") + albums = serializers.SerializerMethodField(method_name="get_albums") + + @extend_schema_field(ListSongSerializer(many=True)) + def get_songs(self, obj): + return ListSongSerializer( + Song.objects.cache().search(obj["query"]).to_queryset()[:10], + many=True, + context=self.context, + ).data + + @extend_schema_field(ListAuthorSerializer(many=True)) + def get_authors(self, obj): + return ListAuthorSerializer( + Author.objects.cache().search(obj["query"]).to_queryset()[:10], many=True + ).data + + @extend_schema_field(ListAlbumSerializer(many=True)) + def get_albums(self, obj): + return ListAlbumSerializer( + Album.objects.cache().search(obj["query"]).to_queryset()[:10], many=True + ).data diff --git a/akarpov/music/api/urls.py b/akarpov/music/api/urls.py index 16f4313..dda85b4 100644 --- a/akarpov/music/api/urls.py +++ b/akarpov/music/api/urls.py @@ -21,6 +21,7 @@ RetrieveUpdateDestroyAuthorAPIView, RetrieveUpdateDestroyPlaylistAPIView, RetrieveUpdateDestroySongAPIView, + SearchAllAPIView, ) app_name = "music" @@ -80,4 +81,5 @@ name="retrieve_update_delete_author", ), path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"), + path("search/", SearchAllAPIView.as_view(), name="search_all"), ] diff --git a/akarpov/music/api/views.py b/akarpov/music/api/views.py index 9189729..2625308 100644 --- a/akarpov/music/api/views.py +++ b/akarpov/music/api/views.py @@ -19,6 +19,7 @@ ListSongSlugsSerializer, PlaylistSerializer, SongSerializer, + AllSearchSerializer, ) from akarpov.music.models import ( Album, @@ -28,7 +29,7 @@ SongUserRating, UserListenHistory, ) -from akarpov.music.services.search import search_song +from akarpov.music.services.search import search_song, search_album, search_author from akarpov.music.tasks import listen_to_song from akarpov.users.models import User @@ -352,7 +353,25 @@ class ListAlbumsAPIView(generics.ListAPIView): serializer_class = ListAlbumSerializer pagination_class = StandardResultsSetPagination permission_classes = [permissions.AllowAny] - queryset = Album.objects.cache().all() + + def get_queryset(self): + search = self.request.query_params.get("search", None) + if search: + return search_album(search) + return Album.objects.cache().all() + + @extend_schema( + parameters=[ + OpenApiParameter( + name="search", + description="Search query for albums", + required=False, + type=str, + ), + ] + ) + def get(self, request, *args, **kwargs): + return super().get(request, *args, **kwargs) class RetrieveUpdateDestroyAlbumAPIView( @@ -369,7 +388,25 @@ class ListAuthorsAPIView(generics.ListAPIView): serializer_class = ListAuthorSerializer pagination_class = StandardResultsSetPagination permission_classes = [permissions.AllowAny] - queryset = Author.objects.cache().all() + + def get_queryset(self): + search = self.request.query_params.get("search", None) + if search: + return search_author(search) + return Author.objects.cache().all() + + @extend_schema( + parameters=[ + OpenApiParameter( + name="search", + description="Search query for authors", + required=False, + type=str, + ), + ] + ) + def get(self, request, *args, **kwargs): + return super().get(request, *args, **kwargs) class RetrieveUpdateDestroyAuthorAPIView( @@ -455,3 +492,53 @@ def get_queryset(self): class CreateAnonMusicUserAPIView(generics.CreateAPIView): serializer_class = AnonMusicUserSerializer permission_classes = [permissions.AllowAny] + + +class SearchAllAPIView(LikedSongsContextMixin, generics.GenericAPIView): + permission_classes = [permissions.AllowAny] + serializer_class = AllSearchSerializer + + def get_serializer_context(self): + context = super().get_serializer_context() + context["request"] = self.request + return context + + @extend_schema( + parameters=[ + OpenApiParameter( + name="query", + description="Search query", + required=True, + type=str, + ), + ], + responses={ + 200: AllSearchSerializer, + }, + ) + def get(self, request, *args, **kwargs): + query = request.query_params.get("query", "").strip() + if not query: + return Response({"songs": [], "albums": [], "authors": []}) + + songs = search_song(query)[:10] # Top 10 songs + albums = search_album(query)[:5] # Top 5 albums + authors = search_author(query)[:5] # Top 5 authors + + song_serializer = ListSongSerializer( + songs, many=True, context=self.get_serializer_context() + ) + album_serializer = ListAlbumSerializer( + albums, many=True, context=self.get_serializer_context() + ) + author_serializer = ListAuthorSerializer( + authors, many=True, context=self.get_serializer_context() + ) + + return Response( + { + "songs": song_serializer.data, + "albums": album_serializer.data, + "authors": author_serializer.data, + } + ) diff --git a/akarpov/music/documents.py b/akarpov/music/documents.py index 02c46e9..1a37d0a 100644 --- a/akarpov/music/documents.py +++ b/akarpov/music/documents.py @@ -1,7 +1,7 @@ from django_elasticsearch_dsl import Document, fields from django_elasticsearch_dsl.registries import registry -from akarpov.music.models import Song +from akarpov.music.models import Song, Album, Author @registry.register_document @@ -14,6 +14,12 @@ class SongDocument(Document): "raw": fields.KeywordField(normalizer="lowercase"), }, ), + "name_transliterated": fields.TextField( + analyzer="transliterate", + fields={ + "raw": fields.KeywordField(), + }, + ), "link": fields.TextField(), "meta": fields.ObjectField(dynamic=True), }, @@ -27,6 +33,12 @@ class SongDocument(Document): "raw": fields.KeywordField(normalizer="lowercase"), }, ), + "name_transliterated": fields.TextField( + analyzer="transliterate", + fields={ + "raw": fields.KeywordField(), + }, + ), "link": fields.TextField(), "meta": fields.ObjectField(dynamic=True), }, @@ -39,6 +51,13 @@ class SongDocument(Document): "exact": fields.KeywordField(normalizer="lowercase"), }, ) + name_transliterated = fields.TextField( + attr="name", + analyzer="transliterate", + fields={ + "raw": fields.KeywordField(), + }, + ) suggest = fields.CompletionField() meta = fields.ObjectField(dynamic=True) @@ -50,13 +69,17 @@ class Index: "number_of_replicas": 0, "analysis": { "filter": { + "my_transliterator": { + "type": "icu_transform", + "id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC", + }, "russian_stop": { "type": "stop", "stopwords": "_russian_", }, "russian_keywords": { "type": "keyword_marker", - "keywords": ["пример"], + "keywords": ["песня", "музыка", "певец", "альбом"], }, "russian_stemmer": { "type": "stemmer", @@ -82,6 +105,13 @@ class Index: }, }, "analyzer": { + "transliterate": { + "tokenizer": "standard", + "filter": [ + "lowercase", + "my_transliterator", + ], + }, "russian": { "tokenizer": "standard", "filter": [ @@ -139,3 +169,74 @@ def get_instances_from_related(self, related_instance): if isinstance(related_instance, Song): return related_instance.album return related_instance.songs.all() + + +@registry.register_document +class AuthorDocument(Document): + name = fields.TextField( + fields={ + "raw": fields.KeywordField(), + "exact": fields.KeywordField(normalizer="lowercase"), + }, + ) + name_transliterated = fields.TextField( + attr="name", + analyzer="transliterate", + fields={ + "raw": fields.KeywordField(), + }, + ) + suggest = fields.CompletionField() + meta = fields.ObjectField(dynamic=True) + + class Index: + name = "authors" + settings = SongDocument.Index.settings # Reuse settings + + class Django: + model = Author + + +@registry.register_document +class AlbumDocument(Document): + name = fields.TextField( + fields={ + "raw": fields.KeywordField(), + "exact": fields.KeywordField(normalizer="lowercase"), + }, + ) + name_transliterated = fields.TextField( + attr="name", + analyzer="transliterate", + fields={ + "raw": fields.KeywordField(), + }, + ) + suggest = fields.CompletionField() + meta = fields.ObjectField(dynamic=True) + authors = fields.NestedField( + attr="authors", + properties={ + "name": fields.TextField( + fields={ + "raw": fields.KeywordField(normalizer="lowercase"), + }, + ), + "name_transliterated": fields.TextField( + attr="name", + analyzer="transliterate", + fields={ + "raw": fields.KeywordField(), + }, + ), + "link": fields.TextField(), + "meta": fields.ObjectField(dynamic=True), + }, + ) + + class Index: + name = "albums" + settings = SongDocument.Index.settings # Reuse settings + + class Django: + model = Album diff --git a/akarpov/music/services/search.py b/akarpov/music/services/search.py index 3b7804a..eec744d 100644 --- a/akarpov/music/services/search.py +++ b/akarpov/music/services/search.py @@ -3,65 +3,70 @@ from django_elasticsearch_dsl.registries import registry from elasticsearch_dsl import Q as ES_Q -from akarpov.music.documents import SongDocument -from akarpov.music.models import Song +from akarpov.music.documents import SongDocument, AlbumDocument, AuthorDocument +from akarpov.music.models import Song, Author, Album def search_song(query): search = SongDocument.search() - search_query = ES_Q( - "bool", - should=[ - ES_Q("match", name=query), - ES_Q("match", name__russian=query), - ES_Q( - "multi_match", - query=query, - fields=[ - "name^5", - "name.russian^5", - "authors.name^3", - "authors.name.raw^3", - "album.name^3", - "album.name.raw^3", - "name.raw^2", - ], - type="best_fields", - fuzziness="AUTO", + + should_queries = [ + ES_Q("match_phrase", name={"query": query, "boost": 5}), + ES_Q( + "nested", + path="authors", + query=ES_Q("match_phrase", name={"query": query, "boost": 4}), + ), + ES_Q( + "nested", + path="album", + query=ES_Q("match_phrase", name={"query": query, "boost": 4}), + ), + ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}), + ES_Q( + "nested", + path="authors", + query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}), + ), + ES_Q( + "nested", + path="album", + query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}), + ), + ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}), + ES_Q( + "nested", + path="authors", + query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}), + ), + ES_Q( + "nested", + path="album", + query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}), + ), + ES_Q( + "match", + name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, + ), + ES_Q( + "nested", + path="authors", + query=ES_Q( + "match", + name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8}, ), - ES_Q( - "nested", - path="authors", - query=ES_Q( - "multi_match", - query=query, - fields=["authors.name", "authors.name.raw"], - fuzziness="AUTO", - ), + ), + ES_Q( + "nested", + path="album", + query=ES_Q( + "match", + name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8}, ), - # Correcting wildcard queries with the proper syntax: - ES_Q("wildcard", **{"name.raw": f"*{query.lower()}*"}), - ES_Q( - "nested", - path="album", - query=ES_Q( - "multi_match", - query=query, - fields=["album.name", "album.name.raw"], - fuzziness="AUTO", - ), - ), - # Ensuring the nested wildcard query is properly structured - ES_Q( - "nested", - path="album", - query=ES_Q("wildcard", **{"album.name.raw": f"*{query.lower()}*"}), - ), - # Correcting the wildcard query for `meta.raw` - ES_Q("wildcard", **{"meta.raw": f"*{query.lower()}*"}), - ], - minimum_should_match=1, - ) + ), + ] + + search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) search = search.query(search_query).extra(size=20) response = search.execute() @@ -71,7 +76,6 @@ def search_song(query): songs = Song.objects.filter(id__in=hit_ids).order_by( Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) ) - return songs return Song.objects.none() @@ -96,3 +100,57 @@ def get_popular_songs(): def bulk_update_index(model_class): qs = model_class.objects.all() registry.update(qs, bulk_size=100) + + +def search_author(query): + search = AuthorDocument.search() + + should_queries = [ + ES_Q("match_phrase", name={"query": query, "boost": 5}), + ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}), + ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}), + ES_Q( + "match", + name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, + ), + ] + + search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) + search = search.query(search_query).extra(size=10) + response = search.execute() + + if response.hits: + hit_ids = [hit.meta.id for hit in response.hits] + authors = Author.objects.filter(id__in=hit_ids).order_by( + Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) + ) + return authors + + return Author.objects.none() + + +def search_album(query): + search = AlbumDocument.search() + + should_queries = [ + ES_Q("match_phrase", name={"query": query, "boost": 5}), + ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}), + ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}), + ES_Q( + "match", + name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, + ), + ] + + search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) + search = search.query(search_query).extra(size=10) + response = search.execute() + + if response.hits: + hit_ids = [hit.meta.id for hit in response.hits] + albums = Album.objects.filter(id__in=hit_ids).order_by( + Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) + ) + return albums + + return Album.objects.none() diff --git a/akarpov/music/signals.py b/akarpov/music/signals.py index 1a6d1e7..4fe4b54 100644 --- a/akarpov/music/signals.py +++ b/akarpov/music/signals.py @@ -35,8 +35,7 @@ def album_create(sender, instance, created, **kwargs): @receiver(post_save) -def send_que_status(sender, instance, created, **kwargs): - ... +def send_que_status(sender, instance, created, **kwargs): ... @receiver(pre_save, sender=SongUserRating)