updated search

This commit is contained in:
Alexander Karpov 2024-10-04 17:03:31 +03:00
parent 67d9dc324e
commit 03c7c5309c
6 changed files with 334 additions and 61 deletions

View File

@ -378,3 +378,29 @@ class Meta:
"link": {"read_only": True}, "link": {"read_only": True},
"image": {"read_only": True}, "image": {"read_only": True},
} }
class AllSearchSerializer(serializers.Serializer):
    """Combined search result holding songs, authors and albums for one query.

    The object being serialized must be a mapping with a ``"query"`` key that
    holds the search text. Each field runs its own search on the matching
    model and serializes at most 10 rows.
    """

    # ``method_name`` is omitted on purpose: every getter below already uses
    # the default ``get_<field_name>`` name.
    songs = serializers.SerializerMethodField()
    authors = serializers.SerializerMethodField()
    albums = serializers.SerializerMethodField()

    @extend_schema_field(ListSongSerializer(many=True))
    def get_songs(self, obj):
        """Return up to 10 serialized songs matching ``obj["query"]``."""
        return ListSongSerializer(
            Song.objects.cache().search(obj["query"]).to_queryset()[:10],
            many=True,
            context=self.context,
        ).data

    @extend_schema_field(ListAuthorSerializer(many=True))
    def get_authors(self, obj):
        """Return up to 10 serialized authors matching ``obj["query"]``."""
        # Forward the context like get_songs does, so nested serializers for
        # all three sections see the same request/context.
        return ListAuthorSerializer(
            Author.objects.cache().search(obj["query"]).to_queryset()[:10],
            many=True,
            context=self.context,
        ).data

    @extend_schema_field(ListAlbumSerializer(many=True))
    def get_albums(self, obj):
        """Return up to 10 serialized albums matching ``obj["query"]``."""
        return ListAlbumSerializer(
            Album.objects.cache().search(obj["query"]).to_queryset()[:10],
            many=True,
            context=self.context,
        ).data

View File

@ -21,6 +21,7 @@
RetrieveUpdateDestroyAuthorAPIView, RetrieveUpdateDestroyAuthorAPIView,
RetrieveUpdateDestroyPlaylistAPIView, RetrieveUpdateDestroyPlaylistAPIView,
RetrieveUpdateDestroySongAPIView, RetrieveUpdateDestroySongAPIView,
SearchAllAPIView,
) )
app_name = "music" app_name = "music"
@ -80,4 +81,5 @@
name="retrieve_update_delete_author", name="retrieve_update_delete_author",
), ),
path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"), path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"),
path("search/", SearchAllAPIView.as_view(), name="search_all"),
] ]

View File

@ -19,6 +19,7 @@
ListSongSlugsSerializer, ListSongSlugsSerializer,
PlaylistSerializer, PlaylistSerializer,
SongSerializer, SongSerializer,
AllSearchSerializer,
) )
from akarpov.music.models import ( from akarpov.music.models import (
Album, Album,
@ -28,7 +29,7 @@
SongUserRating, SongUserRating,
UserListenHistory, UserListenHistory,
) )
from akarpov.music.services.search import search_song from akarpov.music.services.search import search_song, search_album, search_author
from akarpov.music.tasks import listen_to_song from akarpov.music.tasks import listen_to_song
from akarpov.users.models import User from akarpov.users.models import User
@ -352,7 +353,25 @@ class ListAlbumsAPIView(generics.ListAPIView):
serializer_class = ListAlbumSerializer serializer_class = ListAlbumSerializer
pagination_class = StandardResultsSetPagination pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny] permission_classes = [permissions.AllowAny]
queryset = Album.objects.cache().all()
def get_queryset(self):
    """Return albums for this request.

    When the ``search`` query parameter is present and non-empty, the result
    of ``search_album`` is returned; otherwise every album from
    ``Album.objects.cache()``.
    """
    term = self.request.query_params.get("search")
    if not term:
        return Album.objects.cache().all()
    return search_album(term)
@extend_schema(
    parameters=[
        OpenApiParameter(
            name="search",
            type=str,
            required=False,
            description="Search query for albums",
        ),
    ]
)
def get(self, request, *args, **kwargs):
    """Handle GET by delegating to the parent class.

    Overridden only so the optional ``search`` query parameter can be
    declared on the schema via ``extend_schema``.
    """
    return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAlbumAPIView( class RetrieveUpdateDestroyAlbumAPIView(
@ -369,7 +388,25 @@ class ListAuthorsAPIView(generics.ListAPIView):
serializer_class = ListAuthorSerializer serializer_class = ListAuthorSerializer
pagination_class = StandardResultsSetPagination pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny] permission_classes = [permissions.AllowAny]
queryset = Author.objects.cache().all()
def get_queryset(self):
    """Return authors for this request.

    When the ``search`` query parameter is present and non-empty, the result
    of ``search_author`` is returned; otherwise every author from
    ``Author.objects.cache()``.
    """
    term = self.request.query_params.get("search")
    if not term:
        return Author.objects.cache().all()
    return search_author(term)
@extend_schema(
    parameters=[
        OpenApiParameter(
            name="search",
            type=str,
            required=False,
            description="Search query for authors",
        ),
    ]
)
def get(self, request, *args, **kwargs):
    """Handle GET by delegating to the parent class.

    Overridden only so the optional ``search`` query parameter can be
    declared on the schema via ``extend_schema``.
    """
    return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAuthorAPIView( class RetrieveUpdateDestroyAuthorAPIView(
@ -455,3 +492,53 @@ def get_queryset(self):
class CreateAnonMusicUserAPIView(generics.CreateAPIView): class CreateAnonMusicUserAPIView(generics.CreateAPIView):
serializer_class = AnonMusicUserSerializer serializer_class = AnonMusicUserSerializer
permission_classes = [permissions.AllowAny] permission_classes = [permissions.AllowAny]
class SearchAllAPIView(LikedSongsContextMixin, generics.GenericAPIView):
    """Search songs, albums and authors at once via the ``query`` parameter.

    Responds with ``{"songs": [...], "albums": [...], "authors": [...]}``,
    limited to 10 songs, 5 albums and 5 authors.
    """

    permission_classes = [permissions.AllowAny]
    serializer_class = AllSearchSerializer

    def get_serializer_context(self):
        """Return the inherited serializer context with ``request`` set."""
        context = super().get_serializer_context()
        context["request"] = self.request
        return context

    @extend_schema(
        parameters=[
            OpenApiParameter(
                name="query",
                description="Search query",
                required=True,
                type=str,
            ),
        ],
        responses={
            200: AllSearchSerializer,
        },
    )
    def get(self, request, *args, **kwargs):
        """Run the three searches and return their serialized results.

        A missing or whitespace-only ``query`` yields three empty lists
        without running any search.
        """
        query = request.query_params.get("query", "").strip()
        if not query:
            return Response({"songs": [], "albums": [], "authors": []})

        # Build the context once and share it: this avoids repeating whatever
        # work the mixin chain does in get_serializer_context() for each of
        # the three serializers.
        context = self.get_serializer_context()

        songs = search_song(query)[:10]  # Top 10 songs
        albums = search_album(query)[:5]  # Top 5 albums
        authors = search_author(query)[:5]  # Top 5 authors

        return Response(
            {
                "songs": ListSongSerializer(
                    songs, many=True, context=context
                ).data,
                "albums": ListAlbumSerializer(
                    albums, many=True, context=context
                ).data,
                "authors": ListAuthorSerializer(
                    authors, many=True, context=context
                ).data,
            }
        )

View File

@ -1,7 +1,7 @@
from django_elasticsearch_dsl import Document, fields from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry from django_elasticsearch_dsl.registries import registry
from akarpov.music.models import Song from akarpov.music.models import Song, Album, Author
@registry.register_document @registry.register_document
@ -14,6 +14,12 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"), "raw": fields.KeywordField(normalizer="lowercase"),
}, },
), ),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(), "link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True), "meta": fields.ObjectField(dynamic=True),
}, },
@ -27,6 +33,12 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"), "raw": fields.KeywordField(normalizer="lowercase"),
}, },
), ),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(), "link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True), "meta": fields.ObjectField(dynamic=True),
}, },
@ -39,6 +51,13 @@ class SongDocument(Document):
"exact": fields.KeywordField(normalizer="lowercase"), "exact": fields.KeywordField(normalizer="lowercase"),
}, },
) )
name_transliterated = fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
)
suggest = fields.CompletionField() suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True) meta = fields.ObjectField(dynamic=True)
@ -50,13 +69,17 @@ class Index:
"number_of_replicas": 0, "number_of_replicas": 0,
"analysis": { "analysis": {
"filter": { "filter": {
"my_transliterator": {
"type": "icu_transform",
"id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC",
},
"russian_stop": { "russian_stop": {
"type": "stop", "type": "stop",
"stopwords": "_russian_", "stopwords": "_russian_",
}, },
"russian_keywords": { "russian_keywords": {
"type": "keyword_marker", "type": "keyword_marker",
"keywords": ["пример"], "keywords": ["песня", "музыка", "певец", "альбом"],
}, },
"russian_stemmer": { "russian_stemmer": {
"type": "stemmer", "type": "stemmer",
@ -82,6 +105,13 @@ class Index:
}, },
}, },
"analyzer": { "analyzer": {
"transliterate": {
"tokenizer": "standard",
"filter": [
"lowercase",
"my_transliterator",
],
},
"russian": { "russian": {
"tokenizer": "standard", "tokenizer": "standard",
"filter": [ "filter": [
@ -139,3 +169,74 @@ def get_instances_from_related(self, related_instance):
if isinstance(related_instance, Song): if isinstance(related_instance, Song):
return related_instance.album return related_instance.album
return related_instance.songs.all() return related_instance.songs.all()
@registry.register_document
class AuthorDocument(Document):
    """Search document for ``Author``, stored in the ``authors`` index."""

    # Analyzed author name plus two keyword sub-fields: ``raw`` as-is and
    # ``exact`` with the "lowercase" normalizer.
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "exact": fields.KeywordField(normalizer="lowercase"),
        }
    )
    # Same source attribute (``name``) indexed through the "transliterate"
    # analyzer.
    name_transliterated = fields.TextField(
        analyzer="transliterate",
        attr="name",
        fields={"raw": fields.KeywordField()},
    )
    suggest = fields.CompletionField()
    meta = fields.ObjectField(dynamic=True)

    class Index:
        name = "authors"
        # Shares the settings object declared on SongDocument.Index, so the
        # analyzers referenced above come from there.
        settings = SongDocument.Index.settings

    class Django:
        model = Author
@registry.register_document
class AlbumDocument(Document):
    """Search document for ``Album``, stored in the ``albums`` index."""

    # Analyzed album name plus two keyword sub-fields: ``raw`` as-is and
    # ``exact`` with the "lowercase" normalizer.
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "exact": fields.KeywordField(normalizer="lowercase"),
        }
    )
    # Same source attribute (``name``) indexed through the "transliterate"
    # analyzer.
    name_transliterated = fields.TextField(
        analyzer="transliterate",
        attr="name",
        fields={"raw": fields.KeywordField()},
    )
    suggest = fields.CompletionField()
    meta = fields.ObjectField(dynamic=True)

    # Nested sub-documents read from the album's ``authors`` attribute.
    authors = fields.NestedField(
        attr="authors",
        properties={
            "name": fields.TextField(
                fields={"raw": fields.KeywordField(normalizer="lowercase")}
            ),
            "name_transliterated": fields.TextField(
                analyzer="transliterate",
                attr="name",
                fields={"raw": fields.KeywordField()},
            ),
            "link": fields.TextField(),
            "meta": fields.ObjectField(dynamic=True),
        },
    )

    class Index:
        name = "albums"
        # Shares the settings object declared on SongDocument.Index, so the
        # analyzers referenced above come from there.
        settings = SongDocument.Index.settings

    class Django:
        model = Album

View File

@ -3,65 +3,70 @@
from django_elasticsearch_dsl.registries import registry from django_elasticsearch_dsl.registries import registry
from elasticsearch_dsl import Q as ES_Q from elasticsearch_dsl import Q as ES_Q
from akarpov.music.documents import SongDocument from akarpov.music.documents import SongDocument, AlbumDocument, AuthorDocument
from akarpov.music.models import Song from akarpov.music.models import Song, Author, Album
def search_song(query): def search_song(query):
search = SongDocument.search() search = SongDocument.search()
search_query = ES_Q(
"bool", should_queries = [
should=[ ES_Q("match_phrase", name={"query": query, "boost": 5}),
ES_Q("match", name=query),
ES_Q("match", name__russian=query),
ES_Q( ES_Q(
"multi_match", "nested",
query=query, path="authors",
fields=[ query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
"name^5", ),
"name.russian^5", ES_Q(
"authors.name^3", "nested",
"authors.name.raw^3", path="album",
"album.name^3", query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
"album.name.raw^3", ),
"name.raw^2", ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
], ES_Q(
type="best_fields", "nested",
fuzziness="AUTO", path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
), ),
ES_Q( ES_Q(
"nested", "nested",
path="authors", path="authors",
query=ES_Q( query=ES_Q(
"multi_match", "match",
query=query, name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
fields=["authors.name", "authors.name.raw"],
fuzziness="AUTO",
), ),
), ),
# Correcting wildcard queries with the proper syntax:
ES_Q("wildcard", **{"name.raw": f"*{query.lower()}*"}),
ES_Q( ES_Q(
"nested", "nested",
path="album", path="album",
query=ES_Q( query=ES_Q(
"multi_match", "match",
query=query, name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
fields=["album.name", "album.name.raw"],
fuzziness="AUTO",
), ),
), ),
# Ensuring the nested wildcard query is properly structured ]
ES_Q(
"nested", search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
path="album",
query=ES_Q("wildcard", **{"album.name.raw": f"*{query.lower()}*"}),
),
# Correcting the wildcard query for `meta.raw`
ES_Q("wildcard", **{"meta.raw": f"*{query.lower()}*"}),
],
minimum_should_match=1,
)
search = search.query(search_query).extra(size=20) search = search.query(search_query).extra(size=20)
response = search.execute() response = search.execute()
@ -71,7 +76,6 @@ def search_song(query):
songs = Song.objects.filter(id__in=hit_ids).order_by( songs = Song.objects.filter(id__in=hit_ids).order_by(
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
) )
return songs return songs
return Song.objects.none() return Song.objects.none()
@ -96,3 +100,57 @@ def get_popular_songs():
def bulk_update_index(model_class): def bulk_update_index(model_class):
qs = model_class.objects.all() qs = model_class.objects.all()
registry.update(qs, bulk_size=100) registry.update(qs, bulk_size=100)
def search_author(query):
    """Return authors matching *query*, ordered as Elasticsearch ranked them.

    Four optional clauses on the author name are combined, at least one of
    which must match: exact phrase (boost 5), fuzzy match (boost 3),
    substring wildcard (boost 1) and fuzzy match on the transliterated name
    (boost 1). At most 10 hits are requested.

    Args:
        query: Search text, typically taken straight from a request.

    Returns:
        An ``Author`` queryset in hit order, or an empty queryset when
        nothing matched.
    """
    # The text comes from the user, so escape the wildcard operators
    # (``*``, ``?``) and the escape character itself; otherwise they would be
    # interpreted as pattern syntax instead of being matched literally.
    wildcard_term = query.lower().translate(
        str.maketrans({"\\": "\\\\", "*": "\\*", "?": "\\?"})
    )

    should_queries = [
        ES_Q("match_phrase", name={"query": query, "boost": 5}),
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
        ES_Q("wildcard", name={"value": f"*{wildcard_term}*", "boost": 1}),
        ES_Q(
            "match",
            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
        ),
    ]
    search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)

    search = AuthorDocument.search().query(search_query).extra(size=10)
    response = search.execute()
    if not response.hits:
        return Author.objects.none()

    hit_ids = [hit.meta.id for hit in response.hits]
    # ``id__in`` alone does not keep the hit order, so sort the rows by each
    # id's position in the hit list.
    ranking = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
    return Author.objects.filter(id__in=hit_ids).order_by(ranking)
def search_album(query):
    """Return albums matching *query*, ordered as Elasticsearch ranked them.

    Four optional clauses on the album name are combined, at least one of
    which must match: exact phrase (boost 5), fuzzy match (boost 3),
    substring wildcard (boost 1) and fuzzy match on the transliterated name
    (boost 1). At most 10 hits are requested.

    Args:
        query: Search text, typically taken straight from a request.

    Returns:
        An ``Album`` queryset in hit order, or an empty queryset when
        nothing matched.
    """
    # The text comes from the user, so escape the wildcard operators
    # (``*``, ``?``) and the escape character itself; otherwise they would be
    # interpreted as pattern syntax instead of being matched literally.
    wildcard_term = query.lower().translate(
        str.maketrans({"\\": "\\\\", "*": "\\*", "?": "\\?"})
    )

    should_queries = [
        ES_Q("match_phrase", name={"query": query, "boost": 5}),
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
        ES_Q("wildcard", name={"value": f"*{wildcard_term}*", "boost": 1}),
        ES_Q(
            "match",
            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
        ),
    ]
    search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)

    search = AlbumDocument.search().query(search_query).extra(size=10)
    response = search.execute()
    if not response.hits:
        return Album.objects.none()

    hit_ids = [hit.meta.id for hit in response.hits]
    # ``id__in`` alone does not keep the hit order, so sort the rows by each
    # id's position in the hit list.
    ranking = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
    return Album.objects.filter(id__in=hit_ids).order_by(ranking)

View File

@ -35,8 +35,7 @@ def album_create(sender, instance, created, **kwargs):
@receiver(post_save) @receiver(post_save)
def send_que_status(sender, instance, created, **kwargs): def send_que_status(sender, instance, created, **kwargs): ...
...
@receiver(pre_save, sender=SongUserRating) @receiver(pre_save, sender=SongUserRating)