updated search

This commit is contained in:
Alexander Karpov 2024-10-04 17:03:31 +03:00
parent 67d9dc324e
commit 03c7c5309c
6 changed files with 334 additions and 61 deletions

View File

@ -378,3 +378,29 @@ class Meta:
"link": {"read_only": True},
"image": {"read_only": True},
}
class AllSearchSerializer(serializers.Serializer):
    # Combined search payload: the top matching songs, authors and albums.
    # Every getter reads the search text from obj["query"], so `obj` must be
    # a mapping that carries a "query" key.
    #
    # `method_name` is omitted on purpose: SerializerMethodField defaults to
    # `get_<field_name>`, which is exactly what the explicit arguments said.
    songs = serializers.SerializerMethodField()
    authors = serializers.SerializerMethodField()
    albums = serializers.SerializerMethodField()

    @extend_schema_field(ListSongSerializer(many=True))
    def get_songs(self, obj):
        # First 10 songs returned by the model-level search for obj["query"].
        matches = Song.objects.cache().search(obj["query"]).to_queryset()[:10]
        return ListSongSerializer(matches, many=True, context=self.context).data

    @extend_schema_field(ListAuthorSerializer(many=True))
    def get_authors(self, obj):
        # First 10 authors for obj["query"]. The serializer context is now
        # forwarded here as well, so all three nested serializers see the
        # same context (previously only the song serializer received it).
        matches = Author.objects.cache().search(obj["query"]).to_queryset()[:10]
        return ListAuthorSerializer(matches, many=True, context=self.context).data

    @extend_schema_field(ListAlbumSerializer(many=True))
    def get_albums(self, obj):
        # First 10 albums for obj["query"]; context forwarded as for authors.
        matches = Album.objects.cache().search(obj["query"]).to_queryset()[:10]
        return ListAlbumSerializer(matches, many=True, context=self.context).data

View File

@ -21,6 +21,7 @@
RetrieveUpdateDestroyAuthorAPIView,
RetrieveUpdateDestroyPlaylistAPIView,
RetrieveUpdateDestroySongAPIView,
SearchAllAPIView,
)
app_name = "music"
@ -80,4 +81,5 @@
name="retrieve_update_delete_author",
),
path("anon/create/", CreateAnonMusicUserAPIView.as_view(), name="create-anon"),
path("search/", SearchAllAPIView.as_view(), name="search_all"),
]

View File

@ -19,6 +19,7 @@
ListSongSlugsSerializer,
PlaylistSerializer,
SongSerializer,
AllSearchSerializer,
)
from akarpov.music.models import (
Album,
@ -28,7 +29,7 @@
SongUserRating,
UserListenHistory,
)
from akarpov.music.services.search import search_song
from akarpov.music.services.search import search_song, search_album, search_author
from akarpov.music.tasks import listen_to_song
from akarpov.users.models import User
@ -352,7 +353,25 @@ class ListAlbumsAPIView(generics.ListAPIView):
serializer_class = ListAlbumSerializer
pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny]
queryset = Album.objects.cache().all()
def get_queryset(self):
    """Return the albums to list.

    When the ``search`` query parameter is present and non-empty, the
    result of ``search_album`` for that text is returned; otherwise the
    cached queryset of all albums is returned.
    """
    term = self.request.query_params.get("search")
    if not term:
        return Album.objects.cache().all()
    return search_album(term)
@extend_schema(
    parameters=[
        OpenApiParameter(
            name="search",
            type=str,
            required=False,
            description="Search query for albums",
        ),
    ],
)
def get(self, request, *args, **kwargs):
    # Pure delegation to the parent implementation; the override carries the
    # schema decorator that declares the optional `search` query parameter.
    return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAlbumAPIView(
@ -369,7 +388,25 @@ class ListAuthorsAPIView(generics.ListAPIView):
serializer_class = ListAuthorSerializer
pagination_class = StandardResultsSetPagination
permission_classes = [permissions.AllowAny]
queryset = Author.objects.cache().all()
def get_queryset(self):
    """Return the authors to list.

    When the ``search`` query parameter is present and non-empty, the
    result of ``search_author`` for that text is returned; otherwise the
    cached queryset of all authors is returned.
    """
    term = self.request.query_params.get("search")
    if not term:
        return Author.objects.cache().all()
    return search_author(term)
@extend_schema(
    parameters=[
        OpenApiParameter(
            name="search",
            type=str,
            required=False,
            description="Search query for authors",
        ),
    ],
)
def get(self, request, *args, **kwargs):
    # Pure delegation to the parent implementation; the override carries the
    # schema decorator that declares the optional `search` query parameter.
    return super().get(request, *args, **kwargs)
class RetrieveUpdateDestroyAuthorAPIView(
@ -455,3 +492,53 @@ def get_queryset(self):
class CreateAnonMusicUserAPIView(generics.CreateAPIView):
    # Create-only endpoint backed by AnonMusicUserSerializer.
    # AllowAny: no authentication is required to call it.
    permission_classes = [permissions.AllowAny]
    serializer_class = AnonMusicUserSerializer
class SearchAllAPIView(LikedSongsContextMixin, generics.GenericAPIView):
    # Combined search endpoint: a single GET returns the best-matching songs,
    # albums and authors for the `query` parameter. Open to everyone.
    permission_classes = [permissions.AllowAny]
    serializer_class = AllSearchSerializer

    def get_serializer_context(self):
        # Start from the inherited context and make sure the current request
        # is present under the "request" key.
        context = super().get_serializer_context()
        context["request"] = self.request
        return context

    @extend_schema(
        parameters=[
            OpenApiParameter(
                name="query",
                description="Search query",
                required=True,
                type=str,
            ),
        ],
        responses={
            200: AllSearchSerializer,
        },
    )
    def get(self, request, *args, **kwargs):
        # A missing or whitespace-only query answers with empty lists and
        # never reaches the search backend.
        query = request.query_params.get("query", "").strip()
        if not query:
            return Response({"songs": [], "albums": [], "authors": []})

        songs = search_song(query)[:10]  # Top 10 songs
        albums = search_album(query)[:5]  # Top 5 albums
        authors = search_author(query)[:5]  # Top 5 authors

        # Build the serializer context once and share it between the three
        # serializers; it used to be rebuilt separately for each of them.
        # NOTE(review): LikedSongsContextMixin is not visible here — if it
        # does per-call work in get_serializer_context(), this also avoids
        # repeating that work.
        context = self.get_serializer_context()
        song_serializer = ListSongSerializer(songs, many=True, context=context)
        album_serializer = ListAlbumSerializer(albums, many=True, context=context)
        author_serializer = ListAuthorSerializer(authors, many=True, context=context)

        return Response(
            {
                "songs": song_serializer.data,
                "albums": album_serializer.data,
                "authors": author_serializer.data,
            }
        )

View File

@ -1,7 +1,7 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from akarpov.music.models import Song
from akarpov.music.models import Song, Album, Author
@registry.register_document
@ -14,6 +14,12 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"),
},
),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
},
@ -27,6 +33,12 @@ class SongDocument(Document):
"raw": fields.KeywordField(normalizer="lowercase"),
},
),
"name_transliterated": fields.TextField(
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
},
@ -39,6 +51,13 @@ class SongDocument(Document):
"exact": fields.KeywordField(normalizer="lowercase"),
},
)
name_transliterated = fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
@ -50,13 +69,17 @@ class Index:
"number_of_replicas": 0,
"analysis": {
"filter": {
"my_transliterator": {
"type": "icu_transform",
"id": "Any-Latin; NFD; [:Nonspacing Mark:] Remove; NFC",
},
"russian_stop": {
"type": "stop",
"stopwords": "_russian_",
},
"russian_keywords": {
"type": "keyword_marker",
"keywords": ["пример"],
"keywords": ["песня", "музыка", "певец", "альбом"],
},
"russian_stemmer": {
"type": "stemmer",
@ -82,6 +105,13 @@ class Index:
},
},
"analyzer": {
"transliterate": {
"tokenizer": "standard",
"filter": [
"lowercase",
"my_transliterator",
],
},
"russian": {
"tokenizer": "standard",
"filter": [
@ -139,3 +169,74 @@ def get_instances_from_related(self, related_instance):
if isinstance(related_instance, Song):
return related_instance.album
return related_instance.songs.all()
@registry.register_document
class AuthorDocument(Document):
    """Search document for ``Author``, stored in the ``authors`` index."""

    # Full-text author name with two keyword sub-fields: `raw` (no
    # normalizer) and `exact` (normalizer="lowercase").
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "exact": fields.KeywordField(normalizer="lowercase"),
        },
    )

    # Second view of the same `name` attribute, analyzed with the
    # "transliterate" analyzer from the shared index settings.
    name_transliterated = fields.TextField(
        attr="name",
        analyzer="transliterate",
        fields={"raw": fields.KeywordField()},
    )

    # NOTE(review): neither field below declares `attr`, and this class
    # defines no `prepare_suggest` / `prepare_meta` — confirm that Author
    # exposes `suggest` and `meta` attributes for indexing.
    suggest = fields.CompletionField()
    meta = fields.ObjectField(dynamic=True)

    class Index:
        name = "authors"
        # The very same settings object as the song index (not a copy).
        settings = SongDocument.Index.settings  # Reuse settings

    class Django:
        model = Author
@registry.register_document
class AlbumDocument(Document):
    """Search document for ``Album``, stored in the ``albums`` index.

    Besides the album's own name fields, each document embeds its authors
    as a nested field.
    """

    # Full-text album name with two keyword sub-fields: `raw` (no
    # normalizer) and `exact` (normalizer="lowercase").
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
            "exact": fields.KeywordField(normalizer="lowercase"),
        },
    )

    # Second view of the same `name` attribute, analyzed with the
    # "transliterate" analyzer from the shared index settings.
    name_transliterated = fields.TextField(
        attr="name",
        analyzer="transliterate",
        fields={"raw": fields.KeywordField()},
    )

    # NOTE(review): neither field below declares `attr`, and this class
    # defines no `prepare_suggest` / `prepare_meta` — confirm that Album
    # exposes `suggest` and `meta` attributes for indexing.
    suggest = fields.CompletionField()
    meta = fields.ObjectField(dynamic=True)

    # Authors of the album, read from the `authors` attribute.
    authors = fields.NestedField(
        attr="authors",
        properties={
            "name": fields.TextField(
                fields={"raw": fields.KeywordField(normalizer="lowercase")},
            ),
            # Transliterated view of each author's `name`.
            "name_transliterated": fields.TextField(
                attr="name",
                analyzer="transliterate",
                fields={"raw": fields.KeywordField()},
            ),
            "link": fields.TextField(),
            "meta": fields.ObjectField(dynamic=True),
        },
    )

    class Index:
        name = "albums"
        # The very same settings object as the song index (not a copy).
        settings = SongDocument.Index.settings  # Reuse settings

    class Django:
        model = Album

View File

@ -3,65 +3,70 @@
from django_elasticsearch_dsl.registries import registry
from elasticsearch_dsl import Q as ES_Q
from akarpov.music.documents import SongDocument
from akarpov.music.models import Song
from akarpov.music.documents import SongDocument, AlbumDocument, AuthorDocument
from akarpov.music.models import Song, Author, Album
def search_song(query):
search = SongDocument.search()
search_query = ES_Q(
"bool",
should=[
ES_Q("match", name=query),
ES_Q("match", name__russian=query),
ES_Q(
"multi_match",
query=query,
fields=[
"name^5",
"name.russian^5",
"authors.name^3",
"authors.name.raw^3",
"album.name^3",
"album.name.raw^3",
"name.raw^2",
],
type="best_fields",
fuzziness="AUTO",
should_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 5}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match_phrase", name={"query": query, "boost": 4}),
),
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 2}),
),
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 1}),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 0.8}),
),
ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
),
ES_Q(
"nested",
path="authors",
query=ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
),
ES_Q(
"nested",
path="authors",
query=ES_Q(
"multi_match",
query=query,
fields=["authors.name", "authors.name.raw"],
fuzziness="AUTO",
),
),
ES_Q(
"nested",
path="album",
query=ES_Q(
"match",
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 0.8},
),
# Correcting wildcard queries with the proper syntax:
ES_Q("wildcard", **{"name.raw": f"*{query.lower()}*"}),
ES_Q(
"nested",
path="album",
query=ES_Q(
"multi_match",
query=query,
fields=["album.name", "album.name.raw"],
fuzziness="AUTO",
),
),
# Ensuring the nested wildcard query is properly structured
ES_Q(
"nested",
path="album",
query=ES_Q("wildcard", **{"album.name.raw": f"*{query.lower()}*"}),
),
# Correcting the wildcard query for `meta.raw`
ES_Q("wildcard", **{"meta.raw": f"*{query.lower()}*"}),
],
minimum_should_match=1,
)
),
]
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
search = search.query(search_query).extra(size=20)
response = search.execute()
@ -71,7 +76,6 @@ def search_song(query):
songs = Song.objects.filter(id__in=hit_ids).order_by(
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
)
return songs
return Song.objects.none()
@ -96,3 +100,57 @@ def get_popular_songs():
def bulk_update_index(model_class):
    """Push every row of ``model_class`` to the search index.

    All objects of the model are handed to ``registry.update`` with a bulk
    size of 100.
    """
    registry.update(model_class.objects.all(), bulk_size=100)
def _search_by_name(document, model, query, size=10):
    """Run the shared name search against ``document`` and load ``model`` rows.

    The query is a ``bool`` with four ``should`` clauses, at least one of
    which must match:

    * exact phrase on ``name`` (boost 5),
    * fuzzy match on ``name`` (boost 3),
    * substring wildcard on ``name`` using the lower-cased query (boost 1),
    * fuzzy match on ``name_transliterated`` (boost 1).

    Args:
        document: Document class to search (must provide ``.search()``).
        model: Django model whose rows correspond to the hits.
        query: Raw search text.
        size: Maximum number of hits requested from the search backend.

    Returns:
        A queryset of ``model`` ordered like the hits, or an empty queryset
        when there are no hits.
    """
    # NOTE(review): `query` goes into the wildcard pattern unescaped, so any
    # `*` or `?` typed by the user acts as a wildcard — confirm that this is
    # intended.
    pattern = f"*{query.lower()}*"
    clauses = [
        ES_Q("match_phrase", name={"query": query, "boost": 5}),
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
        ES_Q("wildcard", name={"value": pattern, "boost": 1}),
        ES_Q(
            "match",
            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
        ),
    ]
    bool_query = ES_Q("bool", should=clauses, minimum_should_match=1)
    response = document.search().query(bool_query).extra(size=size).execute()

    hit_ids = [hit.meta.id for hit in response.hits]
    if not hit_ids:
        return model.objects.none()

    # `id__in` alone does not keep the hit order, so each pk is mapped to its
    # position in the hit list and the rows are ordered by that position.
    hit_order = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
    return model.objects.filter(id__in=hit_ids).order_by(hit_order)


def search_author(query):
    """Return up to 10 ``Author`` rows matching ``query``, best hits first."""
    return _search_by_name(AuthorDocument, Author, query)


def search_album(query):
    """Return up to 10 ``Album`` rows matching ``query``, best hits first."""
    return _search_by_name(AlbumDocument, Album, query)

View File

@ -35,8 +35,7 @@ def album_create(sender, instance, created, **kwargs):
@receiver(post_save)
def send_que_status(sender, instance, created, **kwargs):
...
def send_que_status(sender, instance, created, **kwargs): ...
@receiver(pre_save, sender=SongUserRating)