added music search, major improvements on file search

This commit is contained in:
Alexander Karpov 2023-12-28 02:15:50 +03:00
parent 4a407dfc06
commit af5f1f8afc
5 changed files with 162 additions and 19 deletions

View File

@ -1,4 +1,4 @@
from django_elasticsearch_dsl import Document from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry from django_elasticsearch_dsl.registries import registry
from akarpov.files.models import File from akarpov.files.models import File
@ -6,26 +6,41 @@
@registry.register_document @registry.register_document
class FileDocument(Document): class FileDocument(Document):
class Index: name = fields.TextField(
name = "files" attr="name",
settings = {"number_of_shards": 1, "number_of_replicas": 0} fields={
"raw": fields.KeywordField(normalizer="lowercase"),
},
)
description = fields.TextField(
attr="description",
fields={
"raw": fields.KeywordField(normalizer="lowercase"),
},
)
content = fields.TextField(
attr="content",
fields={
"raw": fields.KeywordField(normalizer="lowercase"),
},
)
class Django: class Django:
model = File model = File
fields = [
"name",
"description",
"content",
]
def prepare_description(self, instance): def prepare_description(self, instance):
# This method is called for every instance before indexing
return instance.description or "" return instance.description or ""
def prepare_content(self, instance): def prepare_content(self, instance):
# This method is called for every instance before indexing # check instance.content is not None
return ( return (
instance.content.decode("utf-8") instance.content.decode("utf-8")
if isinstance(instance.content, bytes) if instance.content and isinstance(instance.content, bytes)
else instance.content else ""
) )
class Index:
name = "files"
settings = {"number_of_shards": 1, "number_of_replicas": 0}

View File

@ -40,13 +40,16 @@ def search(self, query: str):
ES_Q( ES_Q(
"multi_match", "multi_match",
query=query, query=query,
fields=["name", "description", "content"], fields=["name^3", "description^2", "content"],
type="best_fields", type="best_fields",
fuzziness="AUTO",
), ),
ES_Q("match_phrase_prefix", name=query), ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
ES_Q("wildcard", name=f"*{query}*"), ES_Q("wildcard", description__raw=f"*{query.lower()}*"),
ES_Q("wildcard", description=f"*{query}*"), ES_Q("wildcard", content__raw=f"*{query.lower()}*"),
ES_Q("wildcard", content=f"*{query}*"), ES_Q("wildcard", file_type__raw=f"*{query.lower()}*"),
ES_Q("wildcard", file_obj__raw=f"*{query.lower()}*"),
ES_Q("wildcard", preview__raw=f"*{query.lower()}*"),
], ],
minimum_should_match=1, minimum_should_match=1,
) )

View File

@ -25,6 +25,7 @@
SongUserRating, SongUserRating,
UserListenHistory, UserListenHistory,
) )
from akarpov.music.services.search import search_song
from akarpov.music.tasks import listen_to_song from akarpov.music.tasks import listen_to_song
@ -83,7 +84,11 @@ class ListCreateSongAPIView(LikedSongsContextMixin, generics.ListCreateAPIView):
pagination_class = StandardResultsSetPagination pagination_class = StandardResultsSetPagination
def get_queryset(self): def get_queryset(self):
qs = Song.objects.cache() search = self.request.query_params.get("search", None)
if search:
qs = search_song(search)
else:
qs = Song.objects.cache()
if "sort" in self.request.query_params: if "sort" in self.request.query_params:
sorts = self.request.query_params["sort"].split(",") sorts = self.request.query_params["sort"].split(",")
@ -111,6 +116,12 @@ def get_queryset(self):
@extend_schema( @extend_schema(
parameters=[ parameters=[
OpenApiParameter(
name="search",
description="Search query",
required=False,
type=str,
),
OpenApiParameter( OpenApiParameter(
name="sort", name="sort",
description="Sorting algorithm", description="Sorting algorithm",

View File

@ -0,0 +1,67 @@
from django_elasticsearch_dsl import Document, fields
from django_elasticsearch_dsl.registries import registry
from akarpov.music.models import Song
def _text_with_raw(**options):
    """Build a text field that also carries a lowercase-normalized ``raw`` keyword."""
    return fields.TextField(
        fields={"raw": fields.KeywordField(normalizer="lowercase")},
        **options,
    )


def _related_entity_properties():
    """Return the sub-field mapping shared by the ``authors`` and ``album`` fields."""
    return {
        "name": _text_with_raw(),
        "link": fields.TextField(),
        "meta": fields.ObjectField(dynamic=True),
    }


@registry.register_document
class SongDocument(Document):
    """Elasticsearch document describing a ``Song`` in the ``songs`` index.

    The song name, plus the names of its authors and album, are indexed as
    analyzed text with an additional ``raw`` keyword sub-field that uses the
    ``lowercase`` normalizer.
    """

    # Authors and album are mapped as nested objects (name / link / meta).
    authors = fields.NestedField(
        attr="authors",
        properties=_related_entity_properties(),
    )
    album = fields.NestedField(
        attr="album",
        properties=_related_entity_properties(),
    )
    name = _text_with_raw(attr="name")
    # Free-form metadata, mapped dynamically.
    meta = fields.ObjectField(dynamic=True)

    class Index:
        name = "songs"
        settings = {"number_of_shards": 1, "number_of_replicas": 0}
        # TODO: extend ``settings`` with an "analysis" section declaring a
        # custom "russian_icu" analyzer (tokenizer "icu_tokenizer", filters
        # "icu_folding" and "icu_normalizer").

    class Django:
        model = Song

    def get_instances_from_related(self, related_instance):
        """Map a changed related object to the objects handed back for re-indexing.

        Anything that is not a ``Song`` is expected to expose a ``songs``
        manager, whose full queryset is returned.

        NOTE(review): for a ``Song`` this returns its album rather than the
        song itself, and no ``related_models`` are declared on ``Django`` in
        this view - confirm whether this hook is actually used as intended.
        """
        if not isinstance(related_instance, Song):
            return related_instance.songs.all()
        return related_instance.album

View File

@ -0,0 +1,47 @@
from django.db.models import Case, When
from elasticsearch_dsl import Q as ES_Q
from akarpov.music.documents import SongDocument
from akarpov.music.models import Song
# Characters that act as operators (or as the escape character) inside an
# Elasticsearch wildcard pattern, mapped to their backslash-escaped form so
# that user input is always matched literally.
_WILDCARD_ESCAPES = str.maketrans({"\\": "\\\\", "*": "\\*", "?": "\\?"})


def _contains_pattern(text):
    """Return a lowercase ``*text*`` wildcard pattern with *text* escaped."""
    return f"*{text.lower().translate(_WILDCARD_ESCAPES)}*"


def _nested_name_clause(path, query, pattern, boost):
    """Build a ``nested`` clause matching ``<path>.name`` fuzzily or by substring.

    Fields mapped as nested live in separate hidden sub-documents, so they are
    only reachable through a ``nested`` query; a root-level ``multi_match``
    listing ``<path>.name`` never matches them.
    """
    return ES_Q(
        "nested",
        path=path,
        query=ES_Q(
            "bool",
            should=[
                ES_Q(
                    "match",
                    **{
                        f"{path}__name": {
                            "query": query,
                            "fuzziness": "AUTO",
                            "boost": boost,
                        }
                    },
                ),
                ES_Q("wildcard", **{f"{path}__name__raw": pattern}),
            ],
            minimum_should_match=1,
        ),
    )


def search_song(query):
    """Search songs by song, author or album name and return them by relevance.

    A song qualifies when at least one of the following holds for its name,
    one of its authors' names, or its album name: a fuzzy full-text match on
    the analyzed field, or a case-insensitive substring match on the ``raw``
    keyword sub-field. Song-name matches are boosted over author matches,
    which are boosted over album matches.

    Args:
        query: Raw user-supplied search text. Wildcard metacharacters in it
            are escaped and therefore matched literally.

    Returns:
        A ``Song`` queryset ordered like the Elasticsearch hits, or an empty
        queryset when the query is blank or nothing matches.

    NOTE(review): the search is executed without slicing, so Elasticsearch's
    default page size applies (10 hits unless configured otherwise) - confirm
    that this cap is intended for the paginated API.
    """
    # A blank query would otherwise degrade to the pattern "**" and match
    # every indexed song.
    if not query or not query.strip():
        return Song.objects.none()

    pattern = _contains_pattern(query)
    search_query = ES_Q(
        "bool",
        should=[
            ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
            ES_Q("wildcard", name__raw=pattern),
            _nested_name_clause("authors", query, pattern, boost=2),
            _nested_name_clause("album", query, pattern, boost=1),
        ],
        minimum_should_match=1,
    )
    response = SongDocument.search().query(search_query).execute()

    hit_ids = [hit.meta.id for hit in response.hits]
    if not hit_ids:
        return Song.objects.none()

    # ``id__in`` does not keep the relevance order, so each primary key is
    # mapped to its hit position and the queryset is ordered by that.
    relevance_order = Case(
        *[When(pk=pk, then=position) for position, pk in enumerate(hit_ids)]
    )
    return Song.objects.filter(id__in=hit_ids).order_by(relevance_order)