From af5f1f8afc3584e3f03ac2c2256361bb18d94e11 Mon Sep 17 00:00:00 2001
From: Alexander-D-Karpov
Date: Thu, 28 Dec 2023 02:15:50 +0300
Subject: [PATCH] added music search, major improvements on file search

---
Review note: hunks below were amended in review, so the diffstat (kept from the
original commit) is informational only.

 akarpov/files/documents.py       | 41 ++++++++++++-------
 akarpov/files/services/search.py | 13 ++++---
 akarpov/music/api/views.py       | 13 ++++++-
 akarpov/music/documents.py       | 67 ++++++++++++++++++++++++++++++++
 akarpov/music/services/search.py | 47 ++++++++++++++++++++++
 5 files changed, 162 insertions(+), 19 deletions(-)
 create mode 100644 akarpov/music/documents.py
 create mode 100644 akarpov/music/services/search.py

diff --git a/akarpov/files/documents.py b/akarpov/files/documents.py
index 7602064..84ca289 100644
--- a/akarpov/files/documents.py
+++ b/akarpov/files/documents.py
@@ -1,4 +1,4 @@
-from django_elasticsearch_dsl import Document
+from django_elasticsearch_dsl import Document, fields
 from django_elasticsearch_dsl.registries import registry
 
 from akarpov.files.models import File
@@ -6,26 +6,45 @@
 
 @registry.register_document
 class FileDocument(Document):
-    class Index:
-        name = "files"
-        settings = {"number_of_shards": 1, "number_of_replicas": 0}
+    """Search index document for File (name, description and content)."""
+
+    name = fields.TextField(
+        attr="name",
+        fields={
+            "raw": fields.KeywordField(normalizer="lowercase"),
+        },
+    )
+
+    description = fields.TextField(
+        attr="description",
+        fields={
+            "raw": fields.KeywordField(normalizer="lowercase"),
+        },
+    )
+
+    content = fields.TextField(
+        attr="content",
+        fields={
+            # ignore_above keeps very long content under Lucene's 32766-byte
+            # term limit, which would otherwise make indexing fail.
+            "raw": fields.KeywordField(normalizer="lowercase", ignore_above=8191),
+        },
+    )
 
     class Django:
         model = File
-        fields = [
-            "name",
-            "description",
-            "content",
-        ]
 
     def prepare_description(self, instance):
-        # This method is called for every instance before indexing
         return instance.description or ""
 
     def prepare_content(self, instance):
-        # This method is called for every instance before indexing
-        return (
-            instance.content.decode("utf-8")
-            if isinstance(instance.content, bytes)
-            else instance.content
-        )
+        # Decode bytes, keep str content unchanged, and index None/empty
+        # content as "" so the field is never null.
+        content = instance.content
+        if isinstance(content, bytes):
+            return content.decode("utf-8")
+        return content or ""
+
+    class Index:
+        name = "files"
+        settings = {"number_of_shards": 1, "number_of_replicas": 0}
diff --git a/akarpov/files/services/search.py b/akarpov/files/services/search.py
index cef6f45..f3a8ae3 100644
--- a/akarpov/files/services/search.py
+++ b/akarpov/files/services/search.py
@@ -40,13 +40,13 @@ def search(self, query: str):
                 ES_Q(
                     "multi_match",
                     query=query,
-                    fields=["name", "description", "content"],
+                    fields=["name^3", "description^2", "content"],
                     type="best_fields",
+                    fuzziness="AUTO",
                 ),
-                ES_Q("match_phrase_prefix", name=query),
-                ES_Q("wildcard", name=f"*{query}*"),
-                ES_Q("wildcard", description=f"*{query}*"),
-                ES_Q("wildcard", content=f"*{query}*"),
+                ES_Q("wildcard", name__raw=f"*{query.lower()}*"),
+                ES_Q("wildcard", description__raw=f"*{query.lower()}*"),
+                ES_Q("wildcard", content__raw=f"*{query.lower()}*"),
             ],
             minimum_should_match=1,
         )
diff --git a/akarpov/music/api/views.py b/akarpov/music/api/views.py
index 1132c44..1648f4b 100644
--- a/akarpov/music/api/views.py
+++ b/akarpov/music/api/views.py
@@ -25,6 +25,7 @@
     SongUserRating,
     UserListenHistory,
 )
+from akarpov.music.services.search import search_song
 from akarpov.music.tasks import listen_to_song
 
 
@@ -83,7 +84,11 @@ class ListCreateSongAPIView(LikedSongsContextMixin, generics.ListCreateAPIView):
     pagination_class = StandardResultsSetPagination
 
     def get_queryset(self):
-        qs = Song.objects.cache()
+        search = self.request.query_params.get("search", None)
+        if search:
+            qs = search_song(search)
+        else:
+            qs = Song.objects.cache()
 
         if "sort" in self.request.query_params:
             sorts = self.request.query_params["sort"].split(",")
@@ -111,6 +116,12 @@ def get_queryset(self):
 
     @extend_schema(
         parameters=[
+            OpenApiParameter(
+                name="search",
+                description="Search query",
+                required=False,
+                type=str,
+            ),
             OpenApiParameter(
                 name="sort",
                 description="Sorting algorithm",
diff --git a/akarpov/music/documents.py b/akarpov/music/documents.py
new file mode 100644
index 0000000..7c7bc65
--- /dev/null
+++ b/akarpov/music/documents.py
@@ -0,0 +1,62 @@
+from django_elasticsearch_dsl import Document, fields
+from django_elasticsearch_dsl.registries import registry
+
+from akarpov.music.models import Song
+
+
+@registry.register_document
+class SongDocument(Document):
+    """Search index document for Song, including author and album names."""
+
+    authors = fields.NestedField(
+        attr="authors",
+        properties={
+            "name": fields.TextField(
+                fields={
+                    "raw": fields.KeywordField(normalizer="lowercase"),
+                },
+            ),
+            "link": fields.TextField(),
+            "meta": fields.ObjectField(dynamic=True),
+        },
+    )
+
+    album = fields.NestedField(
+        attr="album",
+        properties={
+            "name": fields.TextField(
+                fields={
+                    "raw": fields.KeywordField(normalizer="lowercase"),
+                },
+            ),
+            "link": fields.TextField(),
+            "meta": fields.ObjectField(dynamic=True),
+        },
+    )
+
+    name = fields.TextField(
+        attr="name",
+        fields={
+            "raw": fields.KeywordField(normalizer="lowercase"),
+        },
+    )
+
+    # Song metadata object; its sub-fields are mapped dynamically.
+    meta = fields.ObjectField(dynamic=True)
+
+    class Index:
+        name = "songs"
+        settings = {"number_of_shards": 1, "number_of_replicas": 0}
+        # TODO: add a custom "russian_icu" analyzer (icu_tokenizer with the
+        # icu_folding and icu_normalizer filters; needs the analysis-icu plugin).
+
+    class Django:
+        model = Song
+
+    def get_instances_from_related(self, related_instance):
+        """Return the song(s) to re-index after ``related_instance`` changed."""
+        # NOTE: django-elasticsearch-dsl calls this hook only for models listed
+        # in ``Django.related_models``; none are declared here yet.
+        if isinstance(related_instance, Song):
+            return related_instance
+        return related_instance.songs.all()
diff --git a/akarpov/music/services/search.py b/akarpov/music/services/search.py
new file mode 100644
index 0000000..b34ab17
--- /dev/null
+++ b/akarpov/music/services/search.py
@@ -0,0 +1,68 @@
+from django.db.models import Case, When
+from elasticsearch_dsl import Q as ES_Q
+
+from akarpov.music.documents import SongDocument
+from akarpov.music.models import Song
+
+# "*", "?" and "\" are operators in an Elasticsearch wildcard pattern; escape
+# them so user input is always matched literally.
+_WILDCARD_ESCAPES = str.maketrans({"\\": "\\\\", "*": "\\*", "?": "\\?"})
+
+
+def _nested_name_query(path, query, pattern, boost):
+    """Build a nested query on ``<path>.name``: fuzzy match or substring."""
+    fuzzy = {"query": query, "fuzziness": "AUTO", "boost": boost}
+    return ES_Q(
+        "nested",
+        path=path,
+        query=ES_Q(
+            "bool",
+            should=[
+                ES_Q("match", **{f"{path}.name": fuzzy}),
+                ES_Q("wildcard", **{f"{path}.name.raw": pattern}),
+            ],
+            minimum_should_match=1,
+        ),
+    )
+
+
+def search_song(query):
+    """Return songs matching ``query``, ordered by search relevance.
+
+    The song name, author names and album name are searched both fuzzily and
+    by case-insensitive substring.
+
+    Args:
+        query: Raw search text entered by the user.
+
+    Returns:
+        A ``Song`` queryset in relevance order, or an empty queryset when the
+        query is blank or nothing matches.
+    """
+    query = (query or "").strip()
+    if not query:
+        return Song.objects.none()
+
+    pattern = f"*{query.lower().translate(_WILDCARD_ESCAPES)}*"
+    search_query = ES_Q(
+        "bool",
+        should=[
+            ES_Q("multi_match", query=query, fields=["name^3"], fuzziness="AUTO"),
+            ES_Q("wildcard", name__raw=pattern),
+            # authors/album are nested fields, so they can only be matched from
+            # inside a nested query, not from the top-level multi_match.
+            _nested_name_query("authors", query, pattern, boost=2),
+            _nested_name_query("album", query, pattern, boost=1),
+        ],
+        minimum_should_match=1,
+    )
+
+    # NOTE: without an explicit size, only the first 10 hits are returned.
+    response = SongDocument.search().query(search_query).execute()
+    hit_ids = [hit.meta.id for hit in response.hits]
+    if not hit_ids:
+        return Song.objects.none()
+
+    # Load the songs from the database in the order Elasticsearch ranked them.
+    relevance = Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
+    return Song.objects.filter(id__in=hit_ids).order_by(relevance)