diff --git a/akarpov/music/documents.py b/akarpov/music/documents.py index 32969b4..3babf0e 100644 --- a/akarpov/music/documents.py +++ b/akarpov/music/documents.py @@ -21,7 +21,13 @@ class SongDocument(Document): }, ), "link": fields.TextField(), - "meta": fields.ObjectField(dynamic=True), + "meta": fields.ObjectField( + dynamic=True, + properties={ + "genre": fields.TextField(), + "release_year": fields.KeywordField(), + }, + ), }, ) @@ -40,7 +46,13 @@ class SongDocument(Document): }, ), "link": fields.TextField(), - "meta": fields.ObjectField(dynamic=True), + "meta": fields.ObjectField( + dynamic=True, + properties={ + "genre": fields.TextField(), + "release_year": fields.KeywordField(), + }, + ), }, ) @@ -60,7 +72,13 @@ class SongDocument(Document): ) suggest = fields.CompletionField() - meta = fields.ObjectField(dynamic=True) + meta = fields.ObjectField( + dynamic=True, + properties={ + "genre": fields.TextField(), + "release_year": fields.KeywordField(), + }, + ) class Index: name = "songs" @@ -194,7 +212,14 @@ class AuthorDocument(Document): }, ) suggest = fields.CompletionField() - meta = fields.ObjectField(dynamic=True) + meta = fields.ObjectField( + dynamic=True, + properties={ + "description": fields.TextField(), + # Ensure no empty date fields here either + "popularity": fields.IntegerField(), + }, + ) class Index: name = "authors" @@ -220,7 +245,13 @@ class AlbumDocument(Document): }, ) suggest = fields.CompletionField() - meta = fields.ObjectField(dynamic=True) + meta = fields.ObjectField( + dynamic=True, + properties={ + "genre": fields.TextField(), + "release_year": fields.KeywordField(), + }, + ) authors = fields.NestedField( attr="authors", properties={ @@ -230,7 +261,6 @@ class AlbumDocument(Document): }, ), "name_transliterated": fields.TextField( - attr="name", analyzer="transliterate", fields={ "raw": fields.KeywordField(), diff --git a/akarpov/music/services/search.py b/akarpov/music/services/search.py index c9b069c..ef9a3d1 100644 --- a/akarpov/music/services/search.py +++ b/akarpov/music/services/search.py @@ -8,59 +8,83 @@ def search_song(query): + if not query: + return Song.objects.none() + search = SongDocument.search() - # Split the query into words - terms = query.strip().split() + # Priorities: + # 1. Exact phrase matches in name, author name, album name + # 2. Part of author/album name + # 3. Exact name (exact matches) + # 4. Fuzzy matches + # 5. Wildcards - # Initialize must and should clauses - must_clauses = [] - should_clauses = [] + # phrase matches (highest priority) + phrase_queries = [ + ES_Q("match_phrase", name={"query": query, "boost": 10}), + ES_Q( + "nested", + path="authors", + query=ES_Q("match_phrase", authors__name={"query": query, "boost": 9}), + ), + ES_Q( + "nested", + path="album", + query=ES_Q("match_phrase", album__name={"query": query, "boost": 9}), + ), + ] - # Build queries for song names - song_name_queries = [ - ES_Q("match_phrase", name={"query": query, "boost": 5}), - ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}), + # exact keyword matches (non-case sensitive due to normalizers) + exact_queries = [ + ES_Q("term", **{"name.exact": {"value": query.lower(), "boost": 8}}) + ] + + # fuzzy matches + fuzzy_queries = [ + ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 5}), + ES_Q( + "nested", + path="authors", + query=ES_Q( + "match", authors__name={"query": query, "fuzziness": "AUTO", "boost": 4} + ), + ), + ES_Q( + "nested", + path="album", + query=ES_Q( + "match", album__name={"query": query, "fuzziness": "AUTO", "boost": 4} + ), + ), + ] + + # wildcard matches + wildcard_queries = [ ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}), - ] - - # Build queries for author names - author_name_queries = [ ES_Q( "nested", path="authors", - query=ES_Q("match_phrase", name={"query": query, "boost": 5}), + query=ES_Q( + "wildcard", authors__name={"value": f"*{query.lower()}*", "boost": 2} + ), ), ES_Q( "nested", - path="authors", - query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}), - ), - ES_Q( - "nested", - path="authors", - query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}), + path="album", + query=ES_Q( + "wildcard", album__name={"value": f"*{query.lower()}*", "boost": 2} + ), ), ] - # If the query contains multiple terms, assume it might include both song and author names - if len(terms) > 1: - # Build combined queries - must_clauses.extend( - [ - ES_Q("bool", should=song_name_queries), - ES_Q("bool", should=author_name_queries), - ] - ) - else: - # If single term, search both song and author names but with lower boost - should_clauses.extend(song_name_queries + author_name_queries) - - # Combine must and should clauses - if must_clauses: - search_query = ES_Q("bool", must=must_clauses, should=should_clauses) - else: - search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1) + # Combine queries + # We'll use a should query to incorporate all of these, relying on boosting + search_query = ES_Q( + "bool", + should=phrase_queries + exact_queries + fuzzy_queries + wildcard_queries, + minimum_should_match=1, + ) # Execute search with size limit search = search.query(search_query).extra(size=20) @@ -98,8 +122,9 @@ def bulk_update_index(model_class): def search_author(query): + if not query: + return Author.objects.none() search = AuthorDocument.search() - should_queries = [ ES_Q("match_phrase", name={"query": query, "boost": 5}), ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}), @@ -109,7 +134,6 @@ def search_author(query): name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, ), ] - search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) search = search.query(search_query).extra(size=10) response = search.execute() @@ -120,11 +144,12 @@ def search_author(query): Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) ) return authors - return Author.objects.none() def search_album(query): + if not query: + return Album.objects.none() search = AlbumDocument.search() should_queries = [ @@ -136,7 +161,6 @@ def search_album(query): name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, ), ] - search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) search = search.query(search_query).extra(size=10) response = search.execute() @@ -147,5 +171,4 @@ def search_album(query): Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) ) return albums - return Album.objects.none()