updated song search

This commit is contained in:
Alexander Karpov 2024-12-06 01:19:23 +03:00
parent 6c15494aab
commit 0b439f43a5
2 changed files with 103 additions and 50 deletions

View File

@ -21,7 +21,13 @@ class SongDocument(Document):
}, },
), ),
"link": fields.TextField(), "link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True), "meta": fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
),
}, },
) )
@ -40,7 +46,13 @@ class SongDocument(Document):
}, },
), ),
"link": fields.TextField(), "link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True), "meta": fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
),
}, },
) )
@ -60,7 +72,13 @@ class SongDocument(Document):
) )
suggest = fields.CompletionField() suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True) meta = fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
)
class Index: class Index:
name = "songs" name = "songs"
@ -194,7 +212,14 @@ class AuthorDocument(Document):
}, },
) )
suggest = fields.CompletionField() suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True) meta = fields.ObjectField(
dynamic=True,
properties={
"description": fields.TextField(),
# Ensure no empty date fields here either
"popularity": fields.IntegerField(),
},
)
class Index: class Index:
name = "authors" name = "authors"
@ -220,7 +245,13 @@ class AlbumDocument(Document):
}, },
) )
suggest = fields.CompletionField() suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True) meta = fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
)
authors = fields.NestedField( authors = fields.NestedField(
attr="authors", attr="authors",
properties={ properties={
@ -230,7 +261,6 @@ class AlbumDocument(Document):
}, },
), ),
"name_transliterated": fields.TextField( "name_transliterated": fields.TextField(
attr="name",
analyzer="transliterate", analyzer="transliterate",
fields={ fields={
"raw": fields.KeywordField(), "raw": fields.KeywordField(),

View File

@ -8,59 +8,83 @@
def search_song(query): def search_song(query):
if not query:
return Song.objects.none()
search = SongDocument.search() search = SongDocument.search()
# Split the query into words # Priorities:
terms = query.strip().split() # 1. Exact phrase matches in name, author name, album name
# 2. Part of author/album name
# 3. Exact name (exact matches)
# 4. Fuzzy matches
# 5. Wildcards
# Initialize must and should clauses # phrase matches (highest priority)
must_clauses = [] phrase_queries = [
should_clauses = [] ES_Q("match_phrase", name={"query": query, "boost": 10}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match_phrase", authors__name={"query": query, "boost": 9}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match_phrase", album__name={"query": query, "boost": 9}),
),
]
# Build queries for song names # exact keyword matches (non-case sensitive due to normalizers)
song_name_queries = [ exact_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 5}), ES_Q("term", **{"name.exact": {"value": query.lower(), "boost": 8}})
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}), ]
# fuzzy matches
fuzzy_queries = [
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 5}),
ES_Q(
"nested",
path="authors",
query=ES_Q(
"match", authors__name={"query": query, "fuzziness": "AUTO", "boost": 4}
),
),
ES_Q(
"nested",
path="album",
query=ES_Q(
"match", album__name={"query": query, "fuzziness": "AUTO", "boost": 4}
),
),
]
# wildcard matches
wildcard_queries = [
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}), ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
]
# Build queries for author names
author_name_queries = [
ES_Q( ES_Q(
"nested", "nested",
path="authors", path="authors",
query=ES_Q("match_phrase", name={"query": query, "boost": 5}), query=ES_Q(
"wildcard", authors__name={"value": f"*{query.lower()}*", "boost": 2}
),
), ),
ES_Q( ES_Q(
"nested", "nested",
path="authors", path="album",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}), query=ES_Q(
), "wildcard", album__name={"value": f"*{query.lower()}*", "boost": 2}
ES_Q( ),
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
), ),
] ]
# If the query contains multiple terms, assume it might include both song and author names # Combine queries
if len(terms) > 1: # We'll use a should query to incorporate all of these, relying on boosting
# Build combined queries search_query = ES_Q(
must_clauses.extend( "bool",
[ should=phrase_queries + exact_queries + fuzzy_queries + wildcard_queries,
ES_Q("bool", should=song_name_queries), minimum_should_match=1,
ES_Q("bool", should=author_name_queries), )
]
)
else:
# If single term, search both song and author names but with lower boost
should_clauses.extend(song_name_queries + author_name_queries)
# Combine must and should clauses
if must_clauses:
search_query = ES_Q("bool", must=must_clauses, should=should_clauses)
else:
search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1)
# Execute search with size limit # Execute search with size limit
search = search.query(search_query).extra(size=20) search = search.query(search_query).extra(size=20)
@ -98,8 +122,9 @@ def bulk_update_index(model_class):
def search_author(query): def search_author(query):
if not query:
return Author.objects.none()
search = AuthorDocument.search() search = AuthorDocument.search()
should_queries = [ should_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 5}), ES_Q("match_phrase", name={"query": query, "boost": 5}),
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}), ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
@ -109,7 +134,6 @@ def search_author(query):
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
), ),
] ]
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
search = search.query(search_query).extra(size=10) search = search.query(search_query).extra(size=10)
response = search.execute() response = search.execute()
@ -120,11 +144,12 @@ def search_author(query):
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
) )
return authors return authors
return Author.objects.none() return Author.objects.none()
def search_album(query): def search_album(query):
if not query:
return Album.objects.none()
search = AlbumDocument.search() search = AlbumDocument.search()
should_queries = [ should_queries = [
@ -136,7 +161,6 @@ def search_album(query):
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1}, name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
), ),
] ]
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1) search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
search = search.query(search_query).extra(size=10) search = search.query(search_query).extra(size=10)
response = search.execute() response = search.execute()
@ -147,5 +171,4 @@ def search_album(query):
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)]) Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
) )
return albums return albums
return Album.objects.none() return Album.objects.none()