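Update song search: rank phrase, exact, fuzzy and wildcard matches across song, author and album names; add explicit meta field mappings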

This commit is contained in:
Alexander Karpov 2024-12-06 01:19:23 +03:00
parent 6c15494aab
commit 0b439f43a5
2 changed files with 103 additions and 50 deletions

View File

@ -21,7 +21,13 @@ class SongDocument(Document):
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
"meta": fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
),
},
)
@ -40,7 +46,13 @@ class SongDocument(Document):
},
),
"link": fields.TextField(),
"meta": fields.ObjectField(dynamic=True),
"meta": fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
),
},
)
@ -60,7 +72,13 @@ class SongDocument(Document):
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
meta = fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
)
class Index:
name = "songs"
@ -194,7 +212,14 @@ class AuthorDocument(Document):
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
meta = fields.ObjectField(
dynamic=True,
properties={
"description": fields.TextField(),
# Ensure no empty date fields here either
"popularity": fields.IntegerField(),
},
)
class Index:
name = "authors"
@ -220,7 +245,13 @@ class AlbumDocument(Document):
},
)
suggest = fields.CompletionField()
meta = fields.ObjectField(dynamic=True)
meta = fields.ObjectField(
dynamic=True,
properties={
"genre": fields.TextField(),
"release_year": fields.KeywordField(),
},
)
authors = fields.NestedField(
attr="authors",
properties={
@ -230,7 +261,6 @@ class AlbumDocument(Document):
},
),
"name_transliterated": fields.TextField(
attr="name",
analyzer="transliterate",
fields={
"raw": fields.KeywordField(),

View File

@ -8,59 +8,83 @@
def search_song(query):
if not query:
return Song.objects.none()
search = SongDocument.search()
# Split the query into words
terms = query.strip().split()
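# Priorities (highest boost first):
# 1. Phrase matches on the song name (boost 10)
# 2. Phrase matches on the author name or album name (boost 9)
# 3. Exact keyword match on the song name (boost 8)
# 4. Fuzzy matches (boost 5 on the song name, 4 on author/album names)
# 5. Wildcard (substring) matches (boost 2)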
# Initialize must and should clauses
must_clauses = []
should_clauses = []
# phrase matches (highest priority)
phrase_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 10}),
ES_Q(
"nested",
path="authors",
query=ES_Q("match_phrase", authors__name={"query": query, "boost": 9}),
),
ES_Q(
"nested",
path="album",
query=ES_Q("match_phrase", album__name={"query": query, "boost": 9}),
),
]
# Build queries for song names
song_name_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 5}),
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
# exact keyword matches (case-insensitive due to normalizers)
exact_queries = [
ES_Q("term", **{"name.exact": {"value": query.lower(), "boost": 8}})
]
# fuzzy matches
fuzzy_queries = [
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 5}),
ES_Q(
"nested",
path="authors",
query=ES_Q(
"match", authors__name={"query": query, "fuzziness": "AUTO", "boost": 4}
),
),
ES_Q(
"nested",
path="album",
query=ES_Q(
"match", album__name={"query": query, "fuzziness": "AUTO", "boost": 4}
),
),
]
# wildcard matches
wildcard_queries = [
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
]
# Build queries for author names
author_name_queries = [
ES_Q(
"nested",
path="authors",
query=ES_Q("match_phrase", name={"query": query, "boost": 5}),
query=ES_Q(
"wildcard", authors__name={"value": f"*{query.lower()}*", "boost": 2}
),
),
ES_Q(
"nested",
path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
path="album",
query=ES_Q(
"wildcard", album__name={"value": f"*{query.lower()}*", "boost": 2}
),
),
]
# If the query contains multiple terms, assume it might include both song and author names
if len(terms) > 1:
# Build combined queries
must_clauses.extend(
[
ES_Q("bool", should=song_name_queries),
ES_Q("bool", should=author_name_queries),
]
)
else:
# If single term, search both song and author names but with lower boost
should_clauses.extend(song_name_queries + author_name_queries)
# Combine must and should clauses
if must_clauses:
search_query = ES_Q("bool", must=must_clauses, should=should_clauses)
else:
search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1)
# Combine queries
# We'll use a should query to incorporate all of these, relying on boosting
search_query = ES_Q(
"bool",
should=phrase_queries + exact_queries + fuzzy_queries + wildcard_queries,
minimum_should_match=1,
)
# Execute search with size limit
search = search.query(search_query).extra(size=20)
@ -98,8 +122,9 @@ def bulk_update_index(model_class):
def search_author(query):
if not query:
return Author.objects.none()
search = AuthorDocument.search()
should_queries = [
ES_Q("match_phrase", name={"query": query, "boost": 5}),
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 3}),
@ -109,7 +134,6 @@ def search_author(query):
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
),
]
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
search = search.query(search_query).extra(size=10)
response = search.execute()
@ -120,11 +144,12 @@ def search_author(query):
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
)
return authors
return Author.objects.none()
def search_album(query):
if not query:
return Album.objects.none()
search = AlbumDocument.search()
should_queries = [
@ -136,7 +161,6 @@ def search_album(query):
name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 1},
),
]
search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
search = search.query(search_query).extra(size=10)
response = search.execute()
@ -147,5 +171,4 @@ def search_album(query):
Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
)
return albums
return Album.objects.none()