Compare commits

...

2 Commits

Author SHA1 Message Date
6c15494aab updated song search 2024-12-05 19:02:03 +03:00
bf182dbd0a fixed song search 2024-12-05 18:45:59 +03:00
2 changed files with 69 additions and 37 deletions

View File

@@ -11,7 +11,7 @@ class SongDocument(Document):
 properties={
 "name": fields.TextField(
 fields={
-"raw": fields.KeywordField(normalizer="lowercase"),
+"raw": fields.KeywordField(normalizer="lowercase_normalizer"),
 },
 ),
 "name_transliterated": fields.TextField(
@@ -30,7 +30,7 @@ class SongDocument(Document):
 properties={
 "name": fields.TextField(
 fields={
-"raw": fields.KeywordField(normalizer="lowercase"),
+"raw": fields.KeywordField(normalizer="lowercase_normalizer"),
 },
 ),
 "name_transliterated": fields.TextField(
@@ -67,14 +67,14 @@ class Index:
 settings = {
 "number_of_shards": 1,
 "number_of_replicas": 0,
-"normalizer": {
-"lowercase_normalizer": {
-"type": "custom",
-"char_filter": [],
-"filter": ["lowercase"],
-}
-},
 "analysis": {
+"normalizer": {
+"lowercase_normalizer": {
+"type": "custom",
+"char_filter": [],
+"filter": ["lowercase"],
+}
+},
 "filter": {
 "my_transliterator": {
 "type": "icu_transform",
@@ -151,8 +151,8 @@ class Index:
 "filter": [
 "lowercase",
 "autocomplete_filter",
-"english_stemmer", # Apply English stemming for autocomplete
-"russian_stemmer", # Include Russian stemming if applicable
+"english_stemmer",
+"russian_stemmer",
 ],
 },
 "search_synonym_with_stemming": {
@@ -161,8 +161,8 @@ class Index:
 "filter": [
 "lowercase",
 "synonym_filter",
-"english_stemmer", # Apply English stemming for synonym search
-"russian_stemmer", # Include Russian stemming if processing Russian synonyms
+"english_stemmer",
+"russian_stemmer",
 ],
 },
 },
@@ -183,7 +183,7 @@ class AuthorDocument(Document):
 name = fields.TextField(
 fields={
 "raw": fields.KeywordField(),
-"exact": fields.KeywordField(normalizer="lowercase"),
+"exact": fields.KeywordField(normalizer="lowercase_normalizer"),
 },
 )
 name_transliterated = fields.TextField(
@@ -198,7 +198,7 @@ class AuthorDocument(Document):
 class Index:
 name = "authors"
-settings = SongDocument.Index.settings # Reuse settings
+settings = SongDocument.Index.settings
 class Django:
 model = Author
@@ -209,7 +209,7 @@ class AlbumDocument(Document):
 name = fields.TextField(
 fields={
 "raw": fields.KeywordField(),
-"exact": fields.KeywordField(normalizer="lowercase"),
+"exact": fields.KeywordField(normalizer="lowercase_normalizer"),
 },
 )
 name_transliterated = fields.TextField(
@@ -243,7 +243,7 @@ class AlbumDocument(Document):
 class Index:
 name = "albums"
-settings = SongDocument.Index.settings # Reuse settings
+settings = SongDocument.Index.settings
 class Django:
 model = Album

View File

@@ -10,28 +10,60 @@
 def search_song(query):
 search = SongDocument.search()
-# Build a multi_match query that searches in song name, authors' names, and album names
-multi_match_query = ES_Q(
-"multi_match",
-query=query,
-fields=[
-"name^5",
-"name.raw^10",
-"name.exact^15",
-"authors.name^4",
-"authors.name.raw^8",
-"authors.name.exact^12",
-"album.name^3",
-"album.name.raw^6",
-"album.name.exact^9",
-],
-fuzziness="AUTO",
-operator="and",
-type="best_fields",
-)
+# Split the query into words
+terms = query.strip().split()
+# Initialize must and should clauses
+must_clauses = []
+should_clauses = []
+# Build queries for song names
+song_name_queries = [
+ES_Q("match_phrase", name={"query": query, "boost": 5}),
+ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
+ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
+]
+# Build queries for author names
+author_name_queries = [
+ES_Q(
+"nested",
+path="authors",
+query=ES_Q("match_phrase", name={"query": query, "boost": 5}),
+),
+ES_Q(
+"nested",
+path="authors",
+query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
+),
+ES_Q(
+"nested",
+path="authors",
+query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
+),
+]
+# If the query contains multiple terms, assume it might include both song and author names
+if len(terms) > 1:
+# Build combined queries
+must_clauses.extend(
+[
+ES_Q("bool", should=song_name_queries),
+ES_Q("bool", should=author_name_queries),
+]
+)
+else:
+# If single term, search both song and author names but with lower boost
+should_clauses.extend(song_name_queries + author_name_queries)
+# Combine must and should clauses
+if must_clauses:
+search_query = ES_Q("bool", must=must_clauses, should=should_clauses)
+else:
+search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1)
 # Execute search with size limit
-search = search.query(multi_match_query).extra(size=20)
+search = search.query(search_query).extra(size=20)
 response = search.execute()
 if response.hits: