Compare commits

...

2 Commits

Author SHA1 Message Date
6c15494aab updated song search 2024-12-05 19:02:03 +03:00
bf182dbd0a fixed song search 2024-12-05 18:45:59 +03:00
2 changed files with 69 additions and 37 deletions

View File

@ -11,7 +11,7 @@ class SongDocument(Document):
properties={ properties={
"name": fields.TextField( "name": fields.TextField(
fields={ fields={
"raw": fields.KeywordField(normalizer="lowercase"), "raw": fields.KeywordField(normalizer="lowercase_normalizer"),
}, },
), ),
"name_transliterated": fields.TextField( "name_transliterated": fields.TextField(
@ -30,7 +30,7 @@ class SongDocument(Document):
properties={ properties={
"name": fields.TextField( "name": fields.TextField(
fields={ fields={
"raw": fields.KeywordField(normalizer="lowercase"), "raw": fields.KeywordField(normalizer="lowercase_normalizer"),
}, },
), ),
"name_transliterated": fields.TextField( "name_transliterated": fields.TextField(
@ -67,14 +67,14 @@ class Index:
settings = { settings = {
"number_of_shards": 1, "number_of_shards": 1,
"number_of_replicas": 0, "number_of_replicas": 0,
"normalizer": {
"lowercase_normalizer": {
"type": "custom",
"char_filter": [],
"filter": ["lowercase"],
}
},
"analysis": { "analysis": {
"normalizer": {
"lowercase_normalizer": {
"type": "custom",
"char_filter": [],
"filter": ["lowercase"],
}
},
"filter": { "filter": {
"my_transliterator": { "my_transliterator": {
"type": "icu_transform", "type": "icu_transform",
@ -151,8 +151,8 @@ class Index:
"filter": [ "filter": [
"lowercase", "lowercase",
"autocomplete_filter", "autocomplete_filter",
"english_stemmer", # Apply English stemming for autocomplete "english_stemmer",
"russian_stemmer", # Include Russian stemming if applicable "russian_stemmer",
], ],
}, },
"search_synonym_with_stemming": { "search_synonym_with_stemming": {
@ -161,8 +161,8 @@ class Index:
"filter": [ "filter": [
"lowercase", "lowercase",
"synonym_filter", "synonym_filter",
"english_stemmer", # Apply English stemming for synonym search "english_stemmer",
"russian_stemmer", # Include Russian stemming if processing Russian synonyms "russian_stemmer",
], ],
}, },
}, },
@ -183,7 +183,7 @@ class AuthorDocument(Document):
name = fields.TextField( name = fields.TextField(
fields={ fields={
"raw": fields.KeywordField(), "raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase"), "exact": fields.KeywordField(normalizer="lowercase_normalizer"),
}, },
) )
name_transliterated = fields.TextField( name_transliterated = fields.TextField(
@ -198,7 +198,7 @@ class AuthorDocument(Document):
class Index: class Index:
name = "authors" name = "authors"
settings = SongDocument.Index.settings # Reuse settings settings = SongDocument.Index.settings
class Django: class Django:
model = Author model = Author
@ -209,7 +209,7 @@ class AlbumDocument(Document):
name = fields.TextField( name = fields.TextField(
fields={ fields={
"raw": fields.KeywordField(), "raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase"), "exact": fields.KeywordField(normalizer="lowercase_normalizer"),
}, },
) )
name_transliterated = fields.TextField( name_transliterated = fields.TextField(
@ -243,7 +243,7 @@ class AlbumDocument(Document):
class Index: class Index:
name = "albums" name = "albums"
settings = SongDocument.Index.settings # Reuse settings settings = SongDocument.Index.settings
class Django: class Django:
model = Album model = Album

View File

@ -10,28 +10,60 @@
def search_song(query): def search_song(query):
search = SongDocument.search() search = SongDocument.search()
# Build a multi_match query that searches in song name, authors' names, and album names # Split the query into words
multi_match_query = ES_Q( terms = query.strip().split()
"multi_match",
query=query, # Initialize must and should clauses
fields=[ must_clauses = []
"name^5", should_clauses = []
"name.raw^10",
"name.exact^15", # Build queries for song names
"authors.name^4", song_name_queries = [
"authors.name.raw^8", ES_Q("match_phrase", name={"query": query, "boost": 5}),
"authors.name.exact^12", ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
"album.name^3", ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
"album.name.raw^6", ]
"album.name.exact^9",
], # Build queries for author names
fuzziness="AUTO", author_name_queries = [
operator="and", ES_Q(
type="best_fields", "nested",
) path="authors",
query=ES_Q("match_phrase", name={"query": query, "boost": 5}),
),
ES_Q(
"nested",
path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
),
]
# If the query contains multiple terms, assume it might include both song and author names
if len(terms) > 1:
# Build combined queries
must_clauses.extend(
[
ES_Q("bool", should=song_name_queries),
ES_Q("bool", should=author_name_queries),
]
)
else:
# If single term, search both song and author names; a match on either one is enough (boosts are unchanged)
should_clauses.extend(song_name_queries + author_name_queries)
# Combine must and should clauses
if must_clauses:
search_query = ES_Q("bool", must=must_clauses, should=should_clauses)
else:
search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1)
# Execute search with size limit # Execute search with size limit
search = search.query(multi_match_query).extra(size=20) search = search.query(search_query).extra(size=20)
response = search.execute() response = search.execute()
if response.hits: if response.hits: