Compare commits

..

No commits in common. "6c15494aab9bf48ce58dab0feee71603e55c5d3b" and "f2682120945c3bb125e45a0636baa8541afaaf4e" have entirely different histories.

2 changed files with 37 additions and 69 deletions

View File

@ -11,7 +11,7 @@ class SongDocument(Document):
properties={ properties={
"name": fields.TextField( "name": fields.TextField(
fields={ fields={
"raw": fields.KeywordField(normalizer="lowercase_normalizer"), "raw": fields.KeywordField(normalizer="lowercase"),
}, },
), ),
"name_transliterated": fields.TextField( "name_transliterated": fields.TextField(
@ -30,7 +30,7 @@ class SongDocument(Document):
properties={ properties={
"name": fields.TextField( "name": fields.TextField(
fields={ fields={
"raw": fields.KeywordField(normalizer="lowercase_normalizer"), "raw": fields.KeywordField(normalizer="lowercase"),
}, },
), ),
"name_transliterated": fields.TextField( "name_transliterated": fields.TextField(
@ -67,7 +67,6 @@ class Index:
settings = { settings = {
"number_of_shards": 1, "number_of_shards": 1,
"number_of_replicas": 0, "number_of_replicas": 0,
"analysis": {
"normalizer": { "normalizer": {
"lowercase_normalizer": { "lowercase_normalizer": {
"type": "custom", "type": "custom",
@ -75,6 +74,7 @@ class Index:
"filter": ["lowercase"], "filter": ["lowercase"],
} }
}, },
"analysis": {
"filter": { "filter": {
"my_transliterator": { "my_transliterator": {
"type": "icu_transform", "type": "icu_transform",
@ -151,8 +151,8 @@ class Index:
"filter": [ "filter": [
"lowercase", "lowercase",
"autocomplete_filter", "autocomplete_filter",
"english_stemmer", "english_stemmer", # Apply English stemming for autocomplete
"russian_stemmer", "russian_stemmer", # Include Russian stemming if applicable
], ],
}, },
"search_synonym_with_stemming": { "search_synonym_with_stemming": {
@ -161,8 +161,8 @@ class Index:
"filter": [ "filter": [
"lowercase", "lowercase",
"synonym_filter", "synonym_filter",
"english_stemmer", "english_stemmer", # Apply English stemming for synonym search
"russian_stemmer", "russian_stemmer", # Include Russian stemming if processing Russian synonyms
], ],
}, },
}, },
@ -183,7 +183,7 @@ class AuthorDocument(Document):
name = fields.TextField( name = fields.TextField(
fields={ fields={
"raw": fields.KeywordField(), "raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase_normalizer"), "exact": fields.KeywordField(normalizer="lowercase"),
}, },
) )
name_transliterated = fields.TextField( name_transliterated = fields.TextField(
@ -198,7 +198,7 @@ class AuthorDocument(Document):
class Index: class Index:
name = "authors" name = "authors"
settings = SongDocument.Index.settings settings = SongDocument.Index.settings # Reuse settings
class Django: class Django:
model = Author model = Author
@ -209,7 +209,7 @@ class AlbumDocument(Document):
name = fields.TextField( name = fields.TextField(
fields={ fields={
"raw": fields.KeywordField(), "raw": fields.KeywordField(),
"exact": fields.KeywordField(normalizer="lowercase_normalizer"), "exact": fields.KeywordField(normalizer="lowercase"),
}, },
) )
name_transliterated = fields.TextField( name_transliterated = fields.TextField(
@ -243,7 +243,7 @@ class AlbumDocument(Document):
class Index: class Index:
name = "albums" name = "albums"
settings = SongDocument.Index.settings settings = SongDocument.Index.settings # Reuse settings
class Django: class Django:
model = Album model = Album

View File

@ -10,60 +10,28 @@
def search_song(query): def search_song(query):
search = SongDocument.search() search = SongDocument.search()
# Split the query into words # Build a multi_match query that searches in song name, authors' names, and album names
terms = query.strip().split() multi_match_query = ES_Q(
"multi_match",
# Initialize must and should clauses query=query,
must_clauses = [] fields=[
should_clauses = [] "name^5",
"name.raw^10",
# Build queries for song names "name.exact^15",
song_name_queries = [ "authors.name^4",
ES_Q("match_phrase", name={"query": query, "boost": 5}), "authors.name.raw^8",
ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}), "authors.name.exact^12",
ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}), "album.name^3",
] "album.name.raw^6",
"album.name.exact^9",
# Build queries for author names ],
author_name_queries = [ fuzziness="AUTO",
ES_Q( operator="and",
"nested", type="best_fields",
path="authors",
query=ES_Q("match_phrase", name={"query": query, "boost": 5}),
),
ES_Q(
"nested",
path="authors",
query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
),
ES_Q(
"nested",
path="authors",
query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
),
]
# If the query contains multiple terms, assume it might include both song and author names
if len(terms) > 1:
# Build combined queries
must_clauses.extend(
[
ES_Q("bool", should=song_name_queries),
ES_Q("bool", should=author_name_queries),
]
) )
else:
# If single term, search both song and author names but with lower boost
should_clauses.extend(song_name_queries + author_name_queries)
# Combine must and should clauses
if must_clauses:
search_query = ES_Q("bool", must=must_clauses, should=should_clauses)
else:
search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1)
# Execute search with size limit # Execute search with size limit
search = search.query(search_query).extra(size=20) search = search.query(multi_match_query).extra(size=20)
response = search.execute() response = search.execute()
if response.hits: if response.hits: