updated song search

fixed song search
2025-09-25 21:36:33 +03:00 · 2024-12-05 19:02:03 +03:00 · 2024-12-05 18:45:59 +03:00
2 changed files with 69 additions and 37 deletions
--- a/akarpov/music/documents.py
+++ b/akarpov/music/documents.py
@ -11,7 +11,7 @@ class SongDocument(Document):
        properties={
            "name": fields.TextField(
                fields={
-                    "raw": fields.KeywordField(normalizer="lowercase"),
+                    "raw": fields.KeywordField(normalizer="lowercase_normalizer"),
                },
            ),
            "name_transliterated": fields.TextField(
@ -30,7 +30,7 @@ class SongDocument(Document):
        properties={
            "name": fields.TextField(
                fields={
-                    "raw": fields.KeywordField(normalizer="lowercase"),
+                    "raw": fields.KeywordField(normalizer="lowercase_normalizer"),
                },
            ),
            "name_transliterated": fields.TextField(
@ -67,14 +67,14 @@ class Index:
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
-            "normalizer": {
-                "lowercase_normalizer": {
-                    "type": "custom",
-                    "char_filter": [],
-                    "filter": ["lowercase"],
-                }
-            },
            "analysis": {
+                "normalizer": {
+                    "lowercase_normalizer": {
+                        "type": "custom",
+                        "char_filter": [],
+                        "filter": ["lowercase"],
+                    }
+                },
                "filter": {
                    "my_transliterator": {
                        "type": "icu_transform",
@ -151,8 +151,8 @@ class Index:
                        "filter": [
                            "lowercase",
                            "autocomplete_filter",
-                            "english_stemmer",  # Apply English stemming for autocomplete
-                            "russian_stemmer",  # Include Russian stemming if applicable
+                            "english_stemmer",
+                            "russian_stemmer",
                        ],
                    },
                    "search_synonym_with_stemming": {
@ -161,8 +161,8 @@ class Index:
                        "filter": [
                            "lowercase",
                            "synonym_filter",
-                            "english_stemmer",  # Apply English stemming for synonym search
-                            "russian_stemmer",  # Include Russian stemming if processing Russian synonyms
+                            "english_stemmer",
+                            "russian_stemmer",
                        ],
                    },
                },
@ -183,7 +183,7 @@ class AuthorDocument(Document):
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
-            "exact": fields.KeywordField(normalizer="lowercase"),
+            "exact": fields.KeywordField(normalizer="lowercase_normalizer"),
        },
    )
    name_transliterated = fields.TextField(
@ -198,7 +198,7 @@ class AuthorDocument(Document):

    class Index:
        name = "authors"
-        settings = SongDocument.Index.settings  # Reuse settings
+        settings = SongDocument.Index.settings

    class Django:
        model = Author
@ -209,7 +209,7 @@ class AlbumDocument(Document):
    name = fields.TextField(
        fields={
            "raw": fields.KeywordField(),
-            "exact": fields.KeywordField(normalizer="lowercase"),
+            "exact": fields.KeywordField(normalizer="lowercase_normalizer"),
        },
    )
    name_transliterated = fields.TextField(
@ -243,7 +243,7 @@ class AlbumDocument(Document):

    class Index:
        name = "albums"
-        settings = SongDocument.Index.settings  # Reuse settings
+        settings = SongDocument.Index.settings

    class Django:
        model = Album
--- a/akarpov/music/services/search.py
+++ b/akarpov/music/services/search.py
@ -10,28 +10,60 @@
 def search_song(query):
    search = SongDocument.search()

-    # Build a multi_match query that searches in song name, authors' names, and album names
-    multi_match_query = ES_Q(
-        "multi_match",
-        query=query,
-        fields=[
-            "name^5",
-            "name.raw^10",
-            "name.exact^15",
-            "authors.name^4",
-            "authors.name.raw^8",
-            "authors.name.exact^12",
-            "album.name^3",
-            "album.name.raw^6",
-            "album.name.exact^9",
-        ],
-        fuzziness="AUTO",
-        operator="and",
-        type="best_fields",
-    )
+    # Split the query into words
+    terms = query.strip().split()
+
+    # Initialize must and should clauses
+    must_clauses = []
+    should_clauses = []
+
+    # Build queries for song names
+    song_name_queries = [
+        ES_Q("match_phrase", name={"query": query, "boost": 5}),
+        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
+        ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
+    ]
+
+    # Build queries for author names
+    author_name_queries = [
+        ES_Q(
+            "nested",
+            path="authors",
+            query=ES_Q("match_phrase", name={"query": query, "boost": 5}),
+        ),
+        ES_Q(
+            "nested",
+            path="authors",
+            query=ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 4}),
+        ),
+        ES_Q(
+            "nested",
+            path="authors",
+            query=ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
+        ),
+    ]
+
+    # Multi-word queries are assumed to name both a song and an author, so both groups must match
+    if len(terms) > 1:
+        # Build combined queries
+        must_clauses.extend(
+            [
+                ES_Q("bool", should=song_name_queries),
+                ES_Q("bool", should=author_name_queries),
+            ]
+        )
+    else:
+        # If single term, match either song or author names (same boosts; any one clause suffices)
+        should_clauses.extend(song_name_queries + author_name_queries)
+
+    # Combine must and should clauses
+    if must_clauses:
+        search_query = ES_Q("bool", must=must_clauses, should=should_clauses)
+    else:
+        search_query = ES_Q("bool", should=should_clauses, minimum_should_match=1)

    # Execute search with size limit
-    search = search.query(multi_match_query).extra(size=20)
+    search = search.query(search_query).extra(size=20)
    response = search.execute()

    if response.hits:
Author	SHA1	Message	Date
sanspie	6c15494aab	updated song search	2024-12-05 19:02:03 +03:00
sanspie	bf182dbd0a	fixed song search	2024-12-05 18:45:59 +03:00