update search

2026-01-07 06:50:46 +03:00 · 2025-11-11 19:06:23 +03:00 · 2025-11-11 19:06:23 +03:00 · 00e337f2fd
commit 00e337f2fd
parent 558c749d23
3 changed files with 174 additions and 28 deletions
--- a/akarpov/common/models.py
+++ b/akarpov/common/models.py
@ -35,7 +35,7 @@ def _generate_charset():
            return generate_charset(private_slug_length)
        return generate_charset(slug_length)

-    if instance.id is None:
+    if instance.id is None and not instance.slug:
        model = sender
        slug_length = 5
        private_slug_length = 20
@ -63,6 +63,9 @@ class SlugModel(models.Model):
    """
    model to store and generate slug for model instances
    for custom slug length use: slug_length, private_slug_length Meta options
+
+    If a slug is already set when creating the instance, it will be preserved.
+    Random slug generation only occurs when the instance is new and has no slug.
    """

    slug = models.SlugField(max_length=20, blank=True, unique=True, db_index=True)
--- a/akarpov/music/documents.py
+++ b/akarpov/music/documents.py
@ -70,6 +70,14 @@ class SongDocument(Document):
            "raw": fields.KeywordField(),
        },
    )
+    # New slug field for searchable transliterated title
+    slug = fields.TextField(
+        attr="slug",
+        fields={
+            "raw": fields.KeywordField(),
+            "exact": fields.KeywordField(normalizer="lowercase_normalizer"),
+        },
+    )
    suggest = fields.CompletionField()

    meta = fields.ObjectField(
--- a/akarpov/music/services/search.py
+++ b/akarpov/music/services/search.py
@ -12,91 +12,226 @@ def search_song(query):
        return Song.objects.none()

    search = SongDocument.search()
+    query = query.strip()
+    terms = query.split()

    # Priorities:
-    # 1. Exact phrase matches in name, author name, album name
-    # 2. Part of author/album name
-    # 3. Exact name (exact matches)
-    # 4. Fuzzy matches
-    # 5. Wildcards
+    # 1. Combined field matches (Song name + Author/Album) – boosted above phrase matches (only slug.exact is boosted higher)
+    # 2. Exact phrase matches in name, author name, album name
+    # 3. Exact keyword matches (name.exact, slug.exact)
+    # 4. Fuzzy matches (name, authors, album, slug, including transliterated fields)
+    # 5. Wildcard matches (name, authors, album, slug, including transliterated fields)

-    # phrase matches (highest priority)
+    # Phrase matches (high priority for exact phrases in each field)
    phrase_queries = [
        ES_Q("match_phrase", name={"query": query, "boost": 10}),
        ES_Q(
            "nested",
            path="authors",
-            query=ES_Q("match_phrase", authors__name={"query": query, "boost": 9}),
+            query=ES_Q(
+                "match_phrase", **{"authors__name": {"query": query, "boost": 9}}
+            ),
        ),
        ES_Q(
            "nested",
            path="album",
-            query=ES_Q("match_phrase", album__name={"query": query, "boost": 9}),
+            query=ES_Q("match_phrase", **{"album__name": {"query": query, "boost": 9}}),
+        ),
+        # Include transliterated song, author, and album names for phrase matching
+        ES_Q("match_phrase", name_transliterated={"query": query, "boost": 10}),
+        ES_Q(
+            "nested",
+            path="authors",
+            query=ES_Q(
+                "match_phrase",
+                **{"authors__name_transliterated": {"query": query, "boost": 8}},
+            ),
+        ),
+        ES_Q(
+            "nested",
+            path="album",
+            query=ES_Q(
+                "match_phrase",
+                **{"album__name_transliterated": {"query": query, "boost": 8}},
+            ),
        ),
    ]

-    # exact keyword matches (non-case sensitive due to normalizers)
+    # Exact keyword matches (case-insensitive exact matches)
    exact_queries = [
-        ES_Q("term", **{"name.exact": {"value": query.lower(), "boost": 8}})
+        ES_Q("term", **{"name.exact": {"value": query.lower(), "boost": 8}}),
+        ES_Q(
+            "term", **{"slug.exact": {"value": query.lower(), "boost": 15}}
+        ),  # exact slug match (highest boost)
    ]

-    # fuzzy matches
+    # Fuzzy matches (to catch typos or variations)
    fuzzy_queries = [
        ES_Q("match", name={"query": query, "fuzziness": "AUTO", "boost": 5}),
        ES_Q(
            "nested",
            path="authors",
            query=ES_Q(
-                "match", authors__name={"query": query, "fuzziness": "AUTO", "boost": 4}
+                "match",
+                **{"authors__name": {"query": query, "fuzziness": "AUTO", "boost": 4}},
            ),
        ),
        ES_Q(
            "nested",
            path="album",
            query=ES_Q(
-                "match", album__name={"query": query, "fuzziness": "AUTO", "boost": 4}
+                "match",
+                **{"album__name": {"query": query, "fuzziness": "AUTO", "boost": 4}},
+            ),
+        ),
+        ES_Q(
+            "match", slug={"query": query, "fuzziness": "AUTO", "boost": 5}
+        ),  # fuzzy on slug
+        # Fuzzy on transliterated fields
+        ES_Q(
+            "match",
+            name_transliterated={"query": query, "fuzziness": "AUTO", "boost": 4},
+        ),
+        ES_Q(
+            "nested",
+            path="authors",
+            query=ES_Q(
+                "match",
+                **{
+                    "authors__name_transliterated": {
+                        "query": query,
+                        "fuzziness": "AUTO",
+                        "boost": 3,
+                    }
+                },
+            ),
+        ),
+        ES_Q(
+            "nested",
+            path="album",
+            query=ES_Q(
+                "match",
+                **{
+                    "album__name_transliterated": {
+                        "query": query,
+                        "fuzziness": "AUTO",
+                        "boost": 3,
+                    }
+                },
            ),
        ),
    ]

-    # wildcard matches
+    # Wildcard matches (partial substrings)
    wildcard_queries = [
        ES_Q("wildcard", name={"value": f"*{query.lower()}*", "boost": 2}),
        ES_Q(
            "nested",
            path="authors",
            query=ES_Q(
-                "wildcard", authors__name={"value": f"*{query.lower()}*", "boost": 2}
+                "wildcard",
+                **{"authors__name": {"value": f"*{query.lower()}*", "boost": 2}},
            ),
        ),
        ES_Q(
            "nested",
            path="album",
            query=ES_Q(
-                "wildcard", album__name={"value": f"*{query.lower()}*", "boost": 2}
+                "wildcard",
+                **{"album__name": {"value": f"*{query.lower()}*", "boost": 2}},
+            ),
+        ),
+        ES_Q("wildcard", slug={"value": f"*{query.lower()}*", "boost": 2}),
+        # Wildcard on transliterated fields
+        ES_Q(
+            "wildcard", name_transliterated={"value": f"*{query.lower()}*", "boost": 2}
+        ),
+        ES_Q(
+            "nested",
+            path="authors",
+            query=ES_Q(
+                "wildcard",
+                **{
+                    "authors__name_transliterated": {
+                        "value": f"*{query.lower()}*",
+                        "boost": 1,
+                    }
+                },
+            ),
+        ),
+        ES_Q(
+            "nested",
+            path="album",
+            query=ES_Q(
+                "wildcard",
+                **{
+                    "album__name_transliterated": {
+                        "value": f"*{query.lower()}*",
+                        "boost": 1,
+                    }
+                },
            ),
        ),
    ]

-    # Combine queries
-    # We'll use a should query to incorporate all of these, relying on boosting
-    search_query = ES_Q(
-        "bool",
-        should=phrase_queries + exact_queries + fuzzy_queries + wildcard_queries,
-        minimum_should_match=1,
-    )
+    # Combined field matches (song name + author/album terms) for multi-term queries
+    combined_queries = []
+    if len(terms) >= 2:
+        # If query has multiple words, require all terms across name and author fields (song title + author)
+        combined_queries.append(
+            ES_Q(
+                "multi_match",
+                query=query,
+                fields=["name", "authors.name"],
+                type="cross_fields",
+                operator="and",
+                boost=12,
+            )
+        )
+        # Song title + album combination
+        combined_queries.append(
+            ES_Q(
+                "multi_match",
+                query=query,
+                fields=["name", "album.name"],
+                type="cross_fields",
+                operator="and",
+                boost=11,
+            )
+        )
+    if len(terms) >= 3:
+        # If query has three or more terms, consider title+author+album all present
+        combined_queries.append(
+            ES_Q(
+                "multi_match",
+                query=query,
+                fields=["name", "authors.name", "album.name"],
+                type="cross_fields",
+                operator="and",
+                boost=13,
+            )
+        )

-    # Execute search with size limit
-    search = search.query(search_query).extra(size=20)
-    response = search.execute()
+    # Combine all queries using SHOULD (OR), so any can match, with boosts determining relevance
+    should_queries = (
+        phrase_queries
+        + exact_queries
+        + fuzzy_queries
+        + wildcard_queries
+        + combined_queries
+    )
+    search_query = ES_Q("bool", should=should_queries, minimum_should_match=1)
+
+    # Execute search with a reasonable limit
+    response = search.query(search_query).extra(size=20).execute()

    if response.hits:
+        # Preserve the search result ordering
        hit_ids = [hit.meta.id for hit in response.hits]
        songs = Song.objects.filter(id__in=hit_ids).order_by(
            Case(*[When(pk=pk, then=pos) for pos, pk in enumerate(hit_ids)])
        )
        return songs
-
    return Song.objects.none()