From da2b9e16d41d68015c70788ec28dea6ae2643c65 Mon Sep 17 00:00:00 2001
From: Alexandr Karpov <alexandr.d.karpov@gmail.com>
Date: Sat, 22 Oct 2022 19:23:25 +0300
Subject: [PATCH] updated score algorithm

---
 app/search/models.py               |  1 -
 app/search/services/search.py      | 64 +++++++++++++++++-------------
 app/search/services/spell_check.py | 16 +++++---
 3 files changed, 47 insertions(+), 34 deletions(-)

diff --git a/app/search/models.py b/app/search/models.py
index 456b0b8..44375cc 100644
--- a/app/search/models.py
+++ b/app/search/models.py
@@ -78,7 +78,6 @@ class Product(models.Model):
 
     class Meta:
         db_table = "product"
-        ordering = ["-score"]
 
 
 class ProductCharacteristic(models.Model):
diff --git a/app/search/services/search.py b/app/search/services/search.py
index 8e51033..7216217 100644
--- a/app/search/services/search.py
+++ b/app/search/services/search.py
@@ -1,7 +1,3 @@
-import string
-
-from django.db.models import QuerySet
-
 from search.models import (
     Product,
     Characteristic,
@@ -13,7 +9,7 @@ from search.models import (
 from typing import List
 
 from search.services.hints import get_hints
-from search.services.spell_check import spell_check_ru as spell_check
+from search.services.spell_check import spell_check_ru as spell_check, lemmatize
 
 
 def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str):
@@ -45,43 +41,55 @@ def _clean_text(text: str) -> List[str]:
     for st in [".", ",", "!", "?"]:
         text = text.replace(st, " ")
     text = text.split()
-    return text
-
-
-def apply_qs_search(qs: Product.objects, text: str):
-    text = _clean_text(text)
-    words = Product.objects.none()
+    re = []
     for word in text:
-        words = (
-            words
+        re.append(lemmatize(word))
+    return re
+
+
+def apply_qs_search(text: str):
+    text = _clean_text(text)
+    products = Product.objects.none()
+    for word in text:
+        products = (
+            products
             | Product.objects.filter(name__unaccent__trigram_similar=word)
             | Product.objects.filter(name__unaccent__icontains=word)
         )
-    print(words)
-    qs = qs | words
-    print(qs)
-    return qs
+    products = products.order_by("-score")
+    print(products)
+    return products
 
 
 def apply_all_qs_search(orig_qs, text: str):
     # words
-    qs = apply_qs_search(Product.objects.none(), text)
+    qs = apply_qs_search(text)
     text = _clean_text(text)
 
     # categories
     cats = Category.objects.none()
     for word in text:
         cats = cats | cats.filter(name__icontains=word)
-    qs = qs | Product.objects.filter(category__in=cats)
+    qs = Product.objects.filter(category__in=cats).order_by("-score") | qs
 
     # characteristics
     chars = Characteristic.objects.none()
     for word in text:
-        chars = chars | chars.filter(
-            value__icontains=word,
+        chars = (
+            chars
+            | Characteristic.objects.filter(
+                value__icontains=word,
+            )
+            | Characteristic.objects.filter(
+                value__unaccent__trigram_similar=word,
+            )
         )
-    qs = qs | Product.objects.filter(characteristics__characteristic__in=chars)
-    # print(qs)
+    qs = (
+        Product.objects.filter(characteristics__characteristic__in=chars).order_by(
+            "-score"
+        )
+        | qs
+    )
 
     return qs & orig_qs
 
@@ -164,19 +172,22 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
         typ = x["type"]
         val = x["value"]
         if typ == "Name":
-            qs = apply_qs_search(qs, val)
+            qs = qs & apply_qs_search(val)
+            qs = qs.order_by("-score")
         elif typ == "All":
             qs = apply_all_qs_search(qs, val)
         elif typ == "Category":
             qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter(
                 category__name__icontains=val
             )
+            qs = qs.order_by("-score")
         elif typ == "Characteristic":
             char = ProductCharacteristic.objects.filter(product__in=qs)
             char = char.filter(characteristic__value__icontains=val) | char.filter(
                 characteristic__value__unaccent__trigram_similar=val
             )
             qs = qs.filter(characteristics__in=char)
+            qs = qs.order_by("-score")
         elif typ == "Unknown":
             continue
         else:
@@ -184,7 +195,4 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
                 qs = qs.filter(unit_characteristics__in=val)
             else:
                 qs = qs.filter(characteristics__in=val)
-    return [
-        x.serialize_self()
-        for x in qs.distinct().order_by("-score")[offset : offset + limit]
-    ]
+    return [x.serialize_self() for x in qs.distinct()[offset : offset + limit]]
diff --git a/app/search/services/spell_check.py b/app/search/services/spell_check.py
index 34446d0..4a11556 100644
--- a/app/search/services/spell_check.py
+++ b/app/search/services/spell_check.py
@@ -1,18 +1,24 @@
-import requests as r
+import pymorphy2
 from spellchecker import SpellChecker
 
-speller_ru = SpellChecker(language='ru')
-speller_eng = SpellChecker(language='en')
+speller_ru = SpellChecker(language="ru")
+speller_eng = SpellChecker(language="en")
 
 
 def spell_check_ru(word: str) -> str:
     res = speller_ru.correction(word)
-    if not len(res): 
+    if not res or not len(res):
         return word
     return res
 
+
 def spell_check_en(word: str) -> str:
     res = speller_eng.correction(word)
-    if not len(res): 
+    if not res or not len(res):
         return word
     return res
+
+
+def lemmatize(word):
+    p = pymorphy2.MorphAnalyzer().parse(word)[0]
+    return p.normal_form