improved search: spelling, unit characteristic lookup, type guessing

2025-11-09 19:17:25 +03:00 · 2022-10-22 10:55:10 +03:00 · 2022-10-22 10:55:10 +03:00 · 8b5fddbd67
commit 8b5fddbd67
parent 497b930ca1
7 changed files with 119 additions and 28 deletions
--- a/app/conf/settings/base.py
+++ b/app/conf/settings/base.py
@ -214,3 +214,5 @@ REST_FRAMEWORK = {

 # django-cors-headers
 CORS_ALLOW_ALL_ORIGINS = True
+
+# NOTE(review): this API key is committed in plain text; it should be rotated
+# and loaded from the environment or a secrets store instead of source control.
+YANDEX_DICT_API_KEY = "dict.1.1.20221022T010312Z.55cce597a6cfa505.f416aba85e7642eedc1b76b8e21c06506fb17f83"
--- a/app/search/models.py
+++ b/app/search/models.py
@ -18,13 +18,14 @@ class Characteristic(models.Model):
 class UnitCharacteristic(models.Model):
    name = models.TextField("Имя", blank=False)
    value = models.TextField("Значение", blank=False)
+    numeric_value = models.IntegerField(default=0)
    unit = models.TextField("Размерность", blank=False)

    def __str__(self):
        return str(self.name)

    def serialize_self(self):
-        return {"name": self.name, "value": self.value, "unit": self.unit}
+        return {"name": self.name, "value": self.numeric_value, "unit": self.unit}

    class Meta:
        db_table = "unit_characteristic"
--- a/app/search/services/autocomplete_schema.py
+++ b/app/search/services/autocomplete_schema.py
@ -27,8 +27,8 @@ def autocomplete_schema(val: str):
    schema.extend(
        [
            {
-                "coordinate": char.name.index(val),
-                "value": {"type": char.name, "value": char.value},
+                "coordinate": char["name"].index(val),
+                "value": {"type": char["name"], "value": char["value"]},
            }
            for char in Characteristic.objects.filter(name__contains=val).values(
                "name", "value"
--- a/app/search/services/load_products.py
+++ b/app/search/services/load_products.py
@ -1,3 +1,4 @@
+import re
 from ast import literal_eval

 import pandas as pd
@ -59,3 +60,16 @@ def load_excel():
            # malformed node or string: nan \ duplicate key
            print("СКОРОСШИВАТЕЛЬ")
            continue
+
+
+def process_unit_character():
+    """Fill ``numeric_value`` on every ``UnitCharacteristic`` from its text ``value``.
+
+    Rows whose ``value`` does not contain exactly one parseable number are
+    deleted; for the rest the number is truncated to ``int`` and saved.
+    Both ``.`` and ``,`` are accepted as the decimal separator, so "2,5" is
+    read as the single number 2.5 instead of the two numbers "2" and "5"
+    (which used to get the row deleted).
+    """
+    # Compiled once, outside the loop; raw string so ``\d`` is a regex escape.
+    number_re = re.compile(r"[-+]?(?:\d+(?:[.,]\d*)?|[.,]\d+)(?:[eE][-+]?\d+)?")
+    for el in UnitCharacteristic.objects.all():
+        nums = number_re.findall(el.value)
+        if len(nums) != 1:
+            el.delete()
+            continue
+        try:
+            el.numeric_value = int(float(nums[0].replace(",", ".")))
+        except (ValueError, OverflowError):
+            # OverflowError: a huge exponent parses to inf, and int(inf) raises.
+            el.delete()
+        else:
+            # Only the derived column changes; leave the other fields alone.
+            el.save(update_fields=["numeric_value"])
--- a/app/search/services/search.py
+++ b/app/search/services/search.py
@ -1,32 +1,96 @@
-from search.models import Product, Characteristic, ProductCharacteristic
+import string
+
+from search.models import (
+    Product,
+    Characteristic,
+    ProductCharacteristic,
+    ProductUnitCharacteristic,
+    UnitCharacteristic,
+)
 from typing import List

+from search.services.hints import get_hints
+from search.services.spell_check import spell_check
+from search.services.translate import translate_en_ru, translate_ru_en

-def process_search(data: List[dict]) -> List[dict]:
+
+def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str):
+    """Narrow ``unit`` by a comparison such as ``">=10"`` on ``numeric_value``.
+
+    ``operation`` is a comparison operator (``<=``/``=<``, ``>=``/``=>``,
+    ``<``, ``>``, ``=``/``==``) followed by a number. ``,`` is accepted as the
+    decimal separator and the number is truncated to ``int``. ``unit`` is
+    returned unchanged when ``operation`` has no recognised operator or its
+    operand is not a number.
+    """
+    # Two-character operators come first so "<=" is not matched as "<".
+    lookups = (
+        ("<=", "lte"),
+        ("=<", "lte"),
+        (">=", "gte"),
+        ("=>", "gte"),
+        ("==", "exact"),
+        (">", "gt"),
+        ("<", "lt"),
+        ("=", "exact"),
+    )
+    operation = operation.strip()
+    for prefix, lookup in lookups:
+        if not operation.startswith(prefix):
+            continue
+        # The operand is what follows the operator; slicing [:n] returned the
+        # operator itself, so float() raised ValueError on every comparison.
+        operand = operation[len(prefix):].strip().replace(",", ".")
+        try:
+            number = int(float(operand))
+        except (ValueError, OverflowError):
+            return unit
+        return unit.filter(**{f"characteristic__numeric_value__{lookup}": number})
+    return unit
+
+
+def process_search(data: List[dict], limit=10, offset=0) -> List[dict]:
    prep_data = []
    prep_dict = {}
    prep_dict_char_type = {}
-
+    # --------------------------------------- prepare filters -------------------------------------------------------- #
    for x in data:
        dat = dict(x)
-        if x["type"] in ["Name", "Category", "Unknown"]:
-            prep_data.append(dat)
+        if x["type"] in ["Name", "Category"]:
+            prep_data.append(
+                {
+                    "type": dat["type"],
+                    "value": spell_check(
+                        dat["value"],
+                    ),
+                }
+            )
+        elif x["type"] == "Unknown":
+            type = get_hints(dat["value"])
+            prep_data.append(
+                {
+                    "type": type,
+                    "value": spell_check(
+                        dat["value"],
+                    ),
+                }
+            )
        else:
+            val = spell_check(
+                dat["value"],
+            )
            if x["type"] in list(prep_dict.keys()):
+                if x["type"].startswith("*"):
+                    unit = ProductUnitCharacteristic.objects.filter(
+                        characteristic__in=prep_dict_char_type[x["type"]],
+                    )
+                    prep_dict[x["type"]] = prep_dict[
+                        x["type"]
+                    ] | process_unit_operation(unit, x["value"])
+                else:
                    prep_dict[x["type"]] = prep_dict[
                        x["type"]
                    ] | ProductCharacteristic.objects.filter(
                        characteristic__in=prep_dict_char_type[x["type"]],
-                    characteristic__value__unaccent__trigram_similar=x["value"],
+                        characteristic__value__unaccent__trigram_similar=val,
                    )
+            else:
+                if x["type"].startswith("*"):
+                    prep_dict_char_type[x["type"]] = UnitCharacteristic.objects.filter(
+                        name__unaccent__trigram_similar=x["type"]
+                    )
+                    unit = ProductUnitCharacteristic.objects.filter(
+                        characteristic__in=prep_dict_char_type[x["type"]],
+                    )
+                    prep_dict[x["type"]] = process_unit_operation(unit, x["value"])
                else:
                    prep_dict_char_type[x["type"]] = Characteristic.objects.filter(
-                    name__contains=x["type"]
+                        name__unaccent__trigram_similar=x["type"]
                    )
                    prep_dict[x["type"]] = ProductCharacteristic.objects.filter(
                        characteristic__in=prep_dict_char_type[x["type"]],
-                    characteristic__value__unaccent__trigram_similar=x["value"],
+                        characteristic__value__unaccent__trigram_similar=val,
                    )
+    # ----------------------------------- apply filters on QuerySet -------------------------------------------------- #
    for el, val in prep_dict.items():
        prep_data.append({"type": el, "value": val})
    qs = Product.objects.filter()
@ -38,7 +102,8 @@ def process_search(data: List[dict]) -> List[dict]:
        elif typ == "Category":
            qs = qs.filter(category__name__unaccent__trigram_similar=val)
        elif typ == "Unknown":
+            # add translate
            continue
        else:
            qs = qs.filter(characteristics__in=val)
-    return [x.serialize_self() for x in qs[:5]]
+    return [x.serialize_self() for x in qs[offset: offset + limit]]
--- a/app/search/services/spell_check.py
+++ b/app/search/services/spell_check.py
@ -2,5 +2,9 @@ import requests as r


 def spell_check(word: str) -> str:
-    res = r.get(f'https://speller.yandex.net/services/spellservice.json/checkText?text={word}')
-    return res.json()[0]['s'][0]
+    """Return ``word`` with Yandex.Speller's first suggestion applied to each misspelling.
+
+    The whole text is returned, not just the first corrected word, so
+    multi-word queries keep their other words. The text is returned unchanged
+    when it is empty, when the service reports no errors, or when the request
+    or its response cannot be used: spell checking is a best-effort step.
+    """
+    if not word:
+        return word
+    try:
+        res = r.get(
+            "https://speller.yandex.net/services/spellservice.json/checkText",
+            # ``params`` URL-encodes the text (spaces, "&", "#", Cyrillic).
+            params={"text": word},
+            timeout=5,
+        )
+        errors = res.json()
+    except (r.RequestException, ValueError):
+        return word
+    if not isinstance(errors, list):
+        # Anything other than a list of misspellings is not a usable answer.
+        return word
+    fixed = word
+    # Apply from the end of the text so earlier ``pos`` offsets stay valid.
+    for err in sorted(errors, key=lambda e: e["pos"], reverse=True):
+        if not err["s"]:
+            continue  # misspelling reported without a suggestion
+        start, end = err["pos"], err["pos"] + err["len"]
+        fixed = fixed[:start] + err["s"][0] + fixed[end:]
+    return fixed
--- a/app/search/services/translate.py
+++ b/app/search/services/translate.py
@ -5,9 +5,14 @@ from typing import List


 def translate_ru_en(word: str) -> List[str]:
-    res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=ru-en&text={word}")
-    return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])]
+    """Return the English translations Yandex.Dictionary lists for a Russian ``word``.
+
+    An empty list is returned when the response carries no ``def`` entries,
+    e.g. for an unknown word or an error payload.
+    """
+    res = r.get(
+        "https://dictionary.yandex.net/api/v1/dicservice.json/lookup",
+        # ``params`` URL-encodes the key and the text.
+        params={"key": YANDEX_DICT_API_KEY, "lang": "ru-en", "text": word},
+        timeout=5,
+    )
+    definitions = res.json().get("def", [])
+    return [
+        tr["text"]
+        for definition in definitions
+        for tr in definition.get("tr", [])
+    ]
+

 def translate_en_ru(word: str) -> List[str]:
-    res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=en-ru&text={word}")
-    return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])]
+    """Return the Russian translations Yandex.Dictionary lists for an English ``word``.
+
+    An empty list is returned when the response carries no ``def`` entries,
+    e.g. for an unknown word or an error payload.
+    """
+    res = r.get(
+        "https://dictionary.yandex.net/api/v1/dicservice.json/lookup",
+        # ``params`` URL-encodes the key and the text.
+        params={"key": YANDEX_DICT_API_KEY, "lang": "en-ru", "text": word},
+        timeout=5,
+    )
+    definitions = res.json().get("def", [])
+    return [
+        tr["text"]
+        for definition in definitions
+        for tr in definition.get("tr", [])
+    ]