Merge branch 'main' into speller-translate

2025-10-02 17:56:34 +03:00 · 2022-10-22 18:26:28 +03:00 · 2022-10-22 18:26:28 +03:00 · 84216870eb
commit 84216870eb
parent 4c913ba776 dff46d3717
12 changed files with 413 additions and 61 deletions
--- a/app/conf/api.py
+++ b/app/conf/api.py
@ -1,9 +1,15 @@
 from django.urls import path

-from search.api.views import SearchApi, HintApi, AutoCompleteApi
+from search.api.views import (
+    SearchApi,
+    HintApi,
+    AutoCompleteApi,
+    IncreaseProductScoreApi,
+)

 urlpatterns = [
    path("search", SearchApi.as_view(), name="search_api"),
-    path("hint", HintApi.as_view(), name="hint api"),
-    path('autocomplete_schema', AutoCompleteApi.as_view(), name='autocomplete api')
+    path("hint", HintApi.as_view(), name="hint_api"),
+    path("autocomplete_schema", AutoCompleteApi.as_view(), name="autocomplete_api"),
+    path("score/<int:pk>", IncreaseProductScoreApi.as_view(), name="score_api"),
 ]
--- a/app/conf/settings/base.py
+++ b/app/conf/settings/base.py
@ -63,6 +63,7 @@ DJANGO_APPS = [
    "django.contrib.humanize",
    "django.contrib.admin",
    "django.forms",
+    "django.contrib.postgres",
 ]
 THIRD_PARTY_APPS = ["rest_framework", "corsheaders", "drf_yasg"]

@ -216,3 +217,5 @@ REST_FRAMEWORK = {

 # django-cors-headers
 CORS_ALLOW_ALL_ORIGINS = True
+
+# SECURITY(review): this key is committed to version control and should be
+# treated as leaked. Provide a rotated key through the YANDEX_DICT_API_KEY
+# environment variable; the literal is kept only as a backward-compatible
+# fallback and should be deleted once every deployment sets the variable.
+import os  # noqa: E402  (local to this setting; harmless if already imported)
+
+YANDEX_DICT_API_KEY = os.environ.get(
+    "YANDEX_DICT_API_KEY",
+    "dict.1.1.20221022T010312Z.55cce597a6cfa505.f416aba85e7642eedc1b76b8e21c06506fb17f83",
+)
--- a/app/search/api/serializers.py
+++ b/app/search/api/serializers.py
@ -2,9 +2,21 @@ from rest_framework import serializers
 from django.core.validators import MinLengthValidator, MinValueValidator


+class QueryFilterSerializer(serializers.Serializer):
+    # One search filter: a free-text ``value`` tagged with a ``type`` label.
+    # Used for validation only, so both persistence hooks are disabled.
+    value = serializers.CharField(max_length=100)
+    type = serializers.CharField(max_length=100)
+
+    def update(self, instance, validated_data):
+        # Nothing is ever persisted through this serializer.
+        raise NotImplementedError
+
+    def create(self, validated_data):
+        # Nothing is ever persisted through this serializer.
+        raise NotImplementedError
+

 class SearchSerializer(serializers.Serializer):
-    body = serializers.CharField(max_length=200)
+    body = serializers.ListSerializer(child=QueryFilterSerializer())
+    limit = serializers.IntegerField(default=5, min_value=1)
+    offset = serializers.IntegerField(default=0, min_value=0)

    def create(self, validated_data):
        raise NotImplementedError
@ -27,22 +39,50 @@ class HintRequestSerializer(serializers.Serializer):
    content = serializers.CharField()

    def create(self, validated_data):
-        raise NotImplemented
+        raise NotImplementedError
+
+    def update(self, instance, validated_data):
+        raise NotImplementedError


 class HintResponseSerializer(serializers.Serializer):
    type = serializers.CharField()
    content = serializers.CharField()

+    def create(self, validated_data):
+        raise NotImplementedError
+
+    def update(self, instance, validated_data):
+        raise NotImplementedError
+

 class AutoCompleteRequestSerializer(serializers.Serializer):
    content = serializers.CharField(validators=[MinLengthValidator(3)])
+    exclude = serializers.ListSerializer(child=QueryFilterSerializer(), default=[])
+
+    def create(self, validated_data):
+        raise NotImplementedError
+
+    def update(self, instance, validated_data):
+        raise NotImplementedError


 class AutoCompleteSerializerNode(serializers.Serializer):
    coordinate = serializers.IntegerField(validators=[MinValueValidator(0)])
    value = HintResponseSerializer()

+    def create(self, validated_data):
+        raise NotImplementedError
+
+    def update(self, instance, validated_data):
+        raise NotImplementedError
+

 class AutoCompleteResponseSerializer(serializers.Serializer):
    nodes = serializers.ListField(child=AutoCompleteSerializerNode())
+
+    def create(self, validated_data):
+        raise NotImplementedError
+
+    def update(self, instance, validated_data):
+        raise NotImplementedError
--- a/app/search/api/views.py
+++ b/app/search/api/views.py
@ -1,51 +1,93 @@
 from drf_yasg import openapi
 from drf_yasg.utils import swagger_auto_schema
 from rest_framework import status
+from rest_framework.generics import get_object_or_404
 from rest_framework.response import Response
 from rest_framework.views import APIView
 from search.api.serializers import HintRequestSerializer

-from search.api.serializers import SearchSerializer, ResponseSerializer, HintResponseSerializer, AutoCompleteRequestSerializer, AutoCompleteResponseSerializer
-from search.services.search import process_string
+from search.api.serializers import (
+    SearchSerializer,
+    ResponseSerializer,
+    HintResponseSerializer,
+    AutoCompleteRequestSerializer,
+    AutoCompleteResponseSerializer,
+)
+from search.models import Product
+from search.services.search import process_search
 from search.services.autocomplete_schema import autocomplete_schema

 from search.services.hints import get_hints

 user_response = openapi.Response("search results", ResponseSerializer)
 hint_response = openapi.Response("hints", HintResponseSerializer)
-autocomplete_response = openapi.Response("autocomplete schema", AutoCompleteResponseSerializer)
+autocomplete_response = openapi.Response(
+    "autocomplete schema", AutoCompleteResponseSerializer
+)


 class SearchApi(APIView):
    @swagger_auto_schema(request_body=SearchSerializer, responses={200: user_response})
-    def post(self, request, format=None):
+    def post(self, request):
        serializer = SearchSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        return Response(
-            process_string(serializer.data["body"]), status=status.HTTP_200_OK
+            process_search(
+                serializer.data["body"],
+                serializer.data["limit"],
+                serializer.data["offset"],
+            ),
+            status=status.HTTP_200_OK,
        )


 class HintApi(APIView):
-    @swagger_auto_schema(request_body=HintRequestSerializer, responses={200: hint_response})
-    def post(self, request, format=None):
+    @swagger_auto_schema(
+        request_body=HintRequestSerializer, responses={200: hint_response}
+    )
+    def post(self, request):
        serializer = HintRequestSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        return Response(
            {
-                'type': get_hints(serializer.data['content']),
-                'value': serializer.data['content']
+                "type": get_hints(serializer.data["content"]),
+                "value": serializer.data["content"],
            },
-            status=status.HTTP_200_OK   
+            status=status.HTTP_200_OK,
        )

+
 class AutoCompleteApi(APIView):
-    @swagger_auto_schema(request_body=AutoCompleteRequestSerializer, responses={200: autocomplete_response})
-    def post(self, request, format=None):
+    @swagger_auto_schema(
+        request_body=AutoCompleteRequestSerializer,
+        responses={200: autocomplete_response},
+    )
+    def post(self, request):
        serializer = AutoCompleteRequestSerializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        return Response(
            {
-                'nodes': autocomplete_schema(serializer.data['content'])
-            }, status=status.HTTP_200_OK
+                "nodes": autocomplete_schema(
+                    serializer.data["content"], serializer.data["exclude"]
+                )
+            },
+            status=status.HTTP_200_OK,
        )
+
+
+class IncreaseProductScoreApi(APIView):
+    """Increase the score of a single product by one."""
+
+    @swagger_auto_schema(
+        manual_parameters=[
+            openapi.Parameter(
+                # The path parameter must carry the same name as the URL
+                # kwarg received by ``post`` (``pk``), otherwise the schema
+                # documents a parameter that does not exist in the route.
+                "pk",
+                openapi.IN_PATH,
+                description="Product id",
+                type=openapi.TYPE_INTEGER,
+            )
+        ]
+    )
+    def post(self, request, pk):
+        """Increment the product's score and return the new value.
+
+        Responds with 404 when no product has the given id, otherwise with
+        ``{"score": <new score>}``.
+        """
+        # Function-scope import so this block does not depend on a change to
+        # the module's import section.
+        from django.db.models import F
+
+        product = get_object_or_404(Product, id=pk)
+        # Increment inside the database: a Python-side ``score += 1`` followed
+        # by ``save()`` silently drops increments made by concurrent requests.
+        Product.objects.filter(pk=product.pk).update(score=F("score") + 1)
+        # Reload only the column we changed so the response shows the stored value.
+        product.refresh_from_db(fields=["score"])
+        return Response({"score": product.score}, status=status.HTTP_200_OK)
--- a/app/search/models.py
+++ b/app/search/models.py
@ -18,13 +18,21 @@ class Characteristic(models.Model):
 class UnitCharacteristic(models.Model):
    name = models.TextField("Имя", blank=False)
    value = models.TextField("Значение", blank=False)
+    numeric_value_min = models.IntegerField(default=0)
+    numeric_value_max = models.IntegerField(default=0)
    unit = models.TextField("Размерность", blank=False)

    def __str__(self):
        return str(self.name)

    def serialize_self(self):
-        return {"name": self.name, "value": self.value, "unit": self.unit}
+        return {
+            "name": self.name,
+            "value": self.numeric_value_min
+            if self.numeric_value_min == self.numeric_value_max
+            else f"{self.numeric_value_min}:{self.numeric_value_max}",
+            "unit": self.unit,
+        }

    class Meta:
        db_table = "unit_characteristic"
@ -49,20 +57,28 @@ class Product(models.Model):
        Category, related_name="products", on_delete=models.CASCADE
    )

+    score = models.IntegerField(default=0)
+
    def __str__(self):
        return str(self.name)

    def serialize_self(self) -> dict:
        return {
+            "id": self.id,
            "name": self.name,
+            "score": self.score,
            "characteristic": [
-                x.serialize_self() for x in self.characteristics.objects.all()
+                x.characteristic.serialize_self() for x in self.characteristics.all()
            ]
-            + [x.serialize_self() for x in self.unit_characteristics.objects.all()],
+            + [
+                x.characteristic.serialize_self()
+                for x in self.unit_characteristics.all()
+            ],
        }

    class Meta:
        db_table = "product"
+        ordering = ["-score"]


 class ProductCharacteristic(models.Model):
--- a/app/search/services/autocomplete_schema.py
+++ b/app/search/services/autocomplete_schema.py
@ -1,37 +1,49 @@
+from typing import List, Dict
+
 from search.models import Product, Category, Characteristic

-def autocomplete_schema(val: str):
+
+def autocomplete_schema(val: str, exclude: List[Dict]):
+    """Build autocomplete suggestions for the fragment ``val``.
+
+    Returns a list of ``{"coordinate": int, "value": {"type", "value"}}``
+    nodes taken from product names, category names and characteristic values
+    (at most 20 of each). Product-name suggestions are skipped when
+    ``exclude`` already holds a ``Name`` filter, category suggestions when it
+    holds a ``Category`` filter. ``coordinate`` is the offset of the fragment
+    inside the suggestion, or 0 when it cannot be located literally.
+    """
+    exclude = [dict(x) for x in exclude]
+    name_exclude = [x["value"] for x in exclude if x["type"] == "Name"]
+    category_exclude = [x["value"] for x in exclude if x["type"] == "Category"]
+    # The database matches accent- and case-insensitively, so a returned row
+    # is not guaranteed to contain the lowercased fragment verbatim.
+    # ``str.index`` would raise ValueError there; use ``find`` and fall back
+    # to offset 0 instead.
+    needle = val.lower()
    schema = []
+    if not name_exclude:
+        schema.extend(
+            [
+                {
+                    "coordinate": max(product["name"].lower().find(needle), 0),
+                    "value": {
+                        "type": "Name",
+                        "value": product["name"],
+                    },
+                }
+                for product in Product.objects.filter(name__unaccent__icontains=val)[
+                    :20
+                ].values("name")
+            ]
+        )
+    if not category_exclude:
+        schema.extend(
+            [
+                {
+                    "coordinate": max(cat["name"].lower().find(needle), 0),
+                    "value": {"type": "Category", "value": cat["name"]},
+                }
+                for cat in Category.objects.filter(name__unaccent__icontains=val)[
+                    :20
+                ].values("name")
+            ]
+        )
    schema.extend(
        [
            {
-                'coordinate': product['name'].index(val),
-                'value': {
-                    'type': 'Name',
-                    'value': product['name'],
-                }
-            } for product in Product.objects.filter(name__contains=val).values('name')]
-    )
-    schema.extend(
-        [
-            {
-                'coordinate': cat['name'].index(val),
-                'value': {
-                    'type': 'Category',
-                    'value': cat['name']
-                }
-            } for cat in Category.objects.filter(name__contains=val).values('name')
-        ]
-    )
-    schema.extend(
-        [
-            {
-                'coordinate': char.name.index(val),
-                'value': {
-                    'type': char.name,
-                    'value': char.value
-                }
-            } for char in Characteristic.objects.filter(name__contains=val).values('name', 'value')
+                "coordinate": max(char["value"].lower().find(needle), 0),
+                "value": {"type": char["name"], "value": char["value"]},
+            }
+            for char in Characteristic.objects.filter(value__unaccent__icontains=val)[
+                :20
+            ].values("name", "value")
        ]
    )
    return schema
--- a/app/search/services/hints.py
+++ b/app/search/services/hints.py
@ -2,11 +2,11 @@ from search.models import Product, Category, Characteristic


 def get_hints(content: str) -> str:
-    category = 'Unknown'
+    category = "All"
    if content in list(map(lambda product: product.name, Product.objects.all())):
-        category = 'Name'
+        category = "Name"
    elif content in list(map(lambda category: category.name, Category.objects.all())):
-        category = 'Category'
+        category = "Category"
    elif content in list(map(lambda char: char.value, Characteristic.objects.all())):
-        category = Characteristic.objects.get(value=content).name
+        category = Characteristic.objects.filter(value=content).first().name
    return category
--- a/app/search/services/load_products.py
+++ b/app/search/services/load_products.py
@ -1,3 +1,4 @@
+import re
 from ast import literal_eval

 import pandas as pd
@ -59,3 +60,27 @@ def load_excel():
            # malformed node or string: nan \ duplicate key
            print("СКОРОСШИВАТЕЛЬ")
            continue
+
+
+def process_unit_character():
+    """Fill ``numeric_value_min`` / ``numeric_value_max`` on unit characteristics.
+
+    Every number found in ``UnitCharacteristic.value`` is truncated to an
+    integer (a decimal comma is read as a decimal point); the smallest and the
+    largest become the stored bounds, so a single number yields ``min == max``.
+    Rows without any number are left untouched. Rows whose numbers cannot be
+    parsed are deleted.
+    """
+    # Raw string: the pattern relies on ``\d`` and ``\.``, which a plain
+    # string literal only passes through as invalid escape sequences.
+    number_re = re.compile(
+        r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?"
+    )
+    for el in UnitCharacteristic.objects.all():
+        found = number_re.findall(el.value)
+        if not found:
+            continue
+        try:
+            nums = [int(float(x.replace(",", "."))) for x in found]
+        except ValueError:
+            # e.g. "1,000,000" becomes "1.000.000" after the comma swap.
+            el.delete()
+            continue
+        el.numeric_value_min = min(nums)
+        el.numeric_value_max = max(nums)
+        el.save()
--- a/app/search/services/search.py
+++ b/app/search/services/search.py
@ -1,6 +1,206 @@
-from search.models import Product
+import string
+
+from django.db.models import QuerySet
+
+from search.models import (
+    Product,
+    Characteristic,
+    ProductCharacteristic,
+    ProductUnitCharacteristic,
+    UnitCharacteristic,
+    Category,
+)
 from typing import List

+from search.services.hints import get_hints
+from search.services.spell_check import spell_check
+from search.services.translate import translate_en_ru, translate_ru_en

-def process_string(text: str) -> List[dict]:
-    return [x.serialize_self() for x in Product.objects.filter(name__contains=text)[5:]]
+
+def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str):
+    """Narrow ``unit`` by a comparison such as ``>=5``, ``<3.2`` or ``=10``.
+
+    The number after the operator is truncated to an integer. ``unit`` is
+    returned unchanged when ``operation`` starts with no known operator.
+    """
+    # Two-character operators come first so that ">=" is not read as ">".
+    rules = (
+        (("<=", "=<"), ("characteristic__numeric_value_max__lte",)),
+        (("=>", ">="), ("characteristic__numeric_value_min__gte",)),
+        ((">",), ("characteristic__numeric_value_min__gt",)),
+        (("<",), ("characteristic__numeric_value_max__lt",)),
+        (
+            ("=",),
+            (
+                "characteristic__numeric_value_min__gte",
+                "characteristic__numeric_value_max__lte",
+            ),
+        ),
+    )
+    for prefixes, lookups in rules:
+        if operation.startswith(prefixes):
+            # All prefixes within one rule have the same length.
+            bound = int(float(operation[len(prefixes[0]) :]))
+            return unit.filter(**{lookup: bound for lookup in lookups})
+    return unit
+
+
+def _clean_text(text: str) -> List[str]:
+    """Split ``text`` into words, treating ``. , ! ?`` like whitespace."""
+    separators = str.maketrans({mark: " " for mark in ".,!?"})
+    return text.translate(separators).split()
+
+
+def apply_qs_search(qs: Product.objects, text: str):
+    """Return ``qs`` united with the products whose name matches ``text``.
+
+    ``text`` is split into words; a product matches when its unaccented name
+    is trigram-similar to, or contains, any of the words. The result is the
+    union (``|``) of ``qs`` and those matches, so pass
+    ``Product.objects.none()`` to obtain the name matches alone.
+    """
+    words = Product.objects.none()
+    for word in _clean_text(text):
+        words = (
+            words
+            | Product.objects.filter(name__unaccent__trigram_similar=word)
+            | Product.objects.filter(name__unaccent__icontains=word)
+        )
+    # The leftover debug ``print`` calls were dropped: printing a QuerySet
+    # evaluates it, costing an extra database query per print on every search.
+    return qs | words
+
+
+def apply_all_qs_search(orig_qs, text: str):
+    """Restrict ``orig_qs`` to products matching ``text`` anywhere.
+
+    A product matches when any word of ``text`` hits its name (see
+    ``apply_qs_search``), the name of its category, or the value of one of
+    its characteristics. The matches are intersected with ``orig_qs``.
+    """
+    # names
+    qs = apply_qs_search(Product.objects.none(), text)
+    words = _clean_text(text)
+
+    # categories: filter the full table. Filtering the empty accumulator
+    # itself, as before, could never match anything.
+    cats = Category.objects.none()
+    for word in words:
+        cats = cats | Category.objects.filter(name__icontains=word)
+    qs = qs | Product.objects.filter(category__in=cats)
+
+    # characteristics: same fix as for categories
+    chars = Characteristic.objects.none()
+    for word in words:
+        chars = chars | Characteristic.objects.filter(value__icontains=word)
+    qs = qs | Product.objects.filter(characteristics__characteristic__in=chars)
+
+    return qs & orig_qs
+
+
+def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
+    """Run a filtered product search and return one page of serialized hits.
+
+    ``data`` is a list of ``{"type": ..., "value": ...}`` filters. ``type`` is
+    ``Name``, ``Category``, ``Characteristic``, ``All``, ``Unknown`` (resolved
+    through ``get_hints``) or the name of a characteristic; a name starting
+    with ``*`` addresses a unit characteristic whose ``value`` is a comparison
+    such as ``>=5``. Several filters for the same characteristic name are
+    OR-ed together; every resulting filter then narrows the result.
+
+    Returns ``Product.serialize_self()`` dicts ordered by descending score,
+    sliced to ``[offset, offset + limit)``.
+    """
+    passthrough_types = ("Name", "Category", "Characteristic", "All", "Unknown")
+    prep_data = []
+    prep_dict = {}
+    prep_dict_char_type = {}
+    # --------------------------------------- prepare filters -------------------------------------------------------- #
+    for x in data:
+        dat = dict(x)
+        typ = dat["type"]
+        if typ == "Unknown":
+            # The hint may be a generic type or a characteristic name. A
+            # characteristic name must take the characteristic path below;
+            # previously it reached ``characteristics__in=<str>`` and crashed.
+            typ = get_hints(dat["value"])
+        if typ in passthrough_types:
+            prep_data.append({"type": typ, "value": spell_check(dat["value"])})
+            continue
+
+        # Anything else names a characteristic ("*" prefix: unit characteristic).
+        is_unit = typ.startswith("*")
+        if typ not in prep_dict_char_type:
+            model = UnitCharacteristic if is_unit else Characteristic
+            # NOTE(review): unit names are looked up with the leading "*"
+            # still attached -- confirm stored names really contain it.
+            prep_dict_char_type[typ] = model.objects.filter(
+                name__unaccent__trigram_similar=typ
+            ) | model.objects.filter(name__icontains=typ)
+        char_types = prep_dict_char_type[typ]
+        if is_unit:
+            unit = ProductUnitCharacteristic.objects.filter(
+                characteristic__in=char_types,
+            )
+            # Comparison strings are passed on raw, not spell-checked.
+            matches = process_unit_operation(unit, dat["value"])
+        else:
+            val = spell_check(dat["value"])
+            matches = ProductCharacteristic.objects.filter(
+                characteristic__in=char_types,
+                characteristic__value__unaccent__trigram_similar=val,
+            ) | ProductCharacteristic.objects.filter(
+                characteristic__in=char_types,
+                characteristic__value__icontains=val,
+            )
+        # Repeated filters for one characteristic widen each other.
+        prep_dict[typ] = (prep_dict[typ] | matches) if typ in prep_dict else matches
+    for el, val in prep_dict.items():
+        prep_data.append({"type": el, "value": val})
+    # ----------------------------------- apply filters on QuerySet -------------------------------------------------- #
+    qs = Product.objects.filter()
+    for x in prep_data:
+        typ = x["type"]
+        val = x["value"]
+        if typ == "Name":
+            qs = _narrow_by_name(qs, val)
+        elif typ == "All":
+            qs = apply_all_qs_search(qs, val)
+        elif typ == "Category":
+            qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter(
+                category__name__icontains=val
+            )
+        elif typ == "Characteristic":
+            char = ProductCharacteristic.objects.filter(product__in=qs)
+            char = char.filter(characteristic__value__icontains=val) | char.filter(
+                characteristic__value__unaccent__trigram_similar=val
+            )
+            qs = qs.filter(characteristics__in=char)
+        elif typ == "Unknown":
+            # Only reached when ``get_hints`` itself answers "Unknown".
+            if not val:
+                continue
+            # A printable-ASCII first character is taken to mean English input.
+            if val[0] in string.printable:
+                val = "".join(translate_en_ru(val))
+            else:
+                val = "".join(translate_ru_en(val))
+            hinted = get_hints(val)
+            if hinted == "Name":
+                qs = _narrow_by_name(qs, val)
+            elif hinted == "Category":
+                qs = qs.filter(category__name__unaccent__trigram_similar=val)
+            elif hinted == "Unknown":
+                continue
+            else:
+                qs = qs.filter(
+                    characteristics__characteristic__name__unaccent__trigram_similar=val
+                )
+        elif typ.startswith("*"):
+            qs = qs.filter(unit_characteristics__in=val)
+        else:
+            qs = qs.filter(characteristics__in=val)
+    return [
+        x.serialize_self()
+        for x in qs.distinct().order_by("-score")[offset : offset + limit]
+    ]
+
+
+def _narrow_by_name(qs, text: str):
+    """Keep only the products of ``qs`` whose name matches a word of ``text``.
+
+    ``apply_qs_search`` returns a union, which can never shrink ``qs``; the
+    name matches are therefore collected on their own and intersected.
+    A ``text`` without any word leaves ``qs`` unchanged.
+    """
+    if not _clean_text(text):
+        return qs
+    return qs & apply_qs_search(Product.objects.none(), text)
--- a/app/search/services/spell_check.py
+++ b/app/search/services/spell_check.py
@ -4,6 +4,7 @@ from spellchecker import SpellChecker
 speller_ru = SpellChecker(language='ru')
 speller_eng = SpellChecker(language='en')

+
 def spell_check_ru(word: str) -> str:
    res = speller_ru.correction(word)
    if not len(res): 
--- a/app/search/services/translate.py
+++ b/app/search/services/translate.py
@ -5,9 +5,14 @@ from typing import List


 def translate_ru_en(word: str) -> List[str]:
-    res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=ru-en&text={word}")
-    return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])]
+    """Return the Russian-to-English translations of ``word``.
+
+    Queries the Yandex Dictionary lookup endpoint and flattens the ``tr``
+    entries of every ``def`` item of the JSON answer into a list of texts.
+    """
+    # NOTE(review): assumes ``r`` is the ``requests`` module -- confirm.
+    res = r.get(
+        "https://dictionary.yandex.net/api/v1/dicservice.json/lookup",
+        # ``params`` URL-encodes the user text; interpolating it into the URL
+        # let characters such as "&" or "#" corrupt the query string.
+        params={"key": YANDEX_DICT_API_KEY, "lang": "ru-en", "text": word},
+        # Without a timeout a stalled upstream blocks the worker indefinitely.
+        timeout=5,
+    )
+    return [i["text"] for i in chain(*[j["tr"] for j in res.json()["def"]])]
+

 def translate_en_ru(word: str) -> List[str]:
-    res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=en-ru&text={word}")
-    return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])]
+    """Return the English-to-Russian translations of ``word``.
+
+    Queries the Yandex Dictionary lookup endpoint and flattens the ``tr``
+    entries of every ``def`` item of the JSON answer into a list of texts.
+    """
+    # NOTE(review): assumes ``r`` is the ``requests`` module -- confirm.
+    res = r.get(
+        "https://dictionary.yandex.net/api/v1/dicservice.json/lookup",
+        # ``params`` URL-encodes the user text; interpolating it into the URL
+        # let characters such as "&" or "#" corrupt the query string.
+        params={"key": YANDEX_DICT_API_KEY, "lang": "en-ru", "text": word},
+        # Without a timeout a stalled upstream blocks the worker indefinitely.
+        timeout=5,
+    )
+    return [i["text"] for i in chain(*[j["tr"] for j in res.json()["def"]])]
--- a/pg.sql
+++ b/pg.sql
@ -0,0 +1,2 @@
+CREATE EXTENSION unaccent;
+CREATE EXTENSION pg_trgm;