diff --git a/app/conf/api.py b/app/conf/api.py index ad55947..472a8f8 100644 --- a/app/conf/api.py +++ b/app/conf/api.py @@ -1,9 +1,15 @@ from django.urls import path -from search.api.views import SearchApi, HintApi, AutoCompleteApi +from search.api.views import ( + SearchApi, + HintApi, + AutoCompleteApi, + IncreaseProductScoreApi, +) urlpatterns = [ path("search", SearchApi.as_view(), name="search_api"), - path("hint", HintApi.as_view(), name="hint api"), - path('autocomplete_schema', AutoCompleteApi.as_view(), name='autocomplete api') + path("hint", HintApi.as_view(), name="hint_api"), + path("autocomplete_schema", AutoCompleteApi.as_view(), name="autocomplete_api"), + path("score/", IncreaseProductScoreApi.as_view(), name="score_api"), ] diff --git a/app/conf/settings/base.py b/app/conf/settings/base.py index ac87dee..567d2f3 100644 --- a/app/conf/settings/base.py +++ b/app/conf/settings/base.py @@ -63,6 +63,7 @@ DJANGO_APPS = [ "django.contrib.humanize", "django.contrib.admin", "django.forms", + "django.contrib.postgres", ] THIRD_PARTY_APPS = ["rest_framework", "corsheaders", "drf_yasg"] @@ -216,3 +217,5 @@ REST_FRAMEWORK = { # django-cors-headers CORS_ALLOW_ALL_ORIGINS = True + +YANDEX_DICT_API_KEY = "dict.1.1.20221022T010312Z.55cce597a6cfa505.f416aba85e7642eedc1b76b8e21c06506fb17f83" diff --git a/app/search/api/serializers.py b/app/search/api/serializers.py index aa79b4b..e408e3a 100644 --- a/app/search/api/serializers.py +++ b/app/search/api/serializers.py @@ -2,9 +2,21 @@ from rest_framework import serializers from django.core.validators import MinLengthValidator, MinValueValidator +class QueryFilterSerializer(serializers.Serializer): + value = serializers.CharField(max_length=100) + type = serializers.CharField(max_length=100) + + def create(self, validated_data): + raise NotImplementedError + + def update(self, instance, validated_data): + raise NotImplementedError + class SearchSerializer(serializers.Serializer): - body = serializers.CharField(max_length=200) + body = serializers.ListSerializer(child=QueryFilterSerializer()) + limit = serializers.IntegerField(default=5, min_value=1) + offset = serializers.IntegerField(default=0, min_value=0) def create(self, validated_data): raise NotImplementedError @@ -27,22 +39,50 @@ class HintRequestSerializer(serializers.Serializer): content = serializers.CharField() def create(self, validated_data): - raise NotImplemented + raise NotImplementedError + + def update(self, instance, validated_data): + raise NotImplementedError class HintResponseSerializer(serializers.Serializer): type = serializers.CharField() content = serializers.CharField() + def create(self, validated_data): + raise NotImplementedError + + def update(self, instance, validated_data): + raise NotImplementedError + class AutoCompleteRequestSerializer(serializers.Serializer): content = serializers.CharField(validators=[MinLengthValidator(3)]) + exclude = serializers.ListSerializer(child=QueryFilterSerializer(), default=[]) + + def create(self, validated_data): + raise NotImplementedError + + def update(self, instance, validated_data): + raise NotImplementedError class AutoCompleteSerializerNode(serializers.Serializer): coordinate = serializers.IntegerField(validators=[MinValueValidator(0)]) value = HintResponseSerializer() + def create(self, validated_data): + raise NotImplementedError + + def update(self, instance, validated_data): + raise NotImplementedError + class AutoCompleteResponseSerializer(serializers.Serializer): nodes = serializers.ListField(child=AutoCompleteSerializerNode()) + + def create(self, validated_data): + raise NotImplementedError + + def update(self, instance, validated_data): + raise NotImplementedError diff --git a/app/search/api/views.py b/app/search/api/views.py index 81e5993..e450be0 100644 --- a/app/search/api/views.py +++ b/app/search/api/views.py @@ -1,51 +1,93 @@ from drf_yasg import openapi from drf_yasg.utils import swagger_auto_schema from rest_framework import status +from rest_framework.generics import get_object_or_404 from rest_framework.response import Response from rest_framework.views import APIView from search.api.serializers import HintRequestSerializer -from search.api.serializers import SearchSerializer, ResponseSerializer, HintResponseSerializer, AutoCompleteRequestSerializer, AutoCompleteResponseSerializer -from search.services.search import process_string +from search.api.serializers import ( + SearchSerializer, + ResponseSerializer, + HintResponseSerializer, + AutoCompleteRequestSerializer, + AutoCompleteResponseSerializer, +) +from search.models import Product +from search.services.search import process_search from search.services.autocomplete_schema import autocomplete_schema from search.services.hints import get_hints user_response = openapi.Response("search results", ResponseSerializer) hint_response = openapi.Response("hints", HintResponseSerializer) -autocomplete_response = openapi.Response("autocomplete schema", AutoCompleteResponseSerializer) +autocomplete_response = openapi.Response( + "autocomplete schema", AutoCompleteResponseSerializer +) class SearchApi(APIView): @swagger_auto_schema(request_body=SearchSerializer, responses={200: user_response}) - def post(self, request, format=None): + def post(self, request): serializer = SearchSerializer(data=request.data) serializer.is_valid(raise_exception=True) return Response( - process_string(serializer.data["body"]), status=status.HTTP_200_OK + process_search( + serializer.data["body"], + serializer.data["limit"], + serializer.data["offset"], + ), + status=status.HTTP_200_OK, ) class HintApi(APIView): - @swagger_auto_schema(request_body=HintRequestSerializer, responses={200: hint_response}) - def post(self, request, format=None): + @swagger_auto_schema( + request_body=HintRequestSerializer, responses={200: hint_response} + ) + def post(self, request): serializer = HintRequestSerializer(data=request.data) serializer.is_valid(raise_exception=True) return Response( { - 'type': get_hints(serializer.data['content']), - 'value': serializer.data['content'] + "type": get_hints(serializer.data["content"]), + "value": serializer.data["content"], }, - status=status.HTTP_200_OK + status=status.HTTP_200_OK, ) + class AutoCompleteApi(APIView): - @swagger_auto_schema(request_body=AutoCompleteRequestSerializer, responses={200: autocomplete_response}) - def post(self, request, format=None): + @swagger_auto_schema( + request_body=AutoCompleteRequestSerializer, + responses={200: autocomplete_response}, + ) + def post(self, request): serializer = AutoCompleteRequestSerializer(data=request.data) serializer.is_valid(raise_exception=True) return Response( { - 'nodes': autocomplete_schema(serializer.data['content']) - }, status=status.HTTP_200_OK + "nodes": autocomplete_schema( + serializer.data["content"], serializer.data["exclude"] + ) + }, + status=status.HTTP_200_OK, ) + + +class IncreaseProductScoreApi(APIView): + @swagger_auto_schema( + manual_parameters=[ + openapi.Parameter( + "id", + openapi.IN_PATH, + description="Product id", + type=openapi.TYPE_INTEGER, + ) + ] + ) + def post(self, request, pk): + product = get_object_or_404(Product, id=pk) + product.score += 1 + product.save(update_fields=["score"]) + return Response({"score": product.score}, status=status.HTTP_200_OK) diff --git a/app/search/models.py b/app/search/models.py index 6dc7e69..456b0b8 100644 --- a/app/search/models.py +++ b/app/search/models.py @@ -18,13 +18,21 @@ class Characteristic(models.Model): class UnitCharacteristic(models.Model): name = models.TextField("Имя", blank=False) value = models.TextField("Значение", blank=False) + numeric_value_min = models.IntegerField(default=0) + numeric_value_max = models.IntegerField(default=0) unit = models.TextField("Размерность", blank=False) def __str__(self): return str(self.name) def serialize_self(self): - return {"name": self.name, "value": self.value, "unit": self.unit} + return { + "name": self.name, + "value": self.numeric_value_min + if self.numeric_value_min == self.numeric_value_max + else f"{self.numeric_value_min}:{self.numeric_value_max}", + "unit": self.unit, + } class Meta: db_table = "unit_characteristic" @@ -49,20 +57,28 @@ class Product(models.Model): Category, related_name="products", on_delete=models.CASCADE ) + score = models.IntegerField(default=0) + def __str__(self): return str(self.name) def serialize_self(self) -> dict: return { + "id": self.id, "name": self.name, + "score": self.score, "characteristic": [ - x.serialize_self() for x in self.characteristics.objects.all() + x.characteristic.serialize_self() for x in self.characteristics.all() ] - + [x.serialize_self() for x in self.unit_characteristics.objects.all()], + + [ + x.characteristic.serialize_self() + for x in self.unit_characteristics.all() + ], } class Meta: db_table = "product" + ordering = ["-score"] class ProductCharacteristic(models.Model): diff --git a/app/search/services/autocomplete_schema.py b/app/search/services/autocomplete_schema.py index 9e6b185..f7f003e 100644 --- a/app/search/services/autocomplete_schema.py +++ b/app/search/services/autocomplete_schema.py @@ -1,37 +1,49 @@ +from typing import List, Dict + from search.models import Product, Category, Characteristic -def autocomplete_schema(val: str): + +def autocomplete_schema(val: str, exclude: List[Dict]): + exclude = [dict(x) for x in exclude] + name_exclude = [x["value"] for x in exclude if x["type"] == "Name"] + category_exclude = [x["value"] for x in exclude if x["type"] == "Category"] schema = [] + if not name_exclude: + schema.extend( + [ + { + "coordinate": product["name"].lower().index(val.lower()), + "value": { + "type": "Name", + "value": product["name"], + }, + } + for product in Product.objects.filter(name__unaccent__icontains=val)[ + :20 + ].values("name") + ] + ) + if not category_exclude: + schema.extend( + [ + { + "coordinate": cat["name"].lower().index(val.lower()), + "value": {"type": "Category", "value": cat["name"]}, + } + for cat in Category.objects.filter(name__unaccent__icontains=val)[ + :20 + ].values("name") + ] + ) schema.extend( [ { - 'coordinate': product['name'].index(val), - 'value': { - 'type': 'Name', - 'value': product['name'], - } - } for product in Product.objects.filter(name__contains=val).values('name')] - ) - schema.extend( - [ - { - 'coordinate': cat['name'].index(val), - 'value': { - 'type': 'Category', - 'value': cat['name'] - } - } for cat in Category.objects.filter(name__contains=val).values('name') - ] - ) - schema.extend( - [ - { - 'coordinate': char.name.index(val), - 'value': { - 'type': char.name, - 'value': char.value - } - } for char in Characteristic.objects.filter(name__contains=val).values('name', 'value') + "coordinate": char["value"].lower().index(val.lower()), + "value": {"type": char["name"], "value": char["value"]}, + } + for char in Characteristic.objects.filter(value__unaccent__icontains=val)[ + :20 + ].values("name", "value") ] ) return schema diff --git a/app/search/services/hints.py b/app/search/services/hints.py index d7d7c42..7763a0b 100644 --- a/app/search/services/hints.py +++ b/app/search/services/hints.py @@ -2,11 +2,11 @@ from search.models import Product, Category, Characteristic def get_hints(content: str) -> str: - category = 'Unknown' + category = "All" if content in list(map(lambda product: product.name, Product.objects.all())): - category = 'Name' + category = "Name" elif content in list(map(lambda category: category.name, Category.objects.all())): - category = 'Category' + category = "Category" elif content in list(map(lambda char: char.value, Characteristic.objects.all())): - category = Characteristic.objects.get(value=content).name + category = Characteristic.objects.filter(value=content).first().name return category diff --git a/app/search/services/load_products.py b/app/search/services/load_products.py index c28f54c..192c4d4 100644 --- a/app/search/services/load_products.py +++ b/app/search/services/load_products.py @@ -1,3 +1,4 @@ +import re from ast import literal_eval import pandas as pd @@ -59,3 +60,27 @@ def load_excel(): # malformed node or string: nan \ duplicate key print("СКОРОСШИВАТЕЛЬ") continue + + +def process_unit_character(): + for el in UnitCharacteristic.objects.all(): + nums = re.findall( + "[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?", el.value + ) + if len(nums) == 1: + try: + el.numeric_value_min = int(float(nums[0].replace(",", "."))) + el.numeric_value_max = int(float(nums[0].replace(",", "."))) + el.save() + except ValueError: + el.delete() + elif len(nums): + try: + nums = [int(float(x.replace(",", "."))) for x in nums] + min_num = min(nums) + max_num = max(nums) + el.numeric_value_min = min_num + el.numeric_value_max = max_num + el.save() + except ValueError: + el.delete() diff --git a/app/search/services/search.py b/app/search/services/search.py index 5ebf07e..2b11939 100644 --- a/app/search/services/search.py +++ b/app/search/services/search.py @@ -1,6 +1,206 @@ -from search.models import Product +import string + +from django.db.models import QuerySet + +from search.models import ( + Product, + Characteristic, + ProductCharacteristic, + ProductUnitCharacteristic, + UnitCharacteristic, + Category, +) from typing import List +from search.services.hints import get_hints +from search.services.spell_check import spell_check +from search.services.translate import translate_en_ru, translate_ru_en -def process_string(text: str) -> List[dict]: - return [x.serialize_self() for x in Product.objects.filter(name__contains=text)[5:]] + +def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str): + if operation.startswith("<=") or operation.startswith("=<"): + return unit.filter( + characteristic__numeric_value_max__lte=int(float(operation[2:])) + ) + elif operation.startswith("=>") or operation.startswith(">="): + return unit.filter( + characteristic__numeric_value_min__gte=int(float(operation[2:])) + ) + elif operation.startswith(">"): + return unit.filter( + characteristic__numeric_value_min__gt=int(float(operation[1:])) + ) + elif operation.startswith("<"): + return unit.filter( + characteristic__numeric_value_max__lt=int(float(operation[1:])) + ) + elif operation.startswith("="): + return unit.filter( + characteristic__numeric_value_min__gte=int(float(operation[1:])), + characteristic__numeric_value_max__lte=int(float(operation[1:])), + ) + return unit + + +def _clean_text(text: str) -> List[str]: + for st in [".", ",", "!", "?"]: + text = text.replace(st, " ") + text = text.split() + return text + + +def apply_qs_search(qs: Product.objects, text: str): + text = _clean_text(text) + words = Product.objects.none() + for word in text: + words = ( + words + | Product.objects.filter(name__unaccent__trigram_similar=word) + | Product.objects.filter(name__unaccent__icontains=word) + ) + print(words) + qs = qs | words + print(qs) + return qs + + +def apply_all_qs_search(orig_qs, text: str): + # words + qs = apply_qs_search(Product.objects.none(), text) + text = _clean_text(text) + + # categories + cats = Category.objects.none() + for word in text: + cats = cats | cats.filter(name__icontains=word) + qs = qs | Product.objects.filter(category__in=cats) + + # characteristics + chars = Characteristic.objects.none() + for word in text: + chars = chars | chars.filter( + value__icontains=word, + ) + qs = qs | Product.objects.filter(characteristics__characteristic__in=chars) + # print(qs) + + return qs & orig_qs + + +def process_search(data: List[dict], limit=5, offset=0) -> List[dict]: + prep_data = [] + prep_dict = {} + prep_dict_char_type = {} + # --------------------------------------- prepare filters -------------------------------------------------------- # + for x in data: + dat = dict(x) + if x["type"] in ["Name", "Category", "Characteristic", "All"]: + prep_data.append( + { + "type": dat["type"], + "value": spell_check( + dat["value"], + ), + } + ) + elif x["type"] == "Unknown": + type = get_hints(dat["value"]) + prep_data.append( + { + "type": type, + "value": spell_check( + dat["value"], + ), + } + ) + else: + val = spell_check( + dat["value"], + ) + if x["type"] in list(prep_dict.keys()): + if x["type"].startswith("*"): + unit = ProductUnitCharacteristic.objects.filter( + characteristic__in=prep_dict_char_type[x["type"]], + ) + prep_dict[x["type"]] = prep_dict[ + x["type"] + ] | process_unit_operation(unit, x["value"]) + else: + prep_dict[x["type"]] = ( + prep_dict[x["type"]] + | ProductCharacteristic.objects.filter( + characteristic__in=prep_dict_char_type[x["type"]], + characteristic__value__unaccent__trigram_similar=val, + ) + | ProductCharacteristic.objects.filter( + characteristic__in=prep_dict_char_type[x["type"]], + characteristic__value__icontains=val, + ) + ) + else: + if x["type"].startswith("*"): + prep_dict_char_type[x["type"]] = UnitCharacteristic.objects.filter( + name__unaccent__trigram_similar=x["type"] + ) | UnitCharacteristic.objects.filter(name__icontains=x["type"]) + unit = ProductUnitCharacteristic.objects.filter( + characteristic__in=prep_dict_char_type[x["type"]], + ) + prep_dict[x["type"]] = process_unit_operation(unit, x["value"]) + else: + prep_dict_char_type[x["type"]] = Characteristic.objects.filter( + name__unaccent__trigram_similar=x["type"] + ) | Characteristic.objects.filter(name__icontains=x["type"]) + prep_dict[x["type"]] = ProductCharacteristic.objects.filter( + characteristic__in=prep_dict_char_type[x["type"]], + characteristic__value__unaccent__trigram_similar=val, + ) | ProductCharacteristic.objects.filter( + characteristic__in=prep_dict_char_type[x["type"]], + characteristic__value__icontains=val, + ) + for el, val in prep_dict.items(): + prep_data.append({"type": el, "value": val}) + # ----------------------------------- apply filters on QuerySet -------------------------------------------------- # + qs = Product.objects.filter() + for x in prep_data: + typ = x["type"] + val = x["value"] + if typ == "Name": + qs = apply_qs_search(qs, val) + elif typ == "All": + qs = apply_all_qs_search(qs, val) + elif typ == "Category": + qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter( + category__name__icontains=val + ) + elif typ == "Characteristic": + char = ProductCharacteristic.objects.filter(product__in=qs) + char = char.filter(characteristic__value__icontains=val) | char.filter( + characteristic__value__unaccent__trigram_similar=val + ) + qs = qs.filter(characteristics__in=char) + elif typ == "Unknown": + if val[0] in string.printable: + val = "".join(translate_en_ru(val)) + else: + val = "".join(translate_ru_en(val)) + type = get_hints(val) + if type == "Name": + qs = apply_qs_search(qs, val) + elif type == "Category": + qs = qs.filter(category__name__unaccent__trigram_similar=val) + elif type == "Unknown": + continue + else: + qs = qs.filter( + characteristics__characteristic__name__unaccent__trigram_similar=val + ) + continue + else: + if typ.startswith("*"): + qs = qs.filter(unit_characteristics__in=val) + else: + qs = qs.filter(characteristics__in=val) + return [ + x.serialize_self() + for x in qs.distinct().order_by("-score")[offset : offset + limit] + ] diff --git a/app/search/services/spell_check.py b/app/search/services/spell_check.py index 1a74ec3..34446d0 100644 --- a/app/search/services/spell_check.py +++ b/app/search/services/spell_check.py @@ -4,6 +4,7 @@ from spellchecker import SpellChecker speller_ru = SpellChecker(language='ru') speller_eng = SpellChecker(language='en') + def spell_check_ru(word: str) -> str: res = speller_ru.correction(word) if not len(res): diff --git a/app/search/services/translate.py b/app/search/services/translate.py index 8fa8f39..35e540e 100644 --- a/app/search/services/translate.py +++ b/app/search/services/translate.py @@ -5,9 +5,14 @@ from typing import List def translate_ru_en(word: str) -> List[str]: - res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=ru-en&text={word}") - return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])] + res = r.get( + f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=ru-en&text={word}" + ) + return [i["text"] for i in chain(*[j["tr"] for j in res.json()["def"]])] + def translate_en_ru(word: str) -> List[str]: - res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=en-ru&text={word}") - return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])] + res = r.get( + f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=en-ru&text={word}" + ) + return [i["text"] for i in chain(*[j["tr"] for j in res.json()["def"]])] diff --git a/pg.sql b/pg.sql new file mode 100644 index 0000000..84bc687 --- /dev/null +++ b/pg.sql @@ -0,0 +1,2 @@ +CREATE EXTENSION unaccent; +CREATE EXTENSION pg_trgm;