optimised search result

This commit is contained in:
Alexander Karpov 2022-10-23 18:07:06 +03:00
parent fc0049d816
commit 1817656adc
7 changed files with 96 additions and 28 deletions

View File

@ -1,6 +1,8 @@
from rest_framework import serializers from rest_framework import serializers
from django.core.validators import MinLengthValidator, MinValueValidator from django.core.validators import MinLengthValidator, MinValueValidator
from search.models import Product, UnitCharacteristic, Characteristic
class QueryFilterSerializer(serializers.Serializer): class QueryFilterSerializer(serializers.Serializer):
value = serializers.CharField(max_length=100) value = serializers.CharField(max_length=100)
@ -86,3 +88,38 @@ class AutoCompleteResponseSerializer(serializers.Serializer):
def update(self, instance, validated_data): def update(self, instance, validated_data):
raise NotImplementedError raise NotImplementedError
class CharacteristicSerializer(serializers.ModelSerializer):
    """Serialize a ``Characteristic`` instance as its ``name`` and ``value``."""

    class Meta:
        model = Characteristic
        fields = ["name", "value"]
class UnitCharacteristicSerializer(serializers.ModelSerializer):
    """Serialize a ``UnitCharacteristic`` as ``name``, ``value`` and ``unit``.

    The ``value`` entry is produced by :meth:`get_value_n`, which returns the
    instance's ``num_value`` attribute.
    """

    value = serializers.SerializerMethodField("get_value_n")

    class Meta:
        model = UnitCharacteristic
        fields = ["name", "value", "unit"]

    def get_value_n(self, obj):
        """Return ``obj.num_value`` to be used as the serialized ``value``."""
        return obj.num_value
class ProductSerializer(serializers.ModelSerializer):
    """Serialize a ``Product`` with ``id``, ``name``, ``score`` and its characteristics.

    ``characteristic`` is a computed field: the serialized plain
    characteristics followed by the serialized unit characteristics of the
    product.
    """

    characteristic = serializers.SerializerMethodField("get_characteristic_n")

    class Meta:
        model = Product
        fields = ["id", "name", "score", "characteristic"]

    def get_characteristic_n(self, obj: Product):
        """Return plain and unit characteristics of ``obj`` as one sequence.

        Both lookups filter on ``products__product=obj``; the plain
        characteristics come first in the result, the unit ones after them.
        """
        # Querysets are lazy; each is evaluated when its serializer's
        # ``.data`` is read, so the plain lookup still runs first.
        plain_qs = Characteristic.objects.filter(products__product=obj)
        unit_qs = UnitCharacteristic.objects.filter(products__product=obj)

        plain_data = CharacteristicSerializer(plain_qs, many=True).data
        unit_data = UnitCharacteristicSerializer(unit_qs, many=True).data
        return plain_data + unit_data

View File

@ -4,7 +4,7 @@ from rest_framework import status
from rest_framework.generics import get_object_or_404 from rest_framework.generics import get_object_or_404
from rest_framework.response import Response from rest_framework.response import Response
from rest_framework.views import APIView from rest_framework.views import APIView
from search.api.serializers import HintRequestSerializer from search.api.serializers import HintRequestSerializer, ProductSerializer
from search.api.serializers import ( from search.api.serializers import (
SearchSerializer, SearchSerializer,
@ -33,14 +33,14 @@ class SearchApi(APIView):
serializer = SearchSerializer(data=request.data) serializer = SearchSerializer(data=request.data)
serializer.is_valid(raise_exception=True) serializer.is_valid(raise_exception=True)
return Response( return Response(
group( ProductSerializer(
process_search( process_search(
serializer.data["body"], serializer.data["body"],
serializer.data["limit"], serializer.data["limit"],
serializer.data["offset"], serializer.data["offset"],
), ),
serializer.data["body"], many=True,
), ).data,
status=status.HTTP_200_OK, status=status.HTTP_200_OK,
) )

View File

@ -34,6 +34,14 @@ class UnitCharacteristic(models.Model):
"unit": self.unit, "unit": self.unit,
} }
@property
def num_value(self):
    """Return the numeric value of this characteristic in display form.

    When ``numeric_value_min`` equals ``numeric_value_max`` that single value
    is returned unchanged; otherwise the result is the string
    ``"<min>:<max>"``.
    """
    lower, upper = self.numeric_value_min, self.numeric_value_max
    if lower == upper:
        return lower
    return f"{lower}:{upper}"
class Meta: class Meta:
db_table = "unit_characteristic" db_table = "unit_characteristic"

View File

@ -10,9 +10,29 @@ from search.services.search.prepare import apply_union
from search.models import Product from search.models import Product
def process_search(data: List[dict], limit=5, offset=0) -> List[dict]: def call(prep_data):
prep_data = apply_union(data) if len(prep_data) == 1:
# ----------------------------------- apply filters on QuerySet -------------------------------------------------- # typ = prep_data[0]["type"]
val = prep_data[0]["value"]
if typ == "Name":
return apply_qs_search(val).order_by("-score")
elif typ == "All":
return apply_all_qs_search(val).order_by("-score")
elif typ == "Category":
return Product.objects.filter(category__name__icontains=val).order_by(
"-score"
)
elif typ == "Characteristic":
return appy_qs_characteristic(Product.objects.filter(), val).order_by(
"-score"
)
elif typ == "Unknown":
return []
else:
if typ.startswith("*"):
return Product.objects.filter(unit_characteristics__in=val)
else:
return Product.objects.filter(characteristics__in=val)
qs = Product.objects.filter() qs = Product.objects.filter()
for x in prep_data: for x in prep_data:
typ = x["type"] typ = x["type"]
@ -35,4 +55,9 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
qs = qs.filter(unit_characteristics__in=val) qs = qs.filter(unit_characteristics__in=val)
else: else:
qs = qs.filter(characteristics__in=val) qs = qs.filter(characteristics__in=val)
return [x.serialize_self() for x in qs.distinct()[offset : offset + limit]] return []
def process_search(body: List[dict], limit=5, offset=0) -> List[dict]:
    """Run a search for ``body`` and return one page of the matches.

    ``body`` is first passed through ``apply_union`` and the prepared filters
    are handed to ``call``; the result is then sliced to
    ``[offset : offset + limit]``.

    NOTE(review): ``call`` appears to return either a queryset or an empty
    list, so the value returned here may not literally be a ``List[dict]``
    -- confirm against the callers.
    """
    prepared = apply_union(body)
    matches = call(prepared)
    page = slice(offset, offset + limit)
    return matches[page]

View File

@ -1,8 +1,5 @@
from functools import cache
from typing import List from typing import List
from django.utils.text import slugify
from search.models import ( from search.models import (
Product, Product,
ProductCharacteristic, ProductCharacteristic,
@ -12,15 +9,20 @@ from search.services.spell_check import pos, spell_check
def _clean_text(text: str) -> List[str]: def _clean_text(text: str) -> List[str]:
for st in [".", ",", "!", "?"]: for st in [".", ",", "!", "?"]:
text = text.replace(st, " ") text = text.replace(st, " ")
text = text.split() text = text.split()
functors_pos = {"INTJ", "PRCL", "CONJ", "PREP"} # function words functors_pos = {"INTJ", "PRCL", "CONJ", "PREP"} # function words
text = [word for word in text if pos(word) not in functors_pos] text = [word for word in text if pos(word) not in functors_pos]
return [spell_check(x) for x in text]
text = [spell_check(x) for x in text]
return text
@cache
def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str): def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str):
if operation.startswith("<=") or operation.startswith("=<"): if operation.startswith("<=") or operation.startswith("=<"):
return unit.filter( return unit.filter(
@ -46,7 +48,6 @@ def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: s
return unit return unit
@cache
def apply_qs_search(text: str): def apply_qs_search(text: str):
text = _clean_text(text) text = _clean_text(text)
qs = Product.objects.filter() qs = Product.objects.filter()
@ -58,7 +59,6 @@ def apply_qs_search(text: str):
return products return products
@cache
def apply_all_qs_search(text: str): def apply_all_qs_search(text: str):
# words # words
text = _clean_text(text) text = _clean_text(text)
@ -103,16 +103,22 @@ def apply_all_qs_search(text: str):
del text[i] del text[i]
break break
prod = Product.objects.filter() if u_qs:
prod = Product.objects.filter(unit_characteristics__in=u_qs)
else:
prod = Product.objects.filter()
for word in text: for word in text:
car = ProductCharacteristic.objects.filter( car = ProductCharacteristic.objects.filter(
characteristic__value__icontains=word, characteristic__value__icontains=word,
) | ProductCharacteristic.objects.filter(
characteristic__value__trigram_similar=word,
) )
qs = ( qs = (
Product.objects.filter(name__icontains=word) Product.objects.filter(name__icontains=word)
| Product.objects.filter(name__trigram_similar=word)
| Product.objects.filter(category__name__icontains=word)
| Product.objects.filter(characteristics__in=car) | Product.objects.filter(characteristics__in=car)
| Product.objects.filter(category__name__icontains=word)
| Product.objects.filter(name__trigram_similar=word)
) )
if any( if any(
x in word x in word
@ -126,22 +132,16 @@ def apply_all_qs_search(text: str):
) )
) )
) )
print(qs)
prod = prod & qs prod = prod & qs
if u_qs:
prod = prod & Product.objects.filter(unit_characteristics__in=u_qs)
return prod return prod
@cache
def apply_qs_category(qs, name: str): def apply_qs_category(qs, name: str):
qs = qs.filter(category__name__icontains=name) qs = qs.filter(category__name__icontains=name)
return qs return qs
@cache
def appy_qs_characteristic(qs, name: str): def appy_qs_characteristic(qs, name: str):
char = ProductCharacteristic.objects.filter(product__in=qs) char = ProductCharacteristic.objects.filter(product__in=qs)
char = char.filter(characteristic__value__icontains=name) | char.filter( char = char.filter(characteristic__value__icontains=name) | char.filter(

View File

@ -27,9 +27,7 @@ def apply_union(data: List[Dict]) -> List[Dict]:
prep_data.append( prep_data.append(
{ {
"type": dat["type"], "type": dat["type"],
"value": spell_check( "value": dat["value"],
dat["value"],
),
} }
) )
elif x["type"] == "Unknown": elif x["type"] == "Unknown":

View File

@ -5,7 +5,7 @@ speller_ru = SpellChecker(language="ru")
speller_eng = SpellChecker(language="en") speller_eng = SpellChecker(language="en")
def spell_check_ru(word: str) -> str: def spell_check(word: str) -> str:
res = speller_ru.correction(word) res = speller_ru.correction(word)
if not res or not len(res): if not res or not len(res):
return word return word