backend/app/search/services/search.py

199 lines
7.3 KiB
Python
Raw Normal View History

from search.models import (
Product,
Characteristic,
ProductCharacteristic,
ProductUnitCharacteristic,
UnitCharacteristic,
2022-10-22 18:20:58 +03:00
Category,
)
2022-10-21 23:22:14 +03:00
from typing import List
2022-10-21 22:36:36 +03:00
from search.services.hints import get_hints
2022-10-22 19:23:25 +03:00
from search.services.spell_check import spell_check_ru as spell_check, lemmatize
def _parse_threshold(raw: str):
    """Parse the numeric part of an operation string.

    Integral values are returned as ``int`` and fractional values as
    ``float``, so a bound such as ``2.7`` is not silently truncated to ``2``.

    Raises:
        ValueError: if ``raw`` is not a number (or is NaN).
        OverflowError: if ``raw`` is infinite.
    """
    number = float(raw)
    # int() rejects inf/nan, so non-finite bounds still fail loudly.
    whole = int(number)
    return whole if whole == number else number


def process_unit_operation(unit, operation: str):
    """Narrow ``unit`` by a comparison expressed as a string.

    Args:
        unit: object exposing ``filter(**lookups)``; ``process_search`` passes
            a ``ProductUnitCharacteristic`` queryset.
        operation: comparison operator followed by a number. Supported
            operators are ``<=``/``=<``, ``>=``/``=>``, ``>``, ``<`` and ``=``.

    Returns:
        The result of ``unit.filter(...)`` for a recognised operator, or
        ``unit`` unchanged when ``operation`` starts with none of them.

    Raises:
        ValueError: if the text after the operator is not a number.
    """
    # Two-character operators must be tested before their one-character
    # prefixes, otherwise "<=5" would be parsed as "<" followed by "=5".
    if operation.startswith(("<=", "=<")):
        return unit.filter(
            characteristic__numeric_value_max__lte=_parse_threshold(operation[2:])
        )
    if operation.startswith(("=>", ">=")):
        return unit.filter(
            characteristic__numeric_value_min__gte=_parse_threshold(operation[2:])
        )
    if operation.startswith(">"):
        return unit.filter(
            characteristic__numeric_value_min__gt=_parse_threshold(operation[1:])
        )
    if operation.startswith("<"):
        return unit.filter(
            characteristic__numeric_value_max__lt=_parse_threshold(operation[1:])
        )
    if operation.startswith("="):
        # Equality requires the whole [min, max] range to sit on the value.
        exact = _parse_threshold(operation[1:])
        return unit.filter(
            characteristic__numeric_value_min__gte=exact,
            characteristic__numeric_value_max__lte=exact,
        )
    return unit
2022-10-21 22:36:36 +03:00
2022-10-22 18:20:58 +03:00
def _clean_text(text: str) -> List[str]:
    """Split ``text`` into words and lemmatize each of them.

    The characters ``.``, ``,``, ``!`` and ``?`` are treated as separators in
    addition to whitespace.

    Returns:
        The results of ``lemmatize`` for every word, in their original order.
    """
    separators = str.maketrans({mark: " " for mark in ".,!?"})
    return [lemmatize(token) for token in text.translate(separators).split()]
2022-10-22 18:20:58 +03:00
2022-10-22 19:23:25 +03:00
def apply_qs_search(text: str):
    """Return products whose name matches any word of ``text``.

    ``text`` is normalised with ``_clean_text``. Each resulting word adds the
    products whose unaccented name is trigram-similar to the word or contains
    it case-insensitively.

    Args:
        text: free-form search string.

    Returns:
        A ``Product`` queryset ordered by descending ``score``; it is empty
        when ``text`` contains no words.
    """
    products = Product.objects.none()
    for word in _clean_text(text):
        products = (
            products
            | Product.objects.filter(name__unaccent__trigram_similar=word)
            | Product.objects.filter(name__unaccent__icontains=word)
        )
    # The queryset is returned lazily: nothing here prints or otherwise
    # evaluates it, so the database is hit only when the caller consumes it.
    return products.order_by("-score")
2022-10-22 12:52:23 +03:00
2022-10-22 18:20:58 +03:00
def apply_all_qs_search(orig_qs, text: str):
    """Search ``text`` across product names, categories and characteristics.

    A product matches when any word of ``text``:

    * matches its name (see ``apply_qs_search``),
    * is contained in the name of its category, or
    * is contained in, or trigram-similar to, the value of one of its
      characteristics.

    Args:
        orig_qs: ``Product`` queryset the result is intersected with.
        text: free-form search string.

    Returns:
        The matching products that are also present in ``orig_qs``.
    """
    # words
    qs = apply_qs_search(text)
    words = _clean_text(text)

    # categories
    cats = Category.objects.none()
    for word in words:
        # Filter the full Category table: filtering ``cats`` itself would keep
        # it empty forever, because it starts out as ``none()``.
        cats = cats | Category.objects.filter(name__icontains=word)
    qs = Product.objects.filter(category__in=cats).order_by("-score") | qs

    # characteristics
    chars = Characteristic.objects.none()
    for word in words:
        chars = (
            chars
            | Characteristic.objects.filter(value__icontains=word)
            | Characteristic.objects.filter(value__unaccent__trigram_similar=word)
        )
    qs = (
        Product.objects.filter(characteristics__characteristic__in=chars).order_by(
            "-score"
        )
        | qs
    )

    return qs & orig_qs
2022-10-22 11:20:24 +03:00
# Filter types whose value is plain text that is applied directly to products.
_TEXT_FILTER_TYPES = ("Name", "Category", "Characteristic", "All")


def _lookup_characteristic_types(name: str):
    """Return the characteristic definitions whose name matches ``name``.

    Names starting with ``*`` are looked up in ``UnitCharacteristic``, all
    others in ``Characteristic``.
    """
    model = UnitCharacteristic if name.startswith("*") else Characteristic
    return model.objects.filter(
        name__unaccent__trigram_similar=name
    ) | model.objects.filter(name__icontains=name)


def _unit_matches(char_types, operation: str):
    """Return product unit characteristics of ``char_types`` satisfying ``operation``."""
    unit = ProductUnitCharacteristic.objects.filter(characteristic__in=char_types)
    return process_unit_operation(unit, operation)


def _characteristic_matches(char_types, value: str):
    """Return product characteristics of ``char_types`` whose value matches ``value``."""
    return ProductCharacteristic.objects.filter(
        characteristic__in=char_types,
        characteristic__value__unaccent__trigram_similar=value,
    ) | ProductCharacteristic.objects.filter(
        characteristic__in=char_types,
        characteristic__value__icontains=value,
    )


def _prepare_filters(data):
    """Turn raw request items into an ordered list of ``{"type", "value"}`` filters.

    Text filters keep their input order and carry a spell-checked string.
    Every other type is treated as a characteristic name: repeated items of
    the same type are OR-ed into one queryset, and those querysets are
    appended after the text filters.
    """
    prepared = []
    matches_by_type = {}
    char_types_by_type = {}

    for item in data:
        entry = dict(item)
        kind = entry["type"]

        if kind in _TEXT_FILTER_TYPES:
            prepared.append({"type": kind, "value": spell_check(entry["value"])})
            continue

        if kind == "Unknown":
            # NOTE(review): a hinted type outside the known text types reaches
            # the characteristic branch of _apply_filters with a string value
            # -- confirm what get_hints can return.
            hinted_type = get_hints(entry["value"])
            prepared.append(
                {"type": hinted_type, "value": spell_check(entry["value"])}
            )
            continue

        first_seen = kind not in matches_by_type
        if first_seen:
            char_types_by_type[kind] = _lookup_characteristic_types(kind)
        char_types = char_types_by_type[kind]

        if kind.startswith("*"):
            # The value is an operation such as ">=5"; it is used verbatim and
            # therefore not spell-checked.
            matches = _unit_matches(char_types, entry["value"])
        else:
            matches = _characteristic_matches(
                char_types, spell_check(entry["value"])
            )

        matches_by_type[kind] = (
            matches if first_seen else matches_by_type[kind] | matches
        )

    prepared.extend(
        {"type": kind, "value": matches} for kind, matches in matches_by_type.items()
    )
    return prepared


def _apply_filters(prep_data):
    """Apply the prepared filters, in order, to the full ``Product`` queryset."""
    qs = Product.objects.filter()
    for prepared_filter in prep_data:
        typ = prepared_filter["type"]
        val = prepared_filter["value"]

        if typ == "Name":
            qs = (qs & apply_qs_search(val)).order_by("-score")
        elif typ == "All":
            qs = apply_all_qs_search(qs, val)
        elif typ == "Category":
            qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter(
                category__name__icontains=val
            )
            qs = qs.order_by("-score")
        elif typ == "Characteristic":
            char = ProductCharacteristic.objects.filter(product__in=qs)
            char = char.filter(characteristic__value__icontains=val) | char.filter(
                characteristic__value__unaccent__trigram_similar=val
            )
            qs = qs.filter(characteristics__in=char).order_by("-score")
        elif typ == "Unknown":
            continue
        elif typ.startswith("*"):
            qs = qs.filter(unit_characteristics__in=val)
        else:
            qs = qs.filter(characteristics__in=val)
    return qs


def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
    """Run a structured product search and return one page of results.

    Args:
        data: filter items, each a mapping with ``"type"`` and ``"value"``.
            Recognised types are ``"Name"``, ``"Category"``,
            ``"Characteristic"``, ``"All"`` and ``"Unknown"`` (resolved through
            ``get_hints``). Any other type is treated as a characteristic
            name; a leading ``*`` marks a unit characteristic whose value is
            an operation understood by ``process_unit_operation``.
        limit: maximum number of products to return.
        offset: number of matching products to skip.

    Returns:
        ``serialize_self()`` of each distinct matching product in the page.
    """
    qs = _apply_filters(_prepare_filters(data))
    return [
        product.serialize_self()
        for product in qs.distinct()[offset : offset + limit]
    ]