From a47a89a43fd98ed508cebb14706b4828491dfc42 Mon Sep 17 00:00:00 2001
From: Alexandr Karpov
Date: Sun, 23 Oct 2022 01:20:57 +0300
Subject: [PATCH] made all search smart

---
 app/search/services/autocomplete_schema.py | 36 ++++----
 app/search/services/colors.py              |  2 +-
 app/search/services/search.py              | 99 +++++++++++++++-------
 app/search/services/spell_check.py         |  5 +-
 4 files changed, 92 insertions(+), 50 deletions(-)

diff --git a/app/search/services/autocomplete_schema.py b/app/search/services/autocomplete_schema.py
index 690e414..b2be1c4 100644
--- a/app/search/services/autocomplete_schema.py
+++ b/app/search/services/autocomplete_schema.py
@@ -8,37 +8,37 @@ def autocomplete_schema(val: str, exclude: List[Dict]):
     name_exclude = [x["value"] for x in exclude if x["type"] == "Name"]
     category_exclude = [x["value"] for x in exclude if x["type"] == "Category"]
     schema = []
+    if not category_exclude:
+        schema.extend(
+            [
+                {
+                    "coordinate": cat["name"].replace("ё", "е").lower().index(val.lower()),
+                    "value": {"type": "Category", "value": cat["name"]},
+                }
+                for cat in Category.objects.filter(name__unaccent__icontains=val)[
+                    :10
+                ].values("name")
+            ]
+        )
     if not name_exclude:
         schema.extend(
             [
                 {
-                    "coordinate": product["name"].lower().index(val.lower()),
+                    "coordinate": product["name"].replace("ё", "е").lower().index(val.lower()),
                     "value": {
                         "type": "Name",
                         "value": product["name"],
                     },
                 }
                 for product in Product.objects.filter(name__unaccent__icontains=val)[
-                    :20
-                ].values("name")
-            ]
-        )
-    if not category_exclude:
-        schema.extend(
-            [
-                {
-                    "coordinate": cat["name"].lower().index(val.lower()),
-                    "value": {"type": "Category", "value": cat["name"]},
-                }
-                for cat in Category.objects.filter(name__unaccent__icontains=val)[
-                    :20
+                    :30
                 ].values("name")
             ]
         )
     schema.extend(
         [
             {
-                "coordinate": char["value"].lower().index(val.lower()),
+                "coordinate": char["value"].replace("ё", "е").lower().index(val.lower()),
                 "value": {"type": char["name"], "value": char["value"]},
             }
             for char in Characteristic.objects.filter(value__unaccent__icontains=val)[
@@ -49,12 +49,12 @@ def autocomplete_schema(val: str, exclude: List[Dict]):
     schema.extend(
         [
             {
-                "coordinate": char["value"].lower().index(val.lower()),
+                "coordinate": char["name"].lower().replace("ё", "е").index(val.lower()),
                 "value": {
                     "type": char["name"] + "_numeric",
-                    "value": char["value"]
+                    "value": char["name"]
                 }
-            } for char in UnitCharacteristic.objects.filter(value__unaccent__icontains=val)[:20].values("name", "value")
+            } for char in UnitCharacteristic.objects.filter(name__unaccent__icontains=val)[:20].values("name", "value")
         ]
     )
     return schema
diff --git a/app/search/services/colors.py b/app/search/services/colors.py
index e8c8d7e..4f5ad62 100644
--- a/app/search/services/colors.py
+++ b/app/search/services/colors.py
@@ -18,7 +18,7 @@ def group(data: List[Dict], search_fields_d: List[Dict]) -> List[Dict]:
         else:
             re[field] = []
         re[field].append(
-            [x["type"] for x in search_fields_dict if x["value"] == field][
+            [x["type"] for x in search_fields_dict if x["value"].lower() == field][
                 0
             ]
         )
diff --git a/app/search/services/search.py b/app/search/services/search.py
index 9100a71..cbd4e70 100644
--- a/app/search/services/search.py
+++ b/app/search/services/search.py
@@ -43,7 +43,7 @@ def _clean_text(text: str) -> List[str]:
     text = text.split()
     re = []
     for word in text:
-        re.append(lemmatize(word))
+        re.append(word)
     return re
 
 
@@ -62,35 +62,76 @@ def apply_all_qs_search(orig_qs, text: str):
     # words
-    qs = apply_qs_search(text)
     text = _clean_text(text)
-    # categories
-    cats = Category.objects.none()
-    for word in text:
-        cats = cats | cats.filter(name__icontains=word)
-    qs = Product.objects.filter(category__in=cats).order_by("-score") | qs
+    u_qs = None
 
-    # characteristics
-    chars = Characteristic.objects.none()
-    for word in text:
-        chars = (
-            chars
-            | Characteristic.objects.filter(
-                value__icontains=word,
-            )
-            | Characteristic.objects.filter(
-                value__unaccent__trigram_similar=word,
-            )
-        )
-    qs = (
-        Product.objects.filter(characteristics__characteristic__in=chars).order_by(
-            "-score"
-        )
-        | qs
-    )
+    # try to find Unit characteristics
+    if any(x.isnumeric() for x in text):
+        u_qs = ProductUnitCharacteristic.objects.filter()
+        for i in range(len(text)):
+            el = text[i]
+            if el.isnumeric():
+                if i == len(text) - 1:
+                    if ProductUnitCharacteristic.objects.filter(
+                        characteristic__name__icontains=text[i - 1]
+                    ).exists():
+                        unit = ProductUnitCharacteristic.objects.filter(
+                            characteristic__name__icontains=text[i - 1]
+                        )
+                        u_qs = u_qs & process_unit_operation(unit, f"={text[i]}")
+                        del text[i]
+                        del text[i - 1]
+                        break
+                elif len(text) - 1 > i >= 1:
+                    if ProductUnitCharacteristic.objects.filter(
+                        characteristic__name__icontains=text[i - 1]
+                    ).exists():
+                        unit = ProductUnitCharacteristic.objects.filter(
+                            characteristic__name__icontains=text[i - 1]
+                        )[0]
+                        u_qs = u_qs & process_unit_operation(unit, f"={text[i]}")
+                        del text[i]
+                        del text[i - 1]
+                        break
+                    elif ProductUnitCharacteristic.objects.filter(
+                        characteristic__name__icontains=text[i + 1]
+                    ).exists():
+                        unit = UnitCharacteristic.objects.filter(
+                            ProductUnitCharacteristic=text[i + 1]
+                        )[0]
+                        u_qs = u_qs & process_unit_operation(unit, f"={text[i]}")
+                        del text[i]
+                        del text[i + 1]
+                        break
+                else:
+                    if ProductUnitCharacteristic.objects.filter(
+                        characteristic__name__icontains=text[i + 1]
+                    ).exists():
+                        unit = ProductUnitCharacteristic.objects.filter(
+                            characteristic__name__icontains=text[i + 1]
+                        )[0]
+                        u_qs = u_qs & process_unit_operation(unit, f"={text[i]}")
+                        del text[i]
+                        del text[i + 1]
+                        break
 
-    return qs & orig_qs
+    prod = Product.objects.filter()
+    for word in text:
+        car = ProductCharacteristic.objects.filter(
+            characteristic__value__icontains=word,
+        )
+        qs = (
+            Product.objects.filter(name__icontains=word)
+            | Product.objects.filter(name__unaccent__trigram_similar=word)
+            | Product.objects.filter(category__name__icontains=word)
+            | Product.objects.filter(characteristics__in=car)
+        )
+        prod = prod & qs
+    if u_qs:
+        prod = prod & Product.objects.filter(unit_characteristics__in=u_qs)
+
+    return prod
 
 
 def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
@@ -174,11 +215,9 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
             qs = qs & apply_qs_search(val)
             qs = qs.order_by("-score")
         elif typ == "All":
-            qs = apply_all_qs_search(qs, val)
+            qs = apply_all_qs_search(qs, val) & qs
         elif typ == "Category":
-            qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter(
-                category__name__icontains=val
-            )
+            qs = qs.filter(category__name__icontains=val)
             qs = qs.order_by("-score")
         elif typ == "Characteristic":
             char = ProductCharacteristic.objects.filter(product__in=qs)
diff --git a/app/search/services/spell_check.py b/app/search/services/spell_check.py
index 4a11556..9995cac 100644
--- a/app/search/services/spell_check.py
+++ b/app/search/services/spell_check.py
@@ -19,6 +19,9 @@ def spell_check_en(word: str) -> str:
     return res
 
 
+morph = pymorphy2.MorphAnalyzer()
+
+
 def lemmatize(word):
-    p = pymorphy2.MorphAnalyzer().parse(word)[0]
+    p = morph.parse(word)[0]
     return p.normal_form
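
The heart of this patch is the numeric-token heuristic added to apply_all_qs_search: a number in the query is paired with the neighbouring word that names a unit characteristic, both tokens are dropped from the word list, and the remaining words go through the plain per-word product search (name, category and characteristic lookups intersected word by word). The snippet below is a minimal, dependency-free sketch of that pairing step for readers who want to see it outside the diff; KNOWN_UNITS and pair_units are hypothetical stand-ins and are not part of the patch, which resolves units through ProductUnitCharacteristic and process_unit_operation instead.

from typing import List, Tuple

# Hypothetical stand-in for the unit-characteristic names stored in the
# database (the patch looks them up via ProductUnitCharacteristic instead).
KNOWN_UNITS = {"width", "length", "weight"}


def pair_units(words: List[str]) -> Tuple[List[Tuple[str, str]], List[str]]:
    """Pair each numeric token with an adjacent unit name.

    Mirrors the heuristic in apply_all_qs_search: try the word before the
    number first, then the word after; once paired, drop both tokens so only
    the remaining words are used for plain text search.
    """
    pairs: List[Tuple[str, str]] = []
    remaining = list(words)
    i = 0
    while i < len(remaining):
        word = remaining[i]
        if word.isnumeric():
            before = remaining[i - 1] if i >= 1 else None
            after = remaining[i + 1] if i + 1 < len(remaining) else None
            if before in KNOWN_UNITS:
                pairs.append((before, f"={word}"))
                del remaining[i]      # drop the number first ...
                del remaining[i - 1]  # ... then the unit name before it
                i -= 1                # resume at the element that followed the number
                continue
            if after in KNOWN_UNITS:
                pairs.append((after, f"={word}"))
                del remaining[i + 1]  # drop the later index first so ...
                del remaining[i]      # ... this index still points at the number
                continue
        i += 1
    return pairs, remaining


if __name__ == "__main__":
    # "width 30 bolt" -> unit filter ("width", "=30"), text search on ["bolt"]
    print(pair_units(["width", "30", "bolt"]))

Unlike the patch, which breaks out of the scan after the first pairing, the sketch keeps going so every number in the query gets matched; it also deletes the later index first whenever the unit name follows the number, so the earlier index stays valid after the first del.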