From da2b9e16d41d68015c70788ec28dea6ae2643c65 Mon Sep 17 00:00:00 2001 From: Alexandr Karpov Date: Sat, 22 Oct 2022 19:23:25 +0300 Subject: [PATCH] updated score algoth --- app/search/models.py | 1 - app/search/services/search.py | 64 +++++++++++++++++------------- app/search/services/spell_check.py | 16 +++++--- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/app/search/models.py b/app/search/models.py index 456b0b8..44375cc 100644 --- a/app/search/models.py +++ b/app/search/models.py @@ -78,7 +78,6 @@ class Product(models.Model): class Meta: db_table = "product" - ordering = ["-score"] class ProductCharacteristic(models.Model): diff --git a/app/search/services/search.py b/app/search/services/search.py index 8e51033..7216217 100644 --- a/app/search/services/search.py +++ b/app/search/services/search.py @@ -1,7 +1,3 @@ -import string - -from django.db.models import QuerySet - from search.models import ( Product, Characteristic, @@ -13,7 +9,7 @@ from search.models import ( from typing import List from search.services.hints import get_hints -from search.services.spell_check import spell_check_ru as spell_check +from search.services.spell_check import spell_check_ru as spell_check, lemmatize def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str): @@ -45,43 +41,55 @@ def _clean_text(text: str) -> List[str]: for st in [".", ",", "!", "?"]: text = text.replace(st, " ") text = text.split() - return text - - -def apply_qs_search(qs: Product.objects, text: str): - text = _clean_text(text) - words = Product.objects.none() + re = [] for word in text: - words = ( - words + re.append(lemmatize(word)) + return re + + +def apply_qs_search(text: str): + text = _clean_text(text) + products = Product.objects.none() + for word in text: + products = ( + products | Product.objects.filter(name__unaccent__trigram_similar=word) | Product.objects.filter(name__unaccent__icontains=word) ) - print(words) - qs = qs | words - print(qs) - return qs + products = products.order_by("-score") + print(products) + return products def apply_all_qs_search(orig_qs, text: str): # words - qs = apply_qs_search(Product.objects.none(), text) + qs = apply_qs_search(text) text = _clean_text(text) # categories cats = Category.objects.none() for word in text: cats = cats | cats.filter(name__icontains=word) - qs = qs | Product.objects.filter(category__in=cats) + qs = Product.objects.filter(category__in=cats).order_by("-score") | qs # characteristics chars = Characteristic.objects.none() for word in text: - chars = chars | chars.filter( - value__icontains=word, + chars = ( + chars + | Characteristic.objects.filter( + value__icontains=word, + ) + | Characteristic.objects.filter( + value__unaccent__trigram_similar=word, + ) ) - qs = qs | Product.objects.filter(characteristics__characteristic__in=chars) - # print(qs) + qs = ( + Product.objects.filter(characteristics__characteristic__in=chars).order_by( + "-score" + ) + | qs + ) return qs & orig_qs @@ -164,19 +172,22 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]: typ = x["type"] val = x["value"] if typ == "Name": - qs = apply_qs_search(qs, val) + qs = qs & apply_qs_search(val) + qs = qs.order_by("-score") elif typ == "All": qs = apply_all_qs_search(qs, val) elif typ == "Category": qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter( category__name__icontains=val ) + qs = qs.order_by("-score") elif typ == "Characteristic": char = ProductCharacteristic.objects.filter(product__in=qs) char = char.filter(characteristic__value__icontains=val) | char.filter( characteristic__value__unaccent__trigram_similar=val ) qs = qs.filter(characteristics__in=char) + qs = qs.order_by("-score") elif typ == "Unknown": continue else: @@ -184,7 +195,4 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]: qs = qs.filter(unit_characteristics__in=val) else: qs = qs.filter(characteristics__in=val) - return [ - x.serialize_self() - for x in qs.distinct().order_by("-score")[offset : offset + limit] - ] + return [x.serialize_self() for x in qs.distinct()[offset : offset + limit]] diff --git a/app/search/services/spell_check.py b/app/search/services/spell_check.py index 34446d0..4a11556 100644 --- a/app/search/services/spell_check.py +++ b/app/search/services/spell_check.py @@ -1,18 +1,24 @@ -import requests as r +import pymorphy2 from spellchecker import SpellChecker -speller_ru = SpellChecker(language='ru') -speller_eng = SpellChecker(language='en') +speller_ru = SpellChecker(language="ru") +speller_eng = SpellChecker(language="en") def spell_check_ru(word: str) -> str: res = speller_ru.correction(word) - if not len(res): + if not res or not len(res): return word return res + def spell_check_en(word: str) -> str: res = speller_eng.correction(word) - if not len(res): + if not res or not len(res): return word return res + + +def lemmatize(word): + p = pymorphy2.MorphAnalyzer().parse(word)[0] + return p.normal_form