updated score algorithm

This commit is contained in:
Alexander Karpov 2022-10-22 19:23:25 +03:00
parent 8aef80af48
commit da2b9e16d4
3 changed files with 47 additions and 34 deletions

View File

@ -78,7 +78,6 @@ class Product(models.Model):
class Meta: class Meta:
db_table = "product" db_table = "product"
ordering = ["-score"]
class ProductCharacteristic(models.Model): class ProductCharacteristic(models.Model):

View File

@ -1,7 +1,3 @@
import string
from django.db.models import QuerySet
from search.models import ( from search.models import (
Product, Product,
Characteristic, Characteristic,
@ -13,7 +9,7 @@ from search.models import (
from typing import List from typing import List
from search.services.hints import get_hints from search.services.hints import get_hints
from search.services.spell_check import spell_check_ru as spell_check from search.services.spell_check import spell_check_ru as spell_check, lemmatize
def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str): def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str):
@ -45,43 +41,55 @@ def _clean_text(text: str) -> List[str]:
for st in [".", ",", "!", "?"]: for st in [".", ",", "!", "?"]:
text = text.replace(st, " ") text = text.replace(st, " ")
text = text.split() text = text.split()
return text re = []
def apply_qs_search(qs: Product.objects, text: str):
text = _clean_text(text)
words = Product.objects.none()
for word in text: for word in text:
words = ( re.append(lemmatize(word))
words return re
def apply_qs_search(text: str):
text = _clean_text(text)
products = Product.objects.none()
for word in text:
products = (
products
| Product.objects.filter(name__unaccent__trigram_similar=word) | Product.objects.filter(name__unaccent__trigram_similar=word)
| Product.objects.filter(name__unaccent__icontains=word) | Product.objects.filter(name__unaccent__icontains=word)
) )
print(words) products = products.order_by("-score")
qs = qs | words print(products)
print(qs) return products
return qs
def apply_all_qs_search(orig_qs, text: str): def apply_all_qs_search(orig_qs, text: str):
# words # words
qs = apply_qs_search(Product.objects.none(), text) qs = apply_qs_search(text)
text = _clean_text(text) text = _clean_text(text)
# categories # categories
cats = Category.objects.none() cats = Category.objects.none()
for word in text: for word in text:
cats = cats | cats.filter(name__icontains=word) cats = cats | cats.filter(name__icontains=word)
qs = qs | Product.objects.filter(category__in=cats) qs = Product.objects.filter(category__in=cats).order_by("-score") | qs
# characteristics # characteristics
chars = Characteristic.objects.none() chars = Characteristic.objects.none()
for word in text: for word in text:
chars = chars | chars.filter( chars = (
chars
| Characteristic.objects.filter(
value__icontains=word, value__icontains=word,
) )
qs = qs | Product.objects.filter(characteristics__characteristic__in=chars) | Characteristic.objects.filter(
# print(qs) value__unaccent__trigram_similar=word,
)
)
qs = (
Product.objects.filter(characteristics__characteristic__in=chars).order_by(
"-score"
)
| qs
)
return qs & orig_qs return qs & orig_qs
@ -164,19 +172,22 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
typ = x["type"] typ = x["type"]
val = x["value"] val = x["value"]
if typ == "Name": if typ == "Name":
qs = apply_qs_search(qs, val) qs = qs & apply_qs_search(val)
qs = qs.order_by("-score")
elif typ == "All": elif typ == "All":
qs = apply_all_qs_search(qs, val) qs = apply_all_qs_search(qs, val)
elif typ == "Category": elif typ == "Category":
qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter( qs = qs.filter(category__name__unaccent__trigram_similar=val) | qs.filter(
category__name__icontains=val category__name__icontains=val
) )
qs = qs.order_by("-score")
elif typ == "Characteristic": elif typ == "Characteristic":
char = ProductCharacteristic.objects.filter(product__in=qs) char = ProductCharacteristic.objects.filter(product__in=qs)
char = char.filter(characteristic__value__icontains=val) | char.filter( char = char.filter(characteristic__value__icontains=val) | char.filter(
characteristic__value__unaccent__trigram_similar=val characteristic__value__unaccent__trigram_similar=val
) )
qs = qs.filter(characteristics__in=char) qs = qs.filter(characteristics__in=char)
qs = qs.order_by("-score")
elif typ == "Unknown": elif typ == "Unknown":
continue continue
else: else:
@ -184,7 +195,4 @@ def process_search(data: List[dict], limit=5, offset=0) -> List[dict]:
qs = qs.filter(unit_characteristics__in=val) qs = qs.filter(unit_characteristics__in=val)
else: else:
qs = qs.filter(characteristics__in=val) qs = qs.filter(characteristics__in=val)
return [ return [x.serialize_self() for x in qs.distinct()[offset : offset + limit]]
x.serialize_self()
for x in qs.distinct().order_by("-score")[offset : offset + limit]
]

View File

@ -1,18 +1,24 @@
import requests as r import pymorphy2
from spellchecker import SpellChecker from spellchecker import SpellChecker
speller_ru = SpellChecker(language='ru') speller_ru = SpellChecker(language="ru")
speller_eng = SpellChecker(language='en') speller_eng = SpellChecker(language="en")
def spell_check_ru(word: str) -> str: def spell_check_ru(word: str) -> str:
res = speller_ru.correction(word) res = speller_ru.correction(word)
if not len(res): if not res or not len(res):
return word return word
return res return res
def spell_check_en(word: str) -> str: def spell_check_en(word: str) -> str:
res = speller_eng.correction(word) res = speller_eng.correction(word)
if not len(res): if not res or not len(res):
return word return word
return res return res
# Lazily created, process-wide analyzer. Per the pymorphy2 documentation a
# MorphAnalyzer loads its dictionaries on construction and is meant to be
# created once and reused, so it is cached here instead of being rebuilt for
# every word.
_morph_analyzer = None


def lemmatize(word: str) -> str:
    """Return the normal (dictionary) form of *word*.

    The first parse returned by ``MorphAnalyzer.parse`` is used, matching the
    previous behaviour of this function.

    Args:
        word: A single token to normalize.

    Returns:
        The ``normal_form`` of the first parse of ``word``.
    """
    global _morph_analyzer
    if _morph_analyzer is None:
        # Build the analyzer on first use only; later calls reuse it.
        _morph_analyzer = pymorphy2.MorphAnalyzer()
    return _morph_analyzer.parse(word)[0].normal_form