improved search: spelling, unit characteristic lookup, type guessing

This commit is contained in:
Alexander Karpov 2022-10-22 10:55:10 +03:00
parent 497b930ca1
commit 8b5fddbd67
7 changed files with 119 additions and 28 deletions

View File

@ -214,3 +214,5 @@ REST_FRAMEWORK = {
# django-cors-headers # django-cors-headers
CORS_ALLOW_ALL_ORIGINS = True CORS_ALLOW_ALL_ORIGINS = True
import os

# Yandex Dictionary API key used by the translation helpers.
# NOTE(review): the literal below is committed to version control, so it
# should be rotated and then supplied through the YANDEX_DICT_API_KEY
# environment variable. The literal is kept only as a fallback so that
# existing deployments keep working until that happens.
YANDEX_DICT_API_KEY = os.environ.get("YANDEX_DICT_API_KEY") or (
    "dict.1.1.20221022T010312Z.55cce597a6cfa505.f416aba85e7642eedc1b76b8e21c06506fb17f83"
)

View File

@ -18,13 +18,14 @@ class Characteristic(models.Model):
class UnitCharacteristic(models.Model): class UnitCharacteristic(models.Model):
name = models.TextField("Имя", blank=False) name = models.TextField("Имя", blank=False)
value = models.TextField("Значение", blank=False) value = models.TextField("Значение", blank=False)
numeric_value = models.IntegerField(default=0)
unit = models.TextField("Размерность", blank=False) unit = models.TextField("Размерность", blank=False)
def __str__(self): def __str__(self):
return str(self.name) return str(self.name)
def serialize_self(self): def serialize_self(self):
return {"name": self.name, "value": self.value, "unit": self.unit} return {"name": self.name, "value": self.numeric_value, "unit": self.unit}
class Meta: class Meta:
db_table = "unit_characteristic" db_table = "unit_characteristic"

View File

@ -27,8 +27,8 @@ def autocomplete_schema(val: str):
schema.extend( schema.extend(
[ [
{ {
"coordinate": char.name.index(val), "coordinate": char["name"].index(val),
"value": {"type": char.name, "value": char.value}, "value": {"type": char["name"], "value": char["value"]},
} }
for char in Characteristic.objects.filter(name__contains=val).values( for char in Characteristic.objects.filter(name__contains=val).values(
"name", "value" "name", "value"

View File

@ -1,3 +1,4 @@
import re
from ast import literal_eval from ast import literal_eval
import pandas as pd import pandas as pd
@ -59,3 +60,16 @@ def load_excel():
# malformed node or string: nan \ duplicate key # malformed node or string: nan \ duplicate key
print("СКОРОСШИВАТЕЛЬ") print("СКОРОСШИВАТЕЛЬ")
continue continue
def process_unit_character():
    """Fill ``numeric_value`` on every ``UnitCharacteristic`` row.

    Each row's textual ``value`` is scanned for numbers. A row is kept only
    when exactly one number is found and it converts to an integer; the
    truncated integer is stored in ``numeric_value`` and the row is saved.
    Every other row is deleted.

    This is destructive: rows without a single parseable number are removed
    from the database.
    """
    # Raw string: the original non-raw literal relied on "\d" being passed
    # through unchanged, which Python reports as an invalid escape sequence.
    number_pattern = re.compile(
        r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?"
    )

    for el in UnitCharacteristic.objects.all():
        nums = number_pattern.findall(el.value)
        if len(nums) != 1:
            # Zero or several numbers: the value is ambiguous, drop the row.
            el.delete()
            continue

        try:
            # NOTE(review): the pattern only accepts commas followed by three
            # digits, and those commas are then turned into dots, so "1,000"
            # is stored as 1. Confirm whether a decimal comma or a thousands
            # separator is intended.
            el.numeric_value = int(float(nums[0].replace(",", ".")))
        except (ValueError, OverflowError):
            # ValueError: not a valid float (e.g. "1.234.567").
            # OverflowError: the float is infinite (e.g. "1e999").
            el.delete()
        else:
            el.save()

View File

@ -1,32 +1,96 @@
from search.models import Product, Characteristic, ProductCharacteristic import string
from search.models import (
Product,
Characteristic,
ProductCharacteristic,
ProductUnitCharacteristic,
UnitCharacteristic,
)
from typing import List from typing import List
from search.services.hints import get_hints
from search.services.spell_check import spell_check
from search.services.translate import translate_en_ru, translate_ru_en
def process_search(data: List[dict]) -> List[dict]:
def process_unit_operation(unit: ProductUnitCharacteristic.objects, operation: str):
    """Narrow ``unit`` by a numeric comparison written as text.

    ``operation`` is a comparison operator followed by a number, for example
    ``">=10"``, ``"<2.5"`` or ``"=7"``. Supported operators are ``<=``/``=<``,
    ``>=``/``=>``, ``>``, ``<`` and ``=``. The number is truncated to an
    integer before it is compared against
    ``characteristic__numeric_value``.

    Args:
        unit: object exposing ``filter(**lookups)``; it is filtered on
            ``characteristic__numeric_value``.
        operation: operator plus operand, e.g. ``"<=15"``.

    Returns:
        The result of ``unit.filter(...)`` for a recognised operator with a
        numeric operand; otherwise ``unit`` unchanged.
    """
    # Two-character operators are listed first so "<=" is never read as "<".
    lookups = (
        (("<=", "=<"), "characteristic__numeric_value__lte"),
        ((">=", "=>"), "characteristic__numeric_value__gte"),
        ((">",), "characteristic__numeric_value__gt"),
        (("<",), "characteristic__numeric_value__lt"),
        # Plain "=" means equality, so no lookup suffix is used.
        (("=",), "characteristic__numeric_value"),
    )

    operation = operation.strip()
    for prefixes, lookup in lookups:
        for prefix in prefixes:
            if not operation.startswith(prefix):
                continue
            # The operand is everything AFTER the operator.
            operand = operation[len(prefix):].strip().replace(",", ".")
            try:
                threshold = int(float(operand))
            except (ValueError, OverflowError):
                # Unparseable operand: leave the queryset unfiltered, the
                # same as for an unrecognised operation.
                return unit
            return unit.filter(**{lookup: threshold})
    return unit
def process_search(data: List[dict], limit=10, offset=0) -> List[dict]:
prep_data = [] prep_data = []
prep_dict = {} prep_dict = {}
prep_dict_char_type = {} prep_dict_char_type = {}
# --------------------------------------- prepare filters -------------------------------------------------------- #
for x in data: for x in data:
dat = dict(x) dat = dict(x)
if x["type"] in ["Name", "Category", "Unknown"]: if x["type"] in ["Name", "Category"]:
prep_data.append(dat) prep_data.append(
{
"type": dat["type"],
"value": spell_check(
dat["value"],
),
}
)
elif x["type"] == "Unknown":
type = get_hints(dat["value"])
prep_data.append(
{
"type": type,
"value": spell_check(
dat["value"],
),
}
)
else: else:
val = spell_check(
dat["value"],
)
if x["type"] in list(prep_dict.keys()): if x["type"] in list(prep_dict.keys()):
prep_dict[x["type"]] = prep_dict[ if x["type"].startswith("*"):
x["type"] unit = ProductUnitCharacteristic.objects.filter(
] | ProductCharacteristic.objects.filter( characteristic__in=prep_dict_char_type[x["type"]],
characteristic__in=prep_dict_char_type[x["type"]], )
characteristic__value__unaccent__trigram_similar=x["value"], prep_dict[x["type"]] = prep_dict[
) x["type"]
] | process_unit_operation(unit, x["value"])
else:
prep_dict[x["type"]] = prep_dict[
x["type"]
] | ProductCharacteristic.objects.filter(
characteristic__in=prep_dict_char_type[x["type"]],
characteristic__value__unaccent__trigram_similar=val,
)
else: else:
prep_dict_char_type[x["type"]] = Characteristic.objects.filter( if x["type"].startswith("*"):
name__contains=x["type"] prep_dict_char_type[x["type"]] = UnitCharacteristic.objects.filter(
) name__unaccent__trigram_similar=x["type"]
prep_dict[x["type"]] = ProductCharacteristic.objects.filter( )
characteristic__in=prep_dict_char_type[x["type"]], unit = ProductUnitCharacteristic.objects.filter(
characteristic__value__unaccent__trigram_similar=x["value"], characteristic__in=prep_dict_char_type[x["type"]],
) )
prep_dict[x["type"]] = process_unit_operation(unit, x["value"])
else:
prep_dict_char_type[x["type"]] = Characteristic.objects.filter(
name__unaccent__trigram_similar=x["type"]
)
prep_dict[x["type"]] = ProductCharacteristic.objects.filter(
characteristic__in=prep_dict_char_type[x["type"]],
characteristic__value__unaccent__trigram_similar=val,
)
# ----------------------------------- apply filters on QuerySet -------------------------------------------------- #
for el, val in prep_dict.items(): for el, val in prep_dict.items():
prep_data.append({"type": el, "value": val}) prep_data.append({"type": el, "value": val})
qs = Product.objects.filter() qs = Product.objects.filter()
@ -38,7 +102,8 @@ def process_search(data: List[dict]) -> List[dict]:
elif typ == "Category": elif typ == "Category":
qs = qs.filter(category__name__unaccent__trigram_similar=val) qs = qs.filter(category__name__unaccent__trigram_similar=val)
elif typ == "Unknown": elif typ == "Unknown":
# add translate
continue continue
else: else:
qs = qs.filter(characteristics__in=val) qs = qs.filter(characteristics__in=val)
return [x.serialize_self() for x in qs[:5]] return [x.serialize_self() for x in qs[offset: offset + limit]]

View File

@ -2,5 +2,9 @@ import requests as r
def spell_check(word: str) -> str:
    """Return the spelling service's first suggestion for ``word``.

    Sends ``word`` to the Yandex Speller ``checkText`` endpoint. If the
    service reports no corrections, or the first correction has no
    suggestions, ``word`` is returned unchanged.

    Args:
        word: text to check.

    Returns:
        The first suggestion of the first reported correction, or ``word``.
    """
    # Pass the text via ``params`` so it is URL-encoded; interpolating it
    # into the URL breaks on characters such as "&", "#" or "+".
    res = r.get(
        "https://speller.yandex.net/services/spellservice.json/checkText",
        params={"text": word},
    )
    corrections = res.json()  # parse the body once
    if not corrections:
        return word

    # NOTE(review): only the first correction is used. If the service returns
    # one entry per misspelled word, multi-word input would be replaced by a
    # single corrected word. Confirm against the API response format.
    suggestions = corrections[0].get("s")
    return suggestions[0] if suggestions else word

View File

@ -5,9 +5,14 @@ from typing import List
def translate_ru_en(word: str) -> List[str]:
    """Look up Russian-to-English translations of ``word``.

    Queries the Yandex Dictionary ``lookup`` endpoint with ``lang=ru-en``
    and collects the ``text`` of every ``tr`` entry under every ``def``
    entry of the response.

    Args:
        word: text to translate.

    Returns:
        Translation texts in response order; empty when the response has no
        ``def`` entries.
    """
    # ``params`` URL-encodes the key and the text; raw interpolation breaks
    # on spaces, "&" and similar characters.
    res = r.get(
        "https://dictionary.yandex.net/api/v1/dicservice.json/lookup",
        params={"key": YANDEX_DICT_API_KEY, "lang": "ru-en", "text": word},
    )
    payload = res.json()
    # ``.get`` tolerates payloads without "def"/"tr" instead of raising
    # KeyError.
    return [
        translation["text"]
        for definition in payload.get("def", [])
        for translation in definition.get("tr", [])
    ]
def translate_en_ru(word: str) -> List[str]:
    """Look up English-to-Russian translations of ``word``.

    Queries the Yandex Dictionary ``lookup`` endpoint with ``lang=en-ru``
    and collects the ``text`` of every ``tr`` entry under every ``def``
    entry of the response.

    Args:
        word: text to translate.

    Returns:
        Translation texts in response order; empty when the response has no
        ``def`` entries.
    """
    # ``params`` URL-encodes the key and the text; raw interpolation breaks
    # on spaces, "&" and similar characters.
    res = r.get(
        "https://dictionary.yandex.net/api/v1/dicservice.json/lookup",
        params={"key": YANDEX_DICT_API_KEY, "lang": "en-ru", "text": word},
    )
    payload = res.json()
    # ``.get`` tolerates payloads without "def"/"tr" instead of raising
    # KeyError.
    return [
        translation["text"]
        for definition in payload.get("def", [])
        for translation in definition.get("tr", [])
    ]