mirror of
https://github.com/magnum-opus-tender-hack/backend.git
synced 2024-11-10 19:46:35 +03:00
improved search: spelling, unit characteristic lookup, type guessing
This commit is contained in:
parent
497b930ca1
commit
8b5fddbd67
|
@ -214,3 +214,5 @@ REST_FRAMEWORK = {
|
||||||
|
|
||||||
# django-cors-headers
|
# django-cors-headers
|
||||||
CORS_ALLOW_ALL_ORIGINS = True
|
CORS_ALLOW_ALL_ORIGINS = True
|
||||||
|
|
||||||
|
# NOTE(review): this Yandex Dictionary API key is committed in plain text and is
# interpolated into the `key=` query parameter of the dictionary lookup URLs.
# Consider loading it from the environment instead and rotating this key.
YANDEX_DICT_API_KEY = "dict.1.1.20221022T010312Z.55cce597a6cfa505.f416aba85e7642eedc1b76b8e21c06506fb17f83"
|
||||||
|
|
|
@ -18,13 +18,14 @@ class Characteristic(models.Model):
|
||||||
class UnitCharacteristic(models.Model):
|
class UnitCharacteristic(models.Model):
|
||||||
name = models.TextField("Имя", blank=False)
|
name = models.TextField("Имя", blank=False)
|
||||||
value = models.TextField("Значение", blank=False)
|
value = models.TextField("Значение", blank=False)
|
||||||
|
numeric_value = models.IntegerField(default=0)
|
||||||
unit = models.TextField("Размерность", blank=False)
|
unit = models.TextField("Размерность", blank=False)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return str(self.name)
|
return str(self.name)
|
||||||
|
|
||||||
def serialize_self(self):
|
def serialize_self(self):
|
||||||
return {"name": self.name, "value": self.value, "unit": self.unit}
|
return {"name": self.name, "value": self.numeric_value, "unit": self.unit}
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
db_table = "unit_characteristic"
|
db_table = "unit_characteristic"
|
||||||
|
|
|
@ -27,8 +27,8 @@ def autocomplete_schema(val: str):
|
||||||
schema.extend(
|
schema.extend(
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"coordinate": char.name.index(val),
|
"coordinate": char["name"].index(val),
|
||||||
"value": {"type": char.name, "value": char.value},
|
"value": {"type": char["name"], "value": char["value"]},
|
||||||
}
|
}
|
||||||
for char in Characteristic.objects.filter(name__contains=val).values(
|
for char in Characteristic.objects.filter(name__contains=val).values(
|
||||||
"name", "value"
|
"name", "value"
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import re
|
||||||
from ast import literal_eval
|
from ast import literal_eval
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
@ -59,3 +60,16 @@ def load_excel():
|
||||||
# malformed node or string: nan \ duplicate key
|
# malformed node or string: nan \ duplicate key
|
||||||
print("СКОРОСШИВАТЕЛЬ")
|
print("СКОРОСШИВАТЕЛЬ")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
def process_unit_character():
    """Fill ``numeric_value`` of every ``UnitCharacteristic`` from its text ``value``.

    For each stored row the textual ``value`` is scanned for numbers:

    * exactly one number found -> it is converted with ``int(float(...))``
      (a comma is treated as the decimal separator) and saved to
      ``numeric_value``;
    * zero or several numbers found, or the number cannot be converted
      -> the row is deleted.

    Returns:
        None. The function works purely through database side effects.
    """
    # Raw string: the pattern text is unchanged, but "\d" and "\." inside a
    # regular string literal are invalid escape sequences. Compiled once,
    # outside the loop, because it is the same for every row.
    number_re = re.compile(r"[-+]?[.]?[\d]+(?:,\d\d\d)*[\.]?\d*(?:[eE][-+]?\d+)?")

    for el in UnitCharacteristic.objects.all():
        nums = number_re.findall(el.value)
        if len(nums) != 1:
            # Ambiguous or non-numeric value: the row cannot be compared numerically.
            el.delete()
            continue

        try:
            el.numeric_value = int(float(nums[0].replace(",", ".")))
        except (ValueError, OverflowError):
            # ValueError: text such as "1.000.5" is not a valid float.
            # OverflowError: a huge exponent ("1e999") parses to inf, and
            # int(inf) raises instead of returning a number.
            el.delete()
            continue

        el.save()
|
||||||
|
|
|
@ -1,32 +1,96 @@
|
||||||
from search.models import Product, Characteristic, ProductCharacteristic
|
import string
|
||||||
|
|
||||||
|
from search.models import (
|
||||||
|
Product,
|
||||||
|
Characteristic,
|
||||||
|
ProductCharacteristic,
|
||||||
|
ProductUnitCharacteristic,
|
||||||
|
UnitCharacteristic,
|
||||||
|
)
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
|
from search.services.hints import get_hints
|
||||||
|
from search.services.spell_check import spell_check
|
||||||
|
from search.services.translate import translate_en_ru, translate_ru_en
|
||||||
|
|
||||||
def process_search(data: List[dict]) -> List[dict]:
|
|
||||||
|
def process_unit_operation(unit, operation: str):
    """Apply a textual numeric comparison to a unit-characteristic queryset.

    ``operation`` is a comparison operator immediately followed by a number,
    for example ``">=10"``, ``"<3.5"`` or ``"=42"``. The number is truncated
    to an integer (``int(float(...))``) and compared against
    ``characteristic__numeric_value``.

    Supported operators: ``<=`` / ``=<``, ``>=`` / ``=>``, ``>``, ``<``, ``=``.

    Args:
        unit: object exposing a Django-style ``filter(**lookups)`` method
            (the callers in this module pass a ``ProductUnitCharacteristic``
            queryset).
        operation: operator + number string as described above.

    Returns:
        The result of ``unit.filter(...)`` for a recognised operation, or
        ``unit`` unchanged when ``operation`` has no known operator or the
        text after the operator is not a number.
    """
    # Two-character operators are listed first so ">=" is not mistaken for ">".
    operators = (
        ("<=", "lte"),
        ("=<", "lte"),
        (">=", "gte"),
        ("=>", "gte"),
        (">", "gt"),
        ("<", "lt"),
        ("=", "exact"),
    )

    operation = operation.strip()
    for prefix, lookup in operators:
        if not operation.startswith(prefix):
            continue
        # The number is what follows the operator (the text *after* the prefix).
        raw_number = operation[len(prefix):].strip().replace(",", ".")
        try:
            threshold = int(float(raw_number))
        except (ValueError, OverflowError):
            # Malformed number: leave the queryset unfiltered rather than fail.
            return unit
        return unit.filter(
            **{f"characteristic__numeric_value__{lookup}": threshold}
        )
    return unit
|
||||||
|
|
||||||
|
|
||||||
|
def process_search(data: List[dict], limit=10, offset=0) -> List[dict]:
|
||||||
prep_data = []
|
prep_data = []
|
||||||
prep_dict = {}
|
prep_dict = {}
|
||||||
prep_dict_char_type = {}
|
prep_dict_char_type = {}
|
||||||
|
# --------------------------------------- prepare filters -------------------------------------------------------- #
|
||||||
for x in data:
|
for x in data:
|
||||||
dat = dict(x)
|
dat = dict(x)
|
||||||
if x["type"] in ["Name", "Category", "Unknown"]:
|
if x["type"] in ["Name", "Category"]:
|
||||||
prep_data.append(dat)
|
prep_data.append(
|
||||||
|
{
|
||||||
|
"type": dat["type"],
|
||||||
|
"value": spell_check(
|
||||||
|
dat["value"],
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
elif x["type"] == "Unknown":
|
||||||
|
type = get_hints(dat["value"])
|
||||||
|
prep_data.append(
|
||||||
|
{
|
||||||
|
"type": type,
|
||||||
|
"value": spell_check(
|
||||||
|
dat["value"],
|
||||||
|
),
|
||||||
|
}
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
|
val = spell_check(
|
||||||
|
dat["value"],
|
||||||
|
)
|
||||||
if x["type"] in list(prep_dict.keys()):
|
if x["type"] in list(prep_dict.keys()):
|
||||||
prep_dict[x["type"]] = prep_dict[
|
if x["type"].startswith("*"):
|
||||||
x["type"]
|
unit = ProductUnitCharacteristic.objects.filter(
|
||||||
] | ProductCharacteristic.objects.filter(
|
characteristic__in=prep_dict_char_type[x["type"]],
|
||||||
characteristic__in=prep_dict_char_type[x["type"]],
|
)
|
||||||
characteristic__value__unaccent__trigram_similar=x["value"],
|
prep_dict[x["type"]] = prep_dict[
|
||||||
)
|
x["type"]
|
||||||
|
] | process_unit_operation(unit, x["value"])
|
||||||
|
else:
|
||||||
|
prep_dict[x["type"]] = prep_dict[
|
||||||
|
x["type"]
|
||||||
|
] | ProductCharacteristic.objects.filter(
|
||||||
|
characteristic__in=prep_dict_char_type[x["type"]],
|
||||||
|
characteristic__value__unaccent__trigram_similar=val,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
prep_dict_char_type[x["type"]] = Characteristic.objects.filter(
|
if x["type"].startswith("*"):
|
||||||
name__contains=x["type"]
|
prep_dict_char_type[x["type"]] = UnitCharacteristic.objects.filter(
|
||||||
)
|
name__unaccent__trigram_similar=x["type"]
|
||||||
prep_dict[x["type"]] = ProductCharacteristic.objects.filter(
|
)
|
||||||
characteristic__in=prep_dict_char_type[x["type"]],
|
unit = ProductUnitCharacteristic.objects.filter(
|
||||||
characteristic__value__unaccent__trigram_similar=x["value"],
|
characteristic__in=prep_dict_char_type[x["type"]],
|
||||||
)
|
)
|
||||||
|
prep_dict[x["type"]] = process_unit_operation(unit, x["value"])
|
||||||
|
else:
|
||||||
|
prep_dict_char_type[x["type"]] = Characteristic.objects.filter(
|
||||||
|
name__unaccent__trigram_similar=x["type"]
|
||||||
|
)
|
||||||
|
prep_dict[x["type"]] = ProductCharacteristic.objects.filter(
|
||||||
|
characteristic__in=prep_dict_char_type[x["type"]],
|
||||||
|
characteristic__value__unaccent__trigram_similar=val,
|
||||||
|
)
|
||||||
|
# ----------------------------------- apply filters on QuerySet -------------------------------------------------- #
|
||||||
for el, val in prep_dict.items():
|
for el, val in prep_dict.items():
|
||||||
prep_data.append({"type": el, "value": val})
|
prep_data.append({"type": el, "value": val})
|
||||||
qs = Product.objects.filter()
|
qs = Product.objects.filter()
|
||||||
|
@ -38,7 +102,8 @@ def process_search(data: List[dict]) -> List[dict]:
|
||||||
elif typ == "Category":
|
elif typ == "Category":
|
||||||
qs = qs.filter(category__name__unaccent__trigram_similar=val)
|
qs = qs.filter(category__name__unaccent__trigram_similar=val)
|
||||||
elif typ == "Unknown":
|
elif typ == "Unknown":
|
||||||
|
# add translate
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
qs = qs.filter(characteristics__in=val)
|
qs = qs.filter(characteristics__in=val)
|
||||||
return [x.serialize_self() for x in qs[:5]]
|
return [x.serialize_self() for x in qs[offset: offset + limit]]
|
||||||
|
|
|
@ -2,5 +2,9 @@ import requests as r
|
||||||
|
|
||||||
|
|
||||||
def spell_check(word: str) -> str:
|
def spell_check(word: str) -> str:
|
||||||
res = r.get(f'https://speller.yandex.net/services/spellservice.json/checkText?text={word}')
|
res = r.get(
|
||||||
return res.json()[0]['s'][0]
|
f"https://speller.yandex.net/services/spellservice.json/checkText?text={word}"
|
||||||
|
)
|
||||||
|
if not res.json():
|
||||||
|
return word
|
||||||
|
return res.json()[0]["s"][0]
|
||||||
|
|
|
@ -5,9 +5,14 @@ from typing import List
|
||||||
|
|
||||||
|
|
||||||
def translate_ru_en(word: str) -> List[str]:
|
def translate_ru_en(word: str) -> List[str]:
|
||||||
res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=ru-en&text={word}")
|
res = r.get(
|
||||||
return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])]
|
f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=ru-en&text={word}"
|
||||||
|
)
|
||||||
|
return [i["text"] for i in chain(*[j["tr"] for j in res.json()["def"]])]
|
||||||
|
|
||||||
|
|
||||||
def translate_en_ru(word: str) -> List[str]:
|
def translate_en_ru(word: str) -> List[str]:
|
||||||
res = r.get(f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=en-ru&text={word}")
|
res = r.get(
|
||||||
return [i['text'] for i in chain(*[j['tr']for j in res.json()['def']])]
|
f"https://dictionary.yandex.net/api/v1/dicservice.json/lookup?key={YANDEX_DICT_API_KEY}&lang=en-ru&text={word}"
|
||||||
|
)
|
||||||
|
return [i["text"] for i in chain(*[j["tr"] for j in res.json()["def"]])]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user